| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-21 17:19:04 +0000 |
|---|---|---|
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-21 17:19:04 +0000 |
| commit | 310edf444908b09ea6d00c03baceb7925f3bb7a2 (patch) | |
| tree | 7064577c7fa7a851e2e930beb606ea8237b0bbd2 /ml/dlib/dlib/svm | |
| parent | Releasing debian version 1.44.3-2. (diff) | |
Merging upstream version 1.45.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/dlib/dlib/svm')
135 files changed, 0 insertions, 45082 deletions
diff --git a/ml/dlib/dlib/svm/active_learning.h b/ml/dlib/dlib/svm/active_learning.h
deleted file mode 100644
index 581540e67..000000000
--- a/ml/dlib/dlib/svm/active_learning.h
+++ /dev/null
@@ -1,162 +0,0 @@
-// Copyright (C) 2012 Davis E. King (davis@dlib.net)
-// License: Boost Software License   See LICENSE.txt for the full license.
-#ifndef DLIB_ACTIVE_LEARnING_Hh_
-#define DLIB_ACTIVE_LEARnING_Hh_
-
-#include "active_learning_abstract.h"
-
-#include "svm_c_linear_dcd_trainer.h"
-#include <vector>
-
-namespace dlib
-{
-
-    enum active_learning_mode
-    {
-        max_min_margin,
-        ratio_margin
-    };
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename kernel_type,
-        typename in_sample_vector_type,
-        typename in_scalar_vector_type,
-        typename in_sample_vector_type2
-        >
-    std::vector<unsigned long> impl_rank_unlabeled_training_samples (
-        const svm_c_linear_dcd_trainer<kernel_type>& trainer,
-        const in_sample_vector_type& samples,
-        const in_scalar_vector_type& labels,
-        const in_sample_vector_type2& unlabeled_samples,
-        const active_learning_mode mode
-    )
-    {
-        DLIB_ASSERT(is_vector(unlabeled_samples) &&
-                    (samples.size() == 0 || is_learning_problem(samples, labels)),
-            "\t std::vector<unsigned long> rank_unlabeled_training_samples()"
-            << "\n\t Invalid inputs were given to this function"
-            << "\n\t is_vector(unlabeled_samples): " << is_vector(unlabeled_samples)
-            << "\n\t is_learning_problem(samples, labels): " << is_learning_problem(samples, labels)
-            << "\n\t samples.size(): " << samples.size()
-            << "\n\t labels.size():  " << labels.size()
-            );
-
-        // If there aren't any training samples then all unlabeled_samples are equally good.
-        // So just report an arbitrary ordering.
-        if (samples.size() == 0 || unlabeled_samples.size() == 0)
-        {
-            std::vector<unsigned long> ret(unlabeled_samples.size());
-            for (unsigned long i = 0; i < ret.size(); ++i)
-                ret[i] = i;
-
-            return ret;
-        }
-
-        // We are going to score each unlabeled sample and put the score and index into
-        // results.  Then at the end of this function we just sort it and return the indices.
-        std::vector<std::pair<double, unsigned long> > results;
-        results.resize(unlabeled_samples.size());
-
-        // make sure we use this trainer's ability to warm start itself since that will make
-        // this whole function run a lot faster.  But first, we need to find out what the state
-        // we will be warm starting from is.
-        typedef typename svm_c_linear_dcd_trainer<kernel_type>::optimizer_state optimizer_state;
-        optimizer_state state;
-        trainer.train(samples, labels, state); // call train() just to get state
-
-        decision_function<kernel_type> df;
-
-        std::vector<typename kernel_type::sample_type> temp_samples;
-        std::vector<typename kernel_type::scalar_type> temp_labels;
-        temp_samples.reserve(samples.size()+1);
-        temp_labels.reserve(labels.size()+1);
-        temp_samples.assign(samples.begin(), samples.end());
-        temp_labels.assign(labels.begin(), labels.end());
-        temp_samples.resize(temp_samples.size()+1);
-        temp_labels.resize(temp_labels.size()+1);
-
-
-        for (long i = 0; i < unlabeled_samples.size(); ++i)
-        {
-            temp_samples.back() = unlabeled_samples(i);
-            // figure out the margin for each possible labeling of this sample.
-
-            optimizer_state temp(state);
-            temp_labels.back() = +1;
-            df = trainer.train(temp_samples, temp_labels, temp);
-            const double margin_p = temp_labels.back()*df(temp_samples.back());
-
-            temp = state;
-            temp_labels.back() = -1;
-            df = trainer.train(temp_samples, temp_labels, temp);
-            const double margin_n = temp_labels.back()*df(temp_samples.back());
-
-            if (mode == max_min_margin)
-            {
-                // The score for this sample is its min possible margin over possible labels.
-                // Therefore, this score measures how much flexibility we have to label this
-                // sample however we want.  The intuition being that the most useful points to
-                // label are the ones that are still free to obtain either label.
-                results[i] = std::make_pair(std::min(margin_p, margin_n), i);
-            }
-            else
-            {
-                // In this case, the score for the sample is a ratio that tells how close the
-                // two margin values are to each other.  The closer they are the better.  So in
-                // this case we are saying we are looking for samples that have the same
-                // preference for either class label.
-                if (std::abs(margin_p) >= std::abs(margin_n))
-                {
-                    if (margin_p != 0)
-                        results[i] = std::make_pair(margin_n/margin_p, i);
-                    else // if both are == 0 then say 0/0 == 1
-                        results[i] = std::make_pair(1, i);
-                }
-                else
-                {
-                    results[i] = std::make_pair(margin_p/margin_n, i);
-                }
-            }
-        }
-
-        // sort the results so the highest scoring samples come first.
-        std::sort(results.rbegin(), results.rend());
-
-        // transfer results into a vector with just sample indices so we can return it.
-        std::vector<unsigned long> ret(results.size());
-        for (unsigned long i = 0; i < ret.size(); ++i)
-            ret[i] = results[i].second;
-        return ret;
-    }
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename kernel_type,
-        typename in_sample_vector_type,
-        typename in_scalar_vector_type,
-        typename in_sample_vector_type2
-        >
-    std::vector<unsigned long> rank_unlabeled_training_samples (
-        const svm_c_linear_dcd_trainer<kernel_type>& trainer,
-        const in_sample_vector_type& samples,
-        const in_scalar_vector_type& labels,
-        const in_sample_vector_type2& unlabeled_samples,
-        const active_learning_mode mode = max_min_margin
-    )
-    {
-        return impl_rank_unlabeled_training_samples(trainer,
-                                                    mat(samples),
-                                                    mat(labels),
-                                                    mat(unlabeled_samples),
-                                                    mode);
-    }
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_ACTIVE_LEARnING_Hh_
-
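For context, here is a minimal sketch (not part of this commit) of how the `rank_unlabeled_training_samples()` API removed above was typically driven; the data points and the C value are illustrative assumptions:

```cpp
#include <dlib/svm.h>
#include <iostream>
#include <vector>

int main()
{
    typedef dlib::matrix<double,2,1> sample_type;
    typedef dlib::linear_kernel<sample_type> kernel_type;

    // Two labeled points and a small pool of unlabeled candidates (made-up data).
    std::vector<sample_type> samples(2);
    samples[0] = 1, 1;      samples[1] = -1, -1;
    std::vector<double> labels = { +1, -1 };

    std::vector<sample_type> unlabeled(3);
    unlabeled[0] = 0.1, 0;  unlabeled[1] = 2, 2;  unlabeled[2] = -2, -2;

    dlib::svm_c_linear_dcd_trainer<kernel_type> trainer;
    trainer.set_c(10);

    // Indices into `unlabeled`, most informative candidate first.
    const std::vector<unsigned long> ranking = dlib::rank_unlabeled_training_samples(
        trainer, samples, labels, unlabeled, dlib::max_min_margin);

    std::cout << "label this sample next: " << ranking[0] << std::endl;
}
```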
diff --git a/ml/dlib/dlib/svm/active_learning_abstract.h b/ml/dlib/dlib/svm/active_learning_abstract.h
deleted file mode 100644
index 76a5120e3..000000000
--- a/ml/dlib/dlib/svm/active_learning_abstract.h
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright (C) 2012 Davis E. King (davis@dlib.net)
-// License: Boost Software License   See LICENSE.txt for the full license.
-#undef DLIB_ACTIVE_LEARnING_ABSTRACT_Hh_
-#ifdef DLIB_ACTIVE_LEARnING_ABSTRACT_Hh_
-
-#include "svm_c_linear_dcd_trainer_abstract.h"
-#include <vector>
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
-    enum active_learning_mode
-    {
-        max_min_margin,
-        ratio_margin
-    };
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename kernel_type,
-        typename in_sample_vector_type,
-        typename in_scalar_vector_type,
-        typename in_sample_vector_type2
-        >
-    std::vector<unsigned long> rank_unlabeled_training_samples (
-        const svm_c_linear_dcd_trainer<kernel_type>& trainer,
-        const in_sample_vector_type& samples,
-        const in_scalar_vector_type& labels,
-        const in_sample_vector_type2& unlabeled_samples,
-        const active_learning_mode mode = max_min_margin
-    );
-    /*!
-        requires
-            - if (samples.size() != 0) then
-                - it must be legal to call trainer.train(samples, labels)
-                - is_learning_problem(samples, labels) == true
-            - unlabeled_samples must contain the same kind of vectors as samples.
-            - unlabeled_samples, samples, and labels must be matrices or types of
-              objects convertible to a matrix via mat().
-            - is_vector(unlabeled_samples) == true
-        ensures
-            - Suppose that we wish to learn a binary classifier by calling
-              trainer.train(samples, labels) but we are also interested in selecting one of
-              the elements of unlabeled_samples to add to our training data.  Since doing
-              this requires us to find out the label of the sample, a potentially tedious
-              or expensive process, we would like to select the "best" element from
-              unlabeled_samples for labeling.  The rank_unlabeled_training_samples()
-              attempts to find this "best" element.  In particular, this function returns a
-              ranked list of all the elements in unlabeled_samples such that the
-              "best" elements come first.
-            - The method used by this function is described in the paper:
-                Support Vector Machine Active Learning with Applications to Text Classification
-                by Simon Tong and Daphne Koller
-              In particular, this function implements the MaxMin Margin and Ratio Margin
-              selection strategies described in the paper.  Moreover, the mode argument
-              to this function selects which of these strategies is used.
-            - returns a std::vector V such that:
-                - V contains a list of all the indices from unlabeled_samples.  Moreover,
-                  they are ordered so that the most useful samples come first.
-                - V.size() == unlabeled_samples.size()
-                - unlabeled_samples[V[0]] == The best sample to add into the training set.
-                - unlabeled_samples[V[1]] == The second best sample to add into the training set.
-                - unlabeled_samples[V[i]] == The i-th best sample to add into the training set.
-    !*/
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_ACTIVE_LEARnING_ABSTRACT_Hh_
-
-
diff --git a/ml/dlib/dlib/svm/assignment_function.h b/ml/dlib/dlib/svm/assignment_function.h
deleted file mode 100644
index fdacb2c17..000000000
--- a/ml/dlib/dlib/svm/assignment_function.h
+++ /dev/null
@@ -1,255 +0,0 @@
-// Copyright (C) 2011 Davis E. King (davis@dlib.net)
-// License: Boost Software License   See LICENSE.txt for the full license.
-#ifndef DLIB_ASSIGNMENT_FuNCTION_Hh_
-#define DLIB_ASSIGNMENT_FuNCTION_Hh_
-
-#include "assignment_function_abstract.h"
-#include "../matrix.h"
-#include <vector>
-#include "../optimization/max_cost_assignment.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename feature_extractor
-        >
-    class assignment_function
-    {
-    public:
-
-        typedef typename feature_extractor::lhs_element lhs_element;
-        typedef typename feature_extractor::rhs_element rhs_element;
-
-
-        typedef std::pair<std::vector<lhs_element>, std::vector<rhs_element> > sample_type;
-
-        typedef std::vector<long> label_type;
-        typedef label_type result_type;
-
-        assignment_function()
-        {
-            weights.set_size(fe.num_features());
-            weights = 0;
-            bias = 0;
-            force_assignment = false;
-        }
-
-        explicit assignment_function(
-            const matrix<double,0,1>& weights_,
-            double bias_
-        ) :
-            weights(weights_),
-            bias(bias_),
-            force_assignment(false)
-        {
-            // make sure requires clause is not broken
-            DLIB_ASSERT(fe.num_features() == static_cast<unsigned long>(weights_.size()),
-                "\t assignment_function::assignment_function(weights_)"
-                << "\n\t These sizes should match"
-                << "\n\t fe.num_features(): " << fe.num_features()
-                << "\n\t weights_.size():   " << weights_.size()
-                << "\n\t this: " << this
-                );
-
-        }
-
-        assignment_function(
-            const matrix<double,0,1>& weights_,
-            double bias_,
-            const feature_extractor& fe_
-        ) :
-            fe(fe_),
-            weights(weights_),
-            bias(bias_),
-            force_assignment(false)
-        {
-            // make sure requires clause is not broken
-            DLIB_ASSERT(fe_.num_features() == static_cast<unsigned long>(weights_.size()),
-                "\t assignment_function::assignment_function(weights_,fe_)"
-                << "\n\t These sizes should match"
-                << "\n\t fe_.num_features(): " << fe_.num_features()
-                << "\n\t weights_.size():    " << weights_.size()
-                << "\n\t this: " << this
-                );
-        }
-
-        assignment_function(
-            const matrix<double,0,1>& weights_,
-            double bias_,
-            const feature_extractor& fe_,
-            bool force_assignment_
-        ) :
-            fe(fe_),
-            weights(weights_),
-            bias(bias_),
-            force_assignment(force_assignment_)
-        {
-            // make sure requires clause is not broken
-            DLIB_ASSERT(fe_.num_features() == static_cast<unsigned long>(weights_.size()),
-                "\t assignment_function::assignment_function(weights_,fe_,force_assignment_)"
-                << "\n\t These sizes should match"
-                << "\n\t fe_.num_features(): " << fe_.num_features()
-                << "\n\t weights_.size():    " << weights_.size()
-                << "\n\t this: " << this
-                );
-        }
-
-        const feature_extractor& get_feature_extractor (
-        ) const { return fe; }
-
-        const matrix<double,0,1>& get_weights (
-        ) const { return weights; }
-
-        double get_bias (
-        ) const { return bias; }
-
-        bool forces_assignment (
-        ) const { return force_assignment; }
-
-        void predict_assignments (
-            const std::vector<lhs_element>& lhs,
-            const std::vector<rhs_element>& rhs,
-            result_type& assignment
-        ) const
-        {
-            assignment.clear();
-
-            matrix<double> cost;
-            unsigned long size;
-            if (force_assignment)
-            {
-                size = std::max(lhs.size(), rhs.size());
-            }
-            else
-            {
-                size = rhs.size() + lhs.size();
-            }
-            cost.set_size(size, size);
-
-            typedef typename feature_extractor::feature_vector_type feature_vector_type;
-            feature_vector_type feats;
-
-            // now fill out the cost assignment matrix
-            for (long r = 0; r < cost.nr(); ++r)
-            {
-                for (long c = 0; c < cost.nc(); ++c)
-                {
-                    if (r < (long)lhs.size() && c < (long)rhs.size())
-                    {
-                        fe.get_features(lhs[r], rhs[c], feats);
-                        cost(r,c) = dot(weights, feats) + bias;
-                    }
-                    else
-                    {
-                        cost(r,c) = 0;
-                    }
-                }
-            }
-
-
-            if (cost.size() != 0)
-            {
-                // max_cost_assignment() only works with integer matrices, so convert from
-                // double to integer.
-                const double scale = (std::numeric_limits<dlib::int64>::max()/1000)/max(abs(cost));
-                matrix<dlib::int64> int_cost = matrix_cast<dlib::int64>(round(cost*scale));
-                assignment = max_cost_assignment(int_cost);
-                assignment.resize(lhs.size());
-            }
-
-            // adjust assignment so that non-assignments have a value of -1
-            for (unsigned long i = 0; i < assignment.size(); ++i)
-            {
-                if (assignment[i] >= (long)rhs.size())
-                    assignment[i] = -1;
-            }
-        }
-
-        void predict_assignments (
-            const sample_type& item,
-            result_type& assignment
-        ) const
-        {
-            predict_assignments(item.first, item.second, assignment);
-        }
-
-        result_type operator()(
-            const std::vector<lhs_element>& lhs,
-            const std::vector<rhs_element>& rhs
-        ) const
-        {
-            result_type temp;
-            predict_assignments(lhs,rhs,temp);
-            return temp;
-        }
-
-        result_type operator() (
-            const sample_type& item
-        ) const
-        {
-            return (*this)(item.first, item.second);
-        }
-
-    private:
-
-
-        feature_extractor fe;
-        matrix<double,0,1> weights;
-        double bias;
-        bool force_assignment;
-    };
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename feature_extractor
-        >
-    void serialize (
-        const assignment_function<feature_extractor>& item,
-        std::ostream& out
-    )
-    {
-        int version = 2;
-        serialize(version, out);
-        serialize(item.get_feature_extractor(), out);
-        serialize(item.get_weights(), out);
-        serialize(item.get_bias(), out);
-        serialize(item.forces_assignment(), out);
-    }
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename feature_extractor
-        >
-    void deserialize (
-        assignment_function<feature_extractor>& item,
-        std::istream& in
-    )
-    {
-        feature_extractor fe;
-        matrix<double,0,1> weights;
-        double bias;
-        bool force_assignment;
-        int version = 0;
-        deserialize(version, in);
-        if (version != 2)
-            throw serialization_error("Unexpected version found while deserializing dlib::assignment_function.");
-
-        deserialize(fe, in);
-        deserialize(weights, in);
-        deserialize(bias, in);
-        deserialize(force_assignment, in);
-
-        item = assignment_function<feature_extractor>(weights, bias, fe, force_assignment);
-    }
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_ASSIGNMENT_FuNCTION_Hh_
-
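A rough sketch of how the removed `assignment_function` was applied once trained; `my_feature_extractor`, `w`, and `bias` are hypothetical placeholders (in practice the weights come from dlib's `structural_assignment_trainer`):

```cpp
// Hypothetical: my_feature_extractor implements the interface documented in
// assignment_function_abstract.h below (lhs_element, rhs_element,
// num_features(), get_features()).
typedef dlib::assignment_function<my_feature_extractor> assigner_type;

dlib::matrix<double,0,1> w;  // assumed already learned; w.size() == num_features()
double bias = 0;             // assumed already learned
assigner_type assigner(w, bias, my_feature_extractor());

std::vector<assigner_type::lhs_element> lhs;  // left-hand set
std::vector<assigner_type::rhs_element> rhs;  // right-hand set
// ... fill lhs and rhs ...

// assignment[i] is the index into rhs matched to lhs[i], or -1 for "no match".
const std::vector<long> assignment = assigner(lhs, rhs);
```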
diff --git a/ml/dlib/dlib/svm/assignment_function_abstract.h b/ml/dlib/dlib/svm/assignment_function_abstract.h
deleted file mode 100644
index 927731856..000000000
--- a/ml/dlib/dlib/svm/assignment_function_abstract.h
+++ /dev/null
@@ -1,342 +0,0 @@
-// Copyright (C) 2011 Davis E. King (davis@dlib.net)
-// License: Boost Software License   See LICENSE.txt for the full license.
-#undef DLIB_ASSIGNMENT_FuNCTION_ABSTRACT_Hh_
-#ifdef DLIB_ASSIGNMENT_FuNCTION_ABSTRACT_Hh_
-
-#include <vector>
-#include "../optimization/max_cost_assignment_abstract.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
-    class example_feature_extractor
-    {
-        /*!
-            WHAT THIS OBJECT REPRESENTS
-                This object defines the interface a feature extractor must implement
-                if it is to be used with the assignment_function defined at the bottom
-                of this file.
-
-                The model used by assignment_function objects is the following.
-                Given two sets of objects, the Left Hand Set (LHS) and Right Hand Set (RHS),
-                find a one-to-one mapping M from LHS into RHS such that:
-                    M == argmax_m sum_{l in LHS} match_score(l,m(l))
-                Where match_score() returns a scalar value indicating how good it is
-                to say l maps to the RHS element m(l).  Additionally, in this model,
-                m() is allowed to indicate that l doesn't map to anything, and in this
-                case it is excluded from the sum.
-
-                Finally, match_score() is defined as:
-                    match_score(l,r) == dot(w, PSI(l,r)) + bias
-                where l is an element of LHS, r is an element of RHS, w is a parameter
-                vector and bias is a scalar valued parameter.
-
-                Therefore, a feature extractor defines how the PSI() feature vector
-                is calculated.  In particular, PSI() is defined by the get_features()
-                method of this class.
-
-            THREAD SAFETY
-                Instances of this object are required to be threadsafe, that is, it should
-                be safe for multiple threads to make concurrent calls to the member
-                functions of this object.
-
-        !*/
-
-    public:
-
-        // This type should be a dlib::matrix capable of storing column vectors
-        // or an unsorted sparse vector type as defined in dlib/svm/sparse_vector_abstract.h.
-        typedef matrix_or_sparse_vector_type feature_vector_type;
-
-        // These two typedefs define the types used to represent an element in
-        // the left hand and right hand sets.  You can use any copyable types here.
-        typedef user_defined_type_1 lhs_element;
-        typedef user_defined_type_2 rhs_element;
-
-        unsigned long num_features(
-        ) const;
-        /*!
-            ensures
-                - returns the dimensionality of the PSI() feature vector.
-        !*/
-
-        void get_features (
-            const lhs_element& left,
-            const rhs_element& right,
-            feature_vector_type& feats
-        ) const;
-        /*!
-            ensures
-                - #feats == PSI(left,right)
-                  (i.e. This function computes a feature vector which, in some sense,
-                  captures information useful for deciding if matching left to right
-                  is "good").
-        !*/
-
-        unsigned long num_nonnegative_weights (
-        ) const;
-        /*!
-            ensures
-                - returns the number of elements of the w parameter vector which should be
-                  non-negative.  That is, this feature extractor is intended to be used
-                  with w vectors where the first num_nonnegative_weights() elements of w
-                  are >= 0.  That is, it should be the case that w(i) >= 0 for all i <
-                  num_nonnegative_weights().
-                - Note that num_nonnegative_weights() is just an optional method to allow
-                  you to tell a tool like the structural_assignment_trainer that the
-                  learned w should have a certain number of non-negative elements.
-                  Therefore, if you do not provide a num_nonnegative_weights() method in
-                  your feature extractor then it will default to a value of 0, indicating
-                  that all elements of the w parameter vector may be any value.
-        !*/
-
-    };
-
-// ----------------------------------------------------------------------------------------
-
-    void serialize(
-        const example_feature_extractor& item,
-        std::ostream& out
-    );
-    /*!
-        provides serialization support
-    !*/
-
-    void deserialize(
-        example_feature_extractor& item,
-        std::istream& in
-    );
-    /*!
-        provides deserialization support
-    !*/
-
-
-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename feature_extractor
-        >
-    class assignment_function
-    {
-        /*!
-            REQUIREMENTS ON feature_extractor
-                It must be an object that implements an interface compatible with
-                the example_feature_extractor discussed above.
-
-            WHAT THIS OBJECT REPRESENTS
-                This object is a tool for solving the optimal assignment problem given a
-                user defined method for computing the quality of any particular assignment.
-
-                To define this precisely, suppose you have two sets of objects, a
-                Left Hand Set (LHS) and a Right Hand Set (RHS) and you want to
-                find a one-to-one mapping M from LHS into RHS such that:
-                    M == argmax_m sum_{l in LHS} match_score(l,m(l))
-                Where match_score() returns a scalar value indicating how good it is
-                to say l maps to the RHS element m(l).  Additionally, in this model,
-                m() is allowed to indicate that l doesn't map to anything, and in this
-                case it is excluded from the sum.
-
-                Finally, this object supports match_score() functions of the form:
-                    match_score(l,r) == dot(w, PSI(l,r)) + bias
-                where l is an element of LHS, r is an element of RHS, w is a parameter
-                vector, bias is a scalar valued parameter, and PSI() is defined by the
-                feature_extractor template argument.
-
-            THREAD SAFETY
-                It is always safe to use distinct instances of this object in different
-                threads.  However, when a single instance is shared between threads then
-                the following rules apply:
-                    It is safe to call the const members of this object from multiple
-                    threads so long as the feature_extractor is also threadsafe.  This is
-                    because the const members are purely read-only operations.  However,
-                    any operation that modifies an assignment_function is not threadsafe.
-        !*/
-
-    public:
-
-        typedef typename feature_extractor::lhs_element lhs_element;
-        typedef typename feature_extractor::rhs_element rhs_element;
-        typedef std::vector<long> label_type;
-        typedef label_type result_type;
-        typedef std::pair<std::vector<lhs_element>, std::vector<rhs_element> > sample_type;
-
-        assignment_function(
-        );
-        /*!
-            ensures
-                - #get_feature_extractor() == feature_extractor()
-                  (i.e. it will have its default value)
-                - #get_weights().size() == #get_feature_extractor().num_features()
-                - #get_weights() == 0
-                - #get_bias() == 0
-                - #forces_assignment() == false
-        !*/
-
-        explicit assignment_function(
-            const matrix<double,0,1>& weights,
-            double bias
-        );
-        /*!
-            requires
-                - feature_extractor().num_features() == weights.size()
-            ensures
-                - #get_feature_extractor() == feature_extractor()
-                  (i.e. it will have its default value)
-                - #get_weights() == weights
-                - #get_bias() == bias
-                - #forces_assignment() == false
-        !*/
-
-        assignment_function(
-            const matrix<double,0,1>& weights,
-            double bias,
-            const feature_extractor& fe
-        );
-        /*!
-            requires
-                - fe.num_features() == weights.size()
-            ensures
-                - #get_feature_extractor() == fe
-                - #get_weights() == weights
-                - #get_bias() == bias
-                - #forces_assignment() == false
-        !*/
-
-        assignment_function(
-            const matrix<double,0,1>& weights,
-            double bias,
-            const feature_extractor& fe,
-            bool force_assignment
-        );
-        /*!
-            requires
-                - fe.num_features() == weights.size()
-            ensures
-                - #get_feature_extractor() == fe
-                - #get_weights() == weights
-                - #get_bias() == bias
-                - #forces_assignment() == force_assignment
-        !*/
-
-        const feature_extractor& get_feature_extractor (
-        ) const;
-        /*!
-            ensures
-                - returns the feature extractor used by this object
-        !*/
-
-        const matrix<double,0,1>& get_weights (
-        ) const;
-        /*!
-            ensures
-                - returns the parameter vector (w) associated with this assignment function.
-                  The length of the vector is get_feature_extractor().num_features().
-        !*/
-
-        double get_bias (
-        ) const;
-        /*!
-            ensures
-                - returns the bias parameter associated with this assignment function.
-        !*/
-
-        bool forces_assignment (
-        ) const;
-        /*!
-            ensures
-                - returns true if this object is in the "forced assignment mode" and false
-                  otherwise.
-                - When deciding how to match LHS to RHS, this object can operate in one of
-                  two modes.  In the default mode, this object will indicate that there is
-                  no match for an element of LHS if the best matching element of RHS would
-                  result in a negative match_score().  However, in the "forced assignment mode",
-                  this object will always make the assignment if there is an available
-                  element in RHS, regardless of the match_score().
-                - Another way to understand this distinction is to consider an example.
-                  Suppose LHS and RHS both contain 10 elements.  Then in the default mode,
-                  it is possible for this object to indicate that there are anywhere between
-                  0 to 10 matches between LHS and RHS.  However, in forced assignment mode
-                  it will always indicate exactly 10 matches.
-        !*/
-
-        result_type operator()(
-            const std::vector<lhs_element>& lhs,
-            const std::vector<rhs_element>& rhs
-        ) const;
-        /*!
-            ensures
-                - returns a vector ASSIGN such that:
-                    - ASSIGN.size() == lhs.size()
-                    - if (ASSIGN[i] != -1) then
-                        - lhs[i] is predicted to associate to rhs[ASSIGN[i]].
-                    - else
-                        - lhs[i] doesn't associate with anything in rhs.
-                    - All values in ASSIGN which are not equal to -1 are unique.
-                      That is, ASSIGN will never indicate that more than one element
-                      of lhs is assigned to a particular element of rhs.
-        !*/
-
-        result_type operator() (
-            const sample_type& item
-        ) const;
-        /*!
-            ensures
-                - returns (*this)(item.first, item.second);
-        !*/
-
-        void predict_assignments (
-            const sample_type& item,
-            result_type& assignment
-        ) const;
-        /*!
-            ensures
-                - #assignment == (*this)(item)
-        !*/
-
-        void predict_assignments (
-            const std::vector<lhs_element>& lhs,
-            const std::vector<rhs_element>& rhs,
-            result_type& assignment
-        ) const;
-        /*!
-            ensures
-                - #assignment == (*this)(lhs,rhs)
-        !*/
-    };
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename feature_extractor
-        >
-    void serialize (
-        const assignment_function<feature_extractor>& item,
-        std::ostream& out
-    );
-    /*!
-        provides serialization support
-    !*/
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename feature_extractor
-        >
-    void deserialize (
-        assignment_function<feature_extractor>& item,
-        std::istream& in
-    );
-    /*!
-        provides deserialization support
-    !*/
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_ASSIGNMENT_FuNCTION_ABSTRACT_Hh_
-
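To make the extractor contract above concrete, here is a minimal hypothetical implementation for matching 2-D points by proximity; this is a sketch of the documented interface, not code from dlib:

```cpp
#include <dlib/matrix.h>
#include <iostream>

struct point_matcher
{
    typedef dlib::matrix<double,0,1> feature_vector_type;
    typedef dlib::matrix<double,2,1> lhs_element;  // a 2-D point
    typedef dlib::matrix<double,2,1> rhs_element;  // a 2-D point

    unsigned long num_features() const { return 2; }

    void get_features (
        const lhs_element& left,
        const rhs_element& right,
        feature_vector_type& feats
    ) const
    {
        feats.set_size(2);
        // PSI(left,right): a negated distance plus a constant component, so
        // match_score() == dot(w,PSI(l,r)) + bias can favor nearby pairs.
        feats(0) = -dlib::length(left - right);
        feats(1) = 1;
    }
};

// Serialization stubs so the extractor can be used with dlib's trainers.
inline void serialize   (const point_matcher&, std::ostream&) {}
inline void deserialize (point_matcher&, std::istream&) {}
```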
diff --git a/ml/dlib/dlib/svm/cross_validate_assignment_trainer.h b/ml/dlib/dlib/svm/cross_validate_assignment_trainer.h
deleted file mode 100644
index 8166e1c82..000000000
--- a/ml/dlib/dlib/svm/cross_validate_assignment_trainer.h
+++ /dev/null
@@ -1,181 +0,0 @@
-// Copyright (C) 2011 Davis E. King (davis@dlib.net)
-// License: Boost Software License   See LICENSE.txt for the full license.
-#ifndef DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_Hh_
-#define DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_Hh_
-
-#include "cross_validate_assignment_trainer_abstract.h"
-#include <vector>
-#include "../matrix.h"
-#include "svm.h"
-
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename assignment_function
-        >
-    double test_assignment_function (
-        const assignment_function& assigner,
-        const std::vector<typename assignment_function::sample_type>& samples,
-        const std::vector<typename assignment_function::label_type>& labels
-    )
-    {
-        // make sure requires clause is not broken
-#ifdef ENABLE_ASSERTS
-        if (assigner.forces_assignment())
-        {
-            DLIB_ASSERT(is_forced_assignment_problem(samples, labels),
-                "\t double test_assignment_function()"
-                << "\n\t invalid inputs were given to this function"
-                << "\n\t is_forced_assignment_problem(samples,labels): " << is_forced_assignment_problem(samples,labels)
-                << "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels)
-                << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels)
-                );
-        }
-        else
-        {
-            DLIB_ASSERT(is_assignment_problem(samples, labels),
-                "\t double test_assignment_function()"
-                << "\n\t invalid inputs were given to this function"
-                << "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels)
-                << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels)
-                );
-        }
-#endif
-        double total_right = 0;
-        double total = 0;
-        for (unsigned long i = 0; i < samples.size(); ++i)
-        {
-            const std::vector<long>& out = assigner(samples[i]);
-            for (unsigned long j = 0; j < out.size(); ++j)
-            {
-                if (out[j] == labels[i][j])
-                    ++total_right;
-
-                ++total;
-            }
-        }
-
-        if (total != 0)
-            return total_right/total;
-        else
-            return 1;
-    }
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename trainer_type
-        >
-    double cross_validate_assignment_trainer (
-        const trainer_type& trainer,
-        const std::vector<typename trainer_type::sample_type>& samples,
-        const std::vector<typename trainer_type::label_type>& labels,
-        const long folds
-    )
-    {
-        // make sure requires clause is not broken
-#ifdef ENABLE_ASSERTS
-        if (trainer.forces_assignment())
-        {
-            DLIB_ASSERT(is_forced_assignment_problem(samples, labels) &&
-                        1 < folds && folds <= static_cast<long>(samples.size()),
-                "\t double cross_validate_assignment_trainer()"
-                << "\n\t invalid inputs were given to this function"
-                << "\n\t samples.size(): " << samples.size()
-                << "\n\t folds: " << folds
-                << "\n\t is_forced_assignment_problem(samples,labels): " << is_forced_assignment_problem(samples,labels)
-                << "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels)
-                << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels)
-                );
-        }
-        else
-        {
-            DLIB_ASSERT(is_assignment_problem(samples, labels) &&
-                        1 < folds && folds <= static_cast<long>(samples.size()),
-                "\t double cross_validate_assignment_trainer()"
-                << "\n\t invalid inputs were given to this function"
-                << "\n\t samples.size(): " << samples.size()
-                << "\n\t folds: " << folds
-                << "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels)
-                << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels)
-                );
-        }
-#endif
-
-
-
-        typedef typename trainer_type::sample_type sample_type;
-        typedef typename trainer_type::label_type label_type;
-
-        const long num_in_test = samples.size()/folds;
-        const long num_in_train = samples.size() - num_in_test;
-
-
-        std::vector<sample_type> samples_test, samples_train;
-        std::vector<label_type> labels_test, labels_train;
-
-
-        long next_test_idx = 0;
-        double total_right = 0;
-        double total = 0;
-
-
-        for (long i = 0; i < folds; ++i)
-        {
-            samples_test.clear();
-            labels_test.clear();
-            samples_train.clear();
-            labels_train.clear();
-
-            // load up the test samples
-            for (long cnt = 0; cnt < num_in_test; ++cnt)
-            {
-                samples_test.push_back(samples[next_test_idx]);
-                labels_test.push_back(labels[next_test_idx]);
-                next_test_idx = (next_test_idx + 1)%samples.size();
-            }
-
-            // load up the training samples
-            long next = next_test_idx;
-            for (long cnt = 0; cnt < num_in_train; ++cnt)
-            {
-                samples_train.push_back(samples[next]);
-                labels_train.push_back(labels[next]);
-                next = (next + 1)%samples.size();
-            }
-
-
-            const typename trainer_type::trained_function_type& df = trainer.train(samples_train,labels_train);
-
-            // check how good df is on the test data
-            for (unsigned long i = 0; i < samples_test.size(); ++i)
-            {
-                const std::vector<long>& out = df(samples_test[i]);
-                for (unsigned long j = 0; j < out.size(); ++j)
-                {
-                    if (out[j] == labels_test[i][j])
-                        ++total_right;
-
-                    ++total;
-                }
-            }
-
-        } // for (long i = 0; i < folds; ++i)
-
-        if (total != 0)
-            return total_right/total;
-        else
-            return 1;
-
-    }
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_Hh_
-
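Typical call pattern for the routines above, assuming dlib's `structural_assignment_trainer` and the hypothetical `point_matcher` extractor sketched earlier; the data wiring is elided:

```cpp
typedef std::pair<std::vector<point_matcher::lhs_element>,
                  std::vector<point_matcher::rhs_element> > sample_type;

std::vector<sample_type> samples;        // assumed filled elsewhere
std::vector<std::vector<long> > labels;  // labels[i][j]: rhs index for lhs j, or -1

dlib::structural_assignment_trainer<point_matcher> trainer;
trainer.set_c(10);

// Fraction of per-element assignments predicted correctly over 4 folds.
const double accuracy =
    dlib::cross_validate_assignment_trainer(trainer, samples, labels, 4);
```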
diff --git a/ml/dlib/dlib/svm/cross_validate_assignment_trainer_abstract.h b/ml/dlib/dlib/svm/cross_validate_assignment_trainer_abstract.h
deleted file mode 100644
index 05dd4758e..000000000
--- a/ml/dlib/dlib/svm/cross_validate_assignment_trainer_abstract.h
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright (C) 2011 Davis E. King (davis@dlib.net)
-// License: Boost Software License   See LICENSE.txt for the full license.
-#undef DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_ABSTRACT_Hh_
-#ifdef DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_ABSTRACT_Hh_
-
-#include <vector>
-#include "../matrix.h"
-#include "svm.h"
-
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename assignment_function
-        >
-    double test_assignment_function (
-        const assignment_function& assigner,
-        const std::vector<typename assignment_function::sample_type>& samples,
-        const std::vector<typename assignment_function::label_type>& labels
-    );
-    /*!
-        requires
-            - is_assignment_problem(samples, labels)
-            - if (assigner.forces_assignment()) then
-                - is_forced_assignment_problem(samples, labels)
-            - assignment_function == an instantiation of the dlib::assignment_function
-              template or an object with a compatible interface.
-        ensures
-            - Tests assigner against the given samples and labels and returns the fraction
-              of assignments predicted correctly.
-    !*/
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename trainer_type
-        >
-    double cross_validate_assignment_trainer (
-        const trainer_type& trainer,
-        const std::vector<typename trainer_type::sample_type>& samples,
-        const std::vector<typename trainer_type::label_type>& labels,
-        const long folds
-    );
-    /*!
-        requires
-            - is_assignment_problem(samples, labels)
-            - if (trainer.forces_assignment()) then
-                - is_forced_assignment_problem(samples, labels)
-            - 1 < folds <= samples.size()
-            - trainer_type == dlib::structural_assignment_trainer or an object
-              with a compatible interface.
-        ensures
-            - performs k-fold cross validation by using the given trainer to solve the
-              given assignment learning problem for the given number of folds.  Each fold
-              is tested using the output of the trainer and the fraction of assignments
-              predicted correctly is returned.
-            - The number of folds used is given by the folds argument.
-    !*/
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_ABSTRACT_Hh_
-
-
diff --git a/ml/dlib/dlib/svm/cross_validate_graph_labeling_trainer.h b/ml/dlib/dlib/svm/cross_validate_graph_labeling_trainer.h
deleted file mode 100644
index 83e4e4048..000000000
--- a/ml/dlib/dlib/svm/cross_validate_graph_labeling_trainer.h
+++ /dev/null
@@ -1,258 +0,0 @@
-// Copyright (C) 2012 Davis E. King (davis@dlib.net)
-// License: Boost Software License   See LICENSE.txt for the full license.
-#ifndef DLIB_CROSS_VALIDATE_GRAPh_LABELING_TRAINER_Hh_
-#define DLIB_CROSS_VALIDATE_GRAPh_LABELING_TRAINER_Hh_
-
-#include "../array.h"
-#include "../graph_cuts/min_cut.h"
-#include "svm.h"
-#include "cross_validate_graph_labeling_trainer_abstract.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename graph_labeler,
-        typename graph_type
-        >
-    matrix<double,1,2> test_graph_labeling_function (
-        const graph_labeler& labeler,
-        const dlib::array<graph_type>& samples,
-        const std::vector<std::vector<bool> >& labels,
-        const std::vector<std::vector<double> >& losses
-    )
-    {
-#ifdef ENABLE_ASSERTS
-        std::string reason_for_failure;
-        DLIB_ASSERT(is_graph_labeling_problem(samples, labels, reason_for_failure),
-            "\t matrix test_graph_labeling_function()"
-            << "\n\t invalid inputs were given to this function"
-            << "\n\t samples.size(): " << samples.size()
-            << "\n\t reason_for_failure: " << reason_for_failure
-            );
-        DLIB_ASSERT((losses.size() == 0 || sizes_match(labels, losses) == true) &&
-                    all_values_are_nonnegative(losses) == true,
-            "\t matrix test_graph_labeling_function()"
-            << "\n\t Invalid inputs were given to this function."
-            << "\n\t labels.size(): " << labels.size()
-            << "\n\t losses.size(): " << losses.size()
-            << "\n\t sizes_match(labels,losses): " << sizes_match(labels,losses)
-            << "\n\t all_values_are_nonnegative(losses): " << all_values_are_nonnegative(losses)
-            );
-#endif
-
-        std::vector<bool> temp;
-        double num_pos_correct = 0;
-        double num_pos = 0;
-        double num_neg_correct = 0;
-        double num_neg = 0;
-
-        for (unsigned long i = 0; i < samples.size(); ++i)
-        {
-            labeler(samples[i], temp);
-
-            for (unsigned long j = 0; j < labels[i].size(); ++j)
-            {
-                // What is the loss for this example?  It's just 1 unless we have a
-                // per example loss vector.
-                const double loss = (losses.size() == 0) ? 1.0 : losses[i][j];
-
-                if (labels[i][j])
-                {
-                    num_pos += loss;
-                    if (temp[j])
-                        num_pos_correct += loss;
-                }
-                else
-                {
-                    num_neg += loss;
-                    if (!temp[j])
-                        num_neg_correct += loss;
-                }
-            }
-        }
-
-        matrix<double, 1, 2> res;
-        if (num_pos != 0)
-            res(0) = num_pos_correct/num_pos;
-        else
-            res(0) = 1;
-        if (num_neg != 0)
-            res(1) = num_neg_correct/num_neg;
-        else
-            res(1) = 1;
-        return res;
-    }
-
-    template <
-        typename graph_labeler,
-        typename graph_type
-        >
-    matrix<double,1,2> test_graph_labeling_function (
-        const graph_labeler& labeler,
-        const dlib::array<graph_type>& samples,
-        const std::vector<std::vector<bool> >& labels
-    )
-    {
-        std::vector<std::vector<double> > losses;
-        return test_graph_labeling_function(labeler, samples, labels, losses);
-    }
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename trainer_type,
-        typename graph_type
-        >
-    matrix<double,1,2> cross_validate_graph_labeling_trainer (
-        const trainer_type& trainer,
-        const dlib::array<graph_type>& samples,
-        const std::vector<std::vector<bool> >& labels,
-        const std::vector<std::vector<double> >& losses,
-        const long folds
-    )
-    {
-#ifdef ENABLE_ASSERTS
-        std::string reason_for_failure;
-        DLIB_ASSERT(is_graph_labeling_problem(samples, labels, reason_for_failure),
-            "\t matrix cross_validate_graph_labeling_trainer()"
-            << "\n\t invalid inputs were given to this function"
-            << "\n\t samples.size(): " << samples.size()
-            << "\n\t reason_for_failure: " << reason_for_failure
-            );
-        DLIB_ASSERT( 1 < folds && folds <= static_cast<long>(samples.size()),
-            "\t matrix cross_validate_graph_labeling_trainer()"
-            << "\n\t invalid inputs were given to this function"
-            << "\n\t folds: " << folds
-            );
-        DLIB_ASSERT((losses.size() == 0 || sizes_match(labels, losses) == true) &&
-                    all_values_are_nonnegative(losses) == true,
-            "\t matrix cross_validate_graph_labeling_trainer()"
-            << "\n\t Invalid inputs were given to this function."
-            << "\n\t labels.size(): " << labels.size()
-            << "\n\t losses.size(): " << losses.size()
-            << "\n\t sizes_match(labels,losses): " << sizes_match(labels,losses)
-            << "\n\t all_values_are_nonnegative(losses): " << all_values_are_nonnegative(losses)
-            );
-#endif
-
-        typedef std::vector<bool> label_type;
-
-        const long num_in_test = samples.size()/folds;
-        const long num_in_train = samples.size() - num_in_test;
-
-
-        dlib::array<graph_type> samples_test, samples_train;
-        std::vector<label_type> labels_test, labels_train;
-        std::vector<std::vector<double> > losses_test, losses_train;
-
-
-        long next_test_idx = 0;
-
-        std::vector<bool> temp;
-        double num_pos_correct = 0;
-        double num_pos = 0;
-        double num_neg_correct = 0;
-        double num_neg = 0;
-
-        graph_type gtemp;
-
-        for (long i = 0; i < folds; ++i)
-        {
-            samples_test.clear();
-            labels_test.clear();
-            losses_test.clear();
-            samples_train.clear();
-            labels_train.clear();
-            losses_train.clear();
-
-            // load up the test samples
-            for (long cnt = 0; cnt < num_in_test; ++cnt)
-            {
-                copy_graph(samples[next_test_idx], gtemp);
-                samples_test.push_back(gtemp);
-                labels_test.push_back(labels[next_test_idx]);
-                if (losses.size() != 0)
-                    losses_test.push_back(losses[next_test_idx]);
-                next_test_idx = (next_test_idx + 1)%samples.size();
-            }
-
-            // load up the training samples
-            long next = next_test_idx;
-            for (long cnt = 0; cnt < num_in_train; ++cnt)
-            {
-                copy_graph(samples[next], gtemp);
-                samples_train.push_back(gtemp);
-                labels_train.push_back(labels[next]);
-                if (losses.size() != 0)
-                    losses_train.push_back(losses[next]);
-                next = (next + 1)%samples.size();
-            }
-
-
-            const typename trainer_type::trained_function_type& labeler = trainer.train(samples_train,labels_train,losses_train);
-
-            // check how good labeler is on the test data
-            for (unsigned long i = 0; i < samples_test.size(); ++i)
-            {
-                labeler(samples_test[i], temp);
-                for (unsigned long j = 0; j < labels_test[i].size(); ++j)
-                {
-                    // What is the loss for this example?  It's just 1 unless we have a
-                    // per example loss vector.
-                    const double loss = (losses_test.size() == 0) ? 1.0 : losses_test[i][j];
-
-                    if (labels_test[i][j])
-                    {
-                        num_pos += loss;
-                        if (temp[j])
-                            num_pos_correct += loss;
-                    }
-                    else
-                    {
-                        num_neg += loss;
-                        if (!temp[j])
-                            num_neg_correct += loss;
-                    }
-                }
-            }
-
-        } // for (long i = 0; i < folds; ++i)
-
-
-        matrix<double, 1, 2> res;
-        if (num_pos != 0)
-            res(0) = num_pos_correct/num_pos;
-        else
-            res(0) = 1;
-        if (num_neg != 0)
-            res(1) = num_neg_correct/num_neg;
-        else
-            res(1) = 1;
-        return res;
-    }
-
-    template <
-        typename trainer_type,
-        typename graph_type
-        >
-    matrix<double,1,2> cross_validate_graph_labeling_trainer (
-        const trainer_type& trainer,
-        const dlib::array<graph_type>& samples,
-        const std::vector<std::vector<bool> >& labels,
-        const long folds
-    )
-    {
-        std::vector<std::vector<double> > losses;
-        return cross_validate_graph_labeling_trainer(trainer, samples, labels, losses, folds);
-    }
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_CROSS_VALIDATE_GRAPh_LABELING_TRAINER_Hh_
-
-
diff --git a/ml/dlib/dlib/svm/cross_validate_graph_labeling_trainer_abstract.h b/ml/dlib/dlib/svm/cross_validate_graph_labeling_trainer_abstract.h
deleted file mode 100644
index cda4af91e..000000000
--- a/ml/dlib/dlib/svm/cross_validate_graph_labeling_trainer_abstract.h
+++ /dev/null
@@ -1,147 +0,0 @@
-// Copyright (C) 2012 Davis E. King (davis@dlib.net)
-// License: Boost Software License   See LICENSE.txt for the full license.
-#undef DLIB_CROSS_VALIDATE_GRAPh_LABELING_TRAINER_ABSTRACT_Hh_
-#ifdef DLIB_CROSS_VALIDATE_GRAPh_LABELING_TRAINER_ABSTRACT_Hh_
-
-#include "../array/array_kernel_abstract.h"
-#include <vector>
-#include "../matrix/matrix_abstract.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename graph_labeler,
-        typename graph_type
-        >
-    matrix<double,1,2> test_graph_labeling_function (
-        const graph_labeler& labeler,
-        const dlib::array<graph_type>& samples,
-        const std::vector<std::vector<bool> >& labels
-    );
-    /*!
-        requires
-            - is_graph_labeling_problem(samples,labels) == true
-            - graph_labeler == an object with an interface compatible with the
-              dlib::graph_labeler object.
-            - the following must be a valid expression: labeler(samples[0]);
-        ensures
-            - This function tests the accuracy of the given graph labeler against
-              the sample graphs and their associated labels.  In particular, this
-              function returns a matrix R such that:
-                - R(0) == The fraction of nodes which are supposed to have a label of
-                  true that are labeled as such by the labeler.
-                - R(1) == The fraction of nodes which are supposed to have a label of
-                  false that are labeled as such by the labeler.
-              Therefore, if R is [1,1] then the labeler makes perfect predictions while
-              an R of [0,0] indicates that it gets everything wrong.
-    !*/
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename graph_labeler,
-        typename graph_type
-        >
-    matrix<double,1,2> test_graph_labeling_function (
-        const graph_labeler& labeler,
-        const dlib::array<graph_type>& samples,
-        const std::vector<std::vector<bool> >& labels,
-        const std::vector<std::vector<double> >& losses
-    );
-    /*!
-        requires
-            - is_graph_labeling_problem(samples,labels) == true
-            - graph_labeler == an object with an interface compatible with the
-              dlib::graph_labeler object.
-            - the following must be a valid expression: labeler(samples[0]);
-            - if (losses.size() != 0) then
-                - sizes_match(labels, losses) == true
-                - all_values_are_nonnegative(losses) == true
-        ensures
-            - This overload of test_graph_labeling_function() does the same thing as the
-              one defined above, except that instead of counting 1 for each labeling
-              mistake, it weights each mistake according to the corresponding value in
-              losses.  That is, instead of counting a value of 1 for making a mistake on
-              samples[i].node(j), this routine counts a value of losses[i][j].  Under this
-              interpretation, the loss values represent how useful it is to correctly label
-              each node.  Therefore, the values returned represent fractions of overall
-              labeling utility rather than raw labeling accuracy.
-    !*/
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename trainer_type,
-        typename graph_type
-        >
-    matrix<double,1,2> cross_validate_graph_labeling_trainer (
-        const trainer_type& trainer,
-        const dlib::array<graph_type>& samples,
-        const std::vector<std::vector<bool> >& labels,
-        const long folds
-    );
-    /*!
-        requires
-            - is_graph_labeling_problem(samples,labels) == true
-            - 1 < folds <= samples.size()
-            - trainer_type == an object which trains some kind of graph labeler object
-              (e.g. structural_graph_labeling_trainer)
-        ensures
-            - performs k-fold cross validation by using the given trainer to solve the
-              given graph labeling problem for the given number of folds.  Each fold
-              is tested using the output of the trainer and the average classification
-              accuracy from all folds is returned.  In particular, this function returns
-              a matrix R such that:
-                - R(0) == The fraction of nodes which are supposed to have a label of
-                  true that are labeled as such by the learned labeler.
-                - R(1) == The fraction of nodes which are supposed to have a label of
-                  false that are labeled as such by the learned labeler.
-              Therefore, if R is [1,1] then the labeler makes perfect predictions while
-              an R of [0,0] indicates that it gets everything wrong.
-            - The number of folds used is given by the folds argument.
-    !*/
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename trainer_type,
-        typename graph_type
-        >
-    matrix<double,1,2> cross_validate_graph_labeling_trainer (
-        const trainer_type& trainer,
-        const dlib::array<graph_type>& samples,
-        const std::vector<std::vector<bool> >& labels,
-        const std::vector<std::vector<double> >& losses,
-        const long folds
-    );
-    /*!
-        requires
-            - is_graph_labeling_problem(samples,labels) == true
-            - 1 < folds <= samples.size()
-            - trainer_type == an object which trains some kind of graph labeler object
-              (e.g. structural_graph_labeling_trainer)
-            - if (losses.size() != 0) then
-                - sizes_match(labels, losses) == true
-                - all_values_are_nonnegative(losses) == true
-        ensures
-            - This overload of cross_validate_graph_labeling_trainer() does the same thing
-              as the one defined above, except that instead of counting 1 for each labeling
-              mistake, it weights each mistake according to the corresponding value in
-              losses.  That is, instead of counting a value of 1 for making a mistake on
-              samples[i].node(j), this routine counts a value of losses[i][j].  Under this
-              interpretation, the loss values represent how useful it is to correctly label
-              each node.  Therefore, the values returned represent fractions of overall
-              labeling utility rather than raw labeling accuracy.
-    !*/
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_CROSS_VALIDATE_GRAPh_LABELING_TRAINER_ABSTRACT_Hh_
-
-
-
diff --git a/ml/dlib/dlib/svm/cross_validate_multiclass_trainer.h b/ml/dlib/dlib/svm/cross_validate_multiclass_trainer.h
deleted file mode 100644
index be8fa3f3f..000000000
--- a/ml/dlib/dlib/svm/cross_validate_multiclass_trainer.h
+++ /dev/null
@@ -1,208 +0,0 @@
-// Copyright (C) 2010 Davis E. King (davis@dlib.net)
-// License: Boost Software License   See LICENSE.txt for the full license.
-#ifndef DLIB_CROSS_VALIDATE_MULTICLASS_TRaINER_Hh_
-#define DLIB_CROSS_VALIDATE_MULTICLASS_TRaINER_Hh_
-
-#include <vector>
-#include "../matrix.h"
-#include "cross_validate_multiclass_trainer_abstract.h"
-#include <sstream>
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename dec_funct_type,
-        typename sample_type,
-        typename label_type
-        >
-    const matrix<double> test_multiclass_decision_function (
-        const dec_funct_type& dec_funct,
-        const std::vector<sample_type>& x_test,
-        const std::vector<label_type>& y_test
-    )
-    {
-
-        // make sure requires clause is not broken
-        DLIB_ASSERT( is_learning_problem(x_test,y_test) == true,
-            "\tmatrix test_multiclass_decision_function()"
-            << "\n\t invalid inputs were given to this function"
-            << "\n\t is_learning_problem(x_test,y_test): "
-            << is_learning_problem(x_test,y_test));
-
-
-        const std::vector<label_type> all_labels = dec_funct.get_labels();
-
-        // make a lookup table that maps from labels to their index in all_labels
-        std::map<label_type,unsigned long> label_to_int;
-        for (unsigned long i = 0; i < all_labels.size(); ++i)
-            label_to_int[all_labels[i]] = i;
-
-        matrix<double, 0, 0, typename dec_funct_type::mem_manager_type> res;
-        res.set_size(all_labels.size(), all_labels.size());
-
-        res = 0;
-
-        typename std::map<label_type,unsigned long>::const_iterator iter;
-
-        // now test this trained object
-        for (unsigned long i = 0; i < x_test.size(); ++i)
-        {
-            iter = label_to_int.find(y_test[i]);
-            // ignore samples with labels that the decision function doesn't know about.
-            if (iter == label_to_int.end())
-                continue;
-
-            const unsigned long truth = iter->second;
-            const unsigned long pred = label_to_int[dec_funct(x_test[i])];
-
-            res(truth,pred) += 1;
-        }
-
-        return res;
-    }
-
-// ----------------------------------------------------------------------------------------
-
-    class cross_validation_error : public dlib::error
-    {
-    public:
-        cross_validation_error(const std::string& msg) : dlib::error(msg){};
-    };
-
-    template <
-        typename trainer_type,
-        typename sample_type,
-        typename label_type
-        >
-    const matrix<double> cross_validate_multiclass_trainer (
-        const trainer_type& trainer,
-        const std::vector<sample_type>& x,
-        const std::vector<label_type>& y,
-        const long folds
-    )
-    {
-        typedef typename trainer_type::mem_manager_type mem_manager_type;
-
-        // make sure requires clause is not broken
-        DLIB_ASSERT(is_learning_problem(x,y) == true &&
-                    1 < folds && folds <= static_cast<long>(x.size()),
-            "\tmatrix cross_validate_multiclass_trainer()"
-            << "\n\t invalid inputs were given to this function"
-            << "\n\t x.size(): " << x.size()
-            << "\n\t folds:    " << folds
-            << "\n\t is_learning_problem(x,y): " << is_learning_problem(x,y)
-            );
-
-        const std::vector<label_type> all_labels = select_all_distinct_labels(y);
-
-        // count the number of times each label shows up
-        std::map<label_type,long> label_counts;
-        for (unsigned long i = 0; i < y.size(); ++i)
-            label_counts[y[i]] += 1;
-
-
-        // figure out how many samples from each class will be in the test and train splits
-        std::map<label_type,long> num_in_test, num_in_train;
-        for (typename std::map<label_type,long>::iterator i = label_counts.begin(); i != label_counts.end(); ++i)
-        {
-            const long in_test = i->second/folds;
-            if (in_test == 0)
-            {
-                std::ostringstream sout;
-                sout << "In dlib::cross_validate_multiclass_trainer(), the number of folds was larger" << std::endl;
-                sout << "than the number of elements of one of the training classes." << std::endl;
-                sout << "  folds: "<< folds << std::endl;
-                sout << "  size of class " << i->first << ": "<< i->second << std::endl;
-                throw cross_validation_error(sout.str());
-            }
-            num_in_test[i->first] = in_test;
-            num_in_train[i->first] = i->second - in_test;
-        }
-
-
-
-        std::vector<sample_type> x_test, x_train;
-        std::vector<label_type> y_test, y_train;
-
-        matrix<double, 0, 0, mem_manager_type> res;
-
-        std::map<label_type,long> next_test_idx;
-        for (unsigned long i = 0; i < all_labels.size(); ++i)
-            next_test_idx[all_labels[i]] = 0;
-
-        label_type label;
-
-        for (long i = 0; i < folds; ++i)
-        {
-            x_test.clear();
-            y_test.clear();
-            x_train.clear();
-            y_train.clear();
-
-            // load up the test samples
-            for (unsigned long j = 0; j < all_labels.size(); ++j)
-            {
-                label = all_labels[j];
-                long next = next_test_idx[label];
-
-                long cur = 0;
-                const long num_needed = num_in_test[label];
-                while (cur < num_needed)
-                {
-                    if (y[next] == label)
-                    {
-                        x_test.push_back(x[next]);
-                        y_test.push_back(label);
-                        ++cur;
-                    }
-                    next = (next + 1)%x.size();
-                }
-
-                next_test_idx[label] = next;
-            }
-
-            // load up the training samples
-            for (unsigned long j = 0; j < all_labels.size(); ++j)
-            {
-                label = all_labels[j];
-                long next = next_test_idx[label];
-
-                long cur = 0;
-                const long num_needed = num_in_train[label];
-                while (cur < num_needed)
-                {
-                    if (y[next] == label)
-                    {
-                        x_train.push_back(x[next]);
-                        y_train.push_back(label);
-                        ++cur;
-                    }
-                    next = (next + 1)%x.size();
-                }
-            }
-
-
-            try
-            {
-                // do the training and testing
-                res += test_multiclass_decision_function(trainer.train(x_train,y_train),x_test,y_test);
-            }
-            catch (invalid_nu_error&)
-            {
-                // just ignore cases which result in an invalid nu
-            }
-
-        } // for (long i = 0; i < folds; ++i)
-
-        return res;
-    }
-
-}
-
-// ----------------------------------------------------------------------------------------
-
-#endif // DLIB_CROSS_VALIDATE_MULTICLASS_TRaINER_Hh_
-
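For reference, the usual pairing of these routines with `one_vs_one_trainer`, following the pattern in dlib's own multiclass examples; the kernel parameters and data are illustrative:

```cpp
typedef dlib::matrix<double,0,1> sample_type;
typedef dlib::one_vs_one_trainer<dlib::any_trainer<sample_type> > ovo_trainer;

std::vector<sample_type> samples;  // assumed filled elsewhere
std::vector<double> labels;        // e.g. class labels 1, 2, 3

ovo_trainer trainer;
dlib::krr_trainer<dlib::radial_basis_kernel<sample_type> > rbf_trainer;
rbf_trainer.set_kernel(dlib::radial_basis_kernel<sample_type>(0.1));
trainer.set_trainer(rbf_trainer);

// 5-fold cross validation; C(r,c) counts samples of class L(r) predicted as L(c).
const dlib::matrix<double> C =
    dlib::cross_validate_multiclass_trainer(trainer, samples, labels, 5);
```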
diff --git a/ml/dlib/dlib/svm/cross_validate_multiclass_trainer_abstract.h b/ml/dlib/dlib/svm/cross_validate_multiclass_trainer_abstract.h
deleted file mode 100644
index f84503cdc..000000000
--- a/ml/dlib/dlib/svm/cross_validate_multiclass_trainer_abstract.h
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright (C) 2010 Davis E. King (davis@dlib.net)
-// License: Boost Software License   See LICENSE.txt for the full license.
-#undef DLIB_CROSS_VALIDATE_MULTICLASS_TRaINER_ABSTRACT_Hh_
-#ifdef DLIB_CROSS_VALIDATE_MULTICLASS_TRaINER_ABSTRACT_Hh_
-
-#include <vector>
-#include "../matrix.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename dec_funct_type,
-        typename sample_type,
-        typename label_type
-        >
-    const matrix<double> test_multiclass_decision_function (
-        const dec_funct_type& dec_funct,
-        const std::vector<sample_type>& x_test,
-        const std::vector<label_type>& y_test
-    );
-    /*!
-        requires
-            - is_learning_problem(x_test, y_test)
-            - dec_funct_type == some kind of multiclass decision function object
-              (e.g. one_vs_one_decision_function)
-        ensures
-            - Tests dec_funct against the given samples in x_test and labels in y_test
-              and returns a confusion matrix summarizing the results.
-            - let L = dec_funct.get_labels().  Then the confusion matrix C returned
-              by this function has the following properties.
-                - C.nr() == C.nc() == L.size()
-                - C(r,c) == the number of times a sample with label L(r) was predicted
-                  to have a label of L(c)
-            - Any samples with a y_test value not in L are ignored.  That is, samples
-              with labels the decision function hasn't ever seen before are ignored.
-    !*/
-
-// ----------------------------------------------------------------------------------------
-
-    class cross_validation_error : public dlib::error
-    {
-        /*!
-            This is the exception class used by the cross_validate_multiclass_trainer()
-            routine.
-        !*/
-    };
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename trainer_type,
-        typename sample_type,
-        typename label_type
-        >
-    const matrix<double> cross_validate_multiclass_trainer (
-        const trainer_type& trainer,
-        const std::vector<sample_type>& x,
-        const std::vector<label_type>& y,
-        const long folds
-    );
-    /*!
-        requires
-            - is_learning_problem(x,y)
-            - 1 < folds <= x.size()
-            - trainer_type == some kind of multiclass classification trainer object (e.g. one_vs_one_trainer)
-        ensures
-            - performs k-fold cross validation by using the given trainer to solve the
-              given multiclass classification problem for the given number of folds.
-              Each fold is tested using the output of the trainer and the confusion
-              matrix from all folds is summed and returned.
-            - The total confusion matrix is computed by running test_multiclass_decision_function()
-              on each fold and summing its output.
-            - The number of folds used is given by the folds argument.
-            - let L = select_all_distinct_labels(y).  Then the confusion matrix C returned
-              by this function has the following properties.
-                - C.nr() == C.nc() == L.size()
-                - C(r,c) == the number of times a sample with label L(r) was predicted
-                  to have a label of L(c)
-              Note that sum(C) might be slightly less than x.size().  This happens if the number of
-              samples in a class is not an even multiple of folds.  This is because each fold has the
-              same number of test samples in it and so if the number of samples in a class isn't a
-              multiple of folds then a few are not tested.
-        throws
-            - cross_validation_error
-              This exception is thrown if one of the classes has fewer samples than
-              the number of requested folds.
-    !*/
-
-}
-
-// ----------------------------------------------------------------------------------------
-
-#endif // DLIB_CROSS_VALIDATE_MULTICLASS_TRaINER_ABSTRACT_Hh_
-
-
-#ifndef DLIB_CROSS_VALIDATE_OBJECT_DETECTION_TRaINER_Hh_
-#define DLIB_CROSS_VALIDATE_OBJECT_DETECTION_TRaINER_Hh_
-
-#include "cross_validate_object_detection_trainer_abstract.h"
-#include <vector>
-#include "../matrix.h"
-#include "svm.h"
-#include "../geometry.h"
-#include "../image_processing/full_object_detection.h"
-#include "../image_processing/box_overlap_testing.h"
-#include "../statistics.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- namespace impl
- {
- inline unsigned long number_of_truth_hits (
- const std::vector<full_object_detection>& truth_boxes,
- const std::vector<rectangle>& ignore,
- const std::vector<std::pair<double,rectangle> >& boxes,
- const test_box_overlap& overlap_tester,
- std::vector<std::pair<double,bool> >& all_dets,
- unsigned long& missing_detections,
- const test_box_overlap& overlaps_ignore_tester
- )
- /*!
- ensures
- - returns the number of elements in truth_boxes which are overlapped by an
- element of boxes. In this context, two boxes, A and B, overlap if and only if
- overlap_tester(A,B) == true.
- - No element of boxes is allowed to account for more than one element of truth_boxes.
- - The returned number is in the range [0,truth_boxes.size()]
- - Adds the score for each box from boxes into all_dets and labels each with
- a bool indicating if it hit a truth box. Note that we skip boxes that
- don't hit any truth boxes but do match an ignore box.
- - Adds the number of truth boxes which didn't have any hits into
- missing_detections.
- !*/
- {
- if (boxes.size() == 0)
- {
- missing_detections += truth_boxes.size();
- return 0;
- }
-
- unsigned long count = 0;
- std::vector<bool> used(boxes.size(),false);
- for (unsigned long i = 0; i < truth_boxes.size(); ++i)
- {
- bool found_match = false;
- // Find the first box that hits truth_boxes[i]
- for (unsigned long j = 0; j < boxes.size(); ++j)
- {
- if (used[j])
- continue;
-
- if (overlap_tester(truth_boxes[i].get_rect(), boxes[j].second))
- {
- used[j] = true;
- ++count;
- found_match = true;
- break;
- }
- }
-
- if (!found_match)
- ++missing_detections;
- }
-
- for (unsigned long i = 0; i < boxes.size(); ++i)
- {
- // only output boxes if they match a truth box or are not ignored.
- if (used[i] || !overlaps_any_box(overlaps_ignore_tester, ignore, boxes[i].second))
- {
- all_dets.push_back(std::make_pair(boxes[i].first, used[i]));
- }
- }
-
- return count;
- }
-
- inline unsigned long number_of_truth_hits (
- const std::vector<full_object_detection>& truth_boxes,
- const std::vector<rectangle>& ignore,
- const std::vector<std::pair<double,rectangle> >& boxes,
- const test_box_overlap& overlap_tester,
- std::vector<std::pair<double,bool> >& all_dets,
- unsigned long& missing_detections
- )
- {
- return number_of_truth_hits(truth_boxes, ignore, boxes, overlap_tester, all_dets, missing_detections, overlap_tester);
- }
-
- // ------------------------------------------------------------------------------------
-
- }
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename object_detector_type,
- typename image_array_type
- >
- const matrix<double,1,3> test_object_detection_function (
- object_detector_type& detector,
- const image_array_type& images,
- const std::vector<std::vector<full_object_detection> >& truth_dets,
- const std::vector<std::vector<rectangle> >& ignore,
- const test_box_overlap& overlap_tester = test_box_overlap(),
- const double adjust_threshold = 0
- )
- {
- // make sure requires clause is not broken
- DLIB_CASSERT( is_learning_problem(images,truth_dets) == true &&
- ignore.size() == images.size(),
- "\t matrix test_object_detection_function()"
- << "\n\t invalid inputs were given to this function"
- << "\n\t is_learning_problem(images,truth_dets): " << is_learning_problem(images,truth_dets)
- << "\n\t ignore.size(): " << ignore.size()
- << "\n\t images.size(): " << images.size()
- );
-
-
-
- double correct_hits = 0;
- double total_true_targets = 0;
-
- std::vector<std::pair<double,bool> > all_dets;
- unsigned long missing_detections = 0;
-
-
- for (unsigned long i = 0; i < images.size(); ++i)
- {
- std::vector<std::pair<double,rectangle> > hits;
- detector(images[i], hits, adjust_threshold);
-
- correct_hits += impl::number_of_truth_hits(truth_dets[i], ignore[i], hits, overlap_tester, all_dets, missing_detections);
- total_true_targets += truth_dets[i].size();
- }
-
- std::sort(all_dets.rbegin(), all_dets.rend());
-
- double precision, recall;
-
- double total_hits = all_dets.size();
-
- if (total_hits == 0)
- precision = 1;
- else
- precision = correct_hits / total_hits;
-
- if (total_true_targets == 0)
- recall = 1;
- else
- recall = correct_hits / total_true_targets;
-
- matrix<double, 1, 3> res;
- res = precision, recall, average_precision(all_dets, missing_detections);
- return res;
- }
-
- template <
- typename object_detector_type,
- typename image_array_type
- >
- const matrix<double,1,3> test_object_detection_function (
- object_detector_type& detector,
- const image_array_type& images,
- const std::vector<std::vector<rectangle> >& truth_dets,
- const std::vector<std::vector<rectangle> >& ignore,
- const test_box_overlap& overlap_tester = test_box_overlap(),
- const double adjust_threshold = 0
- )
- {
- // convert the regular rectangles into full_object_detections.
- std::vector<std::vector<full_object_detection> > rects(truth_dets.size()); - for (unsigned long i = 0; i < truth_dets.size(); ++i) - { - for (unsigned long j = 0; j < truth_dets[i].size(); ++j) - { - rects[i].push_back(full_object_detection(truth_dets[i][j])); - } - } - - return test_object_detection_function(detector, images, rects, ignore, overlap_tester, adjust_threshold); - } - - template < - typename object_detector_type, - typename image_array_type - > - const matrix<double,1,3> test_object_detection_function ( - object_detector_type& detector, - const image_array_type& images, - const std::vector<std::vector<rectangle> >& truth_dets, - const test_box_overlap& overlap_tester = test_box_overlap(), - const double adjust_threshold = 0 - ) - { - std::vector<std::vector<rectangle> > ignore(images.size()); - return test_object_detection_function(detector,images,truth_dets,ignore, overlap_tester, adjust_threshold); - } - - template < - typename object_detector_type, - typename image_array_type - > - const matrix<double,1,3> test_object_detection_function ( - object_detector_type& detector, - const image_array_type& images, - const std::vector<std::vector<full_object_detection> >& truth_dets, - const test_box_overlap& overlap_tester = test_box_overlap(), - const double adjust_threshold = 0 - ) - { - std::vector<std::vector<rectangle> > ignore(images.size()); - return test_object_detection_function(detector,images,truth_dets,ignore, overlap_tester, adjust_threshold); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template < - typename array_type - > - struct array_subset_helper - { - typedef typename array_type::mem_manager_type mem_manager_type; - - array_subset_helper ( - const array_type& array_, - const std::vector<unsigned long>& idx_set_ - ) : - array(array_), - idx_set(idx_set_) - { - } - - unsigned long size() const { return idx_set.size(); } - - typedef typename array_type::type type; - const type& operator[] ( - unsigned long idx - ) const { return array[idx_set[idx]]; } - - private: - const array_type& array; - const std::vector<unsigned long>& idx_set; - }; - - template < - typename T - > - const matrix_op<op_array_to_mat<array_subset_helper<T> > > mat ( - const array_subset_helper<T>& m - ) - { - typedef op_array_to_mat<array_subset_helper<T> > op; - return matrix_op<op>(op(m)); - } - - } - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type, - typename image_array_type - > - const matrix<double,1,3> cross_validate_object_detection_trainer ( - const trainer_type& trainer, - const image_array_type& images, - const std::vector<std::vector<full_object_detection> >& truth_dets, - const std::vector<std::vector<rectangle> >& ignore, - const long folds, - const test_box_overlap& overlap_tester = test_box_overlap(), - const double adjust_threshold = 0 - ) - { - // make sure requires clause is not broken - DLIB_CASSERT( is_learning_problem(images,truth_dets) == true && - ignore.size() == images.size() && - 1 < folds && folds <= static_cast<long>(images.size()), - "\t matrix cross_validate_object_detection_trainer()" - << "\n\t invalid inputs were given to 
this function"
- << "\n\t is_learning_problem(images,truth_dets): " << is_learning_problem(images,truth_dets)
- << "\n\t folds: "<< folds
- << "\n\t ignore.size(): " << ignore.size()
- << "\n\t images.size(): " << images.size()
- );
-
- double correct_hits = 0;
- double total_true_targets = 0;
-
- const long test_size = images.size()/folds;
-
- std::vector<std::pair<double,bool> > all_dets;
- unsigned long missing_detections = 0;
- unsigned long test_idx = 0;
- for (long iter = 0; iter < folds; ++iter)
- {
- std::vector<unsigned long> train_idx_set;
- std::vector<unsigned long> test_idx_set;
-
- for (long i = 0; i < test_size; ++i)
- test_idx_set.push_back(test_idx++);
-
- unsigned long train_idx = test_idx%images.size();
- std::vector<std::vector<full_object_detection> > training_rects;
- std::vector<std::vector<rectangle> > training_ignores;
- for (unsigned long i = 0; i < images.size()-test_size; ++i)
- {
- training_rects.push_back(truth_dets[train_idx]);
- training_ignores.push_back(ignore[train_idx]);
- train_idx_set.push_back(train_idx);
- train_idx = (train_idx+1)%images.size();
- }
-
-
- impl::array_subset_helper<image_array_type> array_subset(images, train_idx_set);
- typename trainer_type::trained_function_type detector = trainer.train(array_subset, training_rects, training_ignores, overlap_tester);
- for (unsigned long i = 0; i < test_idx_set.size(); ++i)
- {
- std::vector<std::pair<double,rectangle> > hits;
- detector(images[test_idx_set[i]], hits, adjust_threshold);
-
- // Note: the ignore boxes must be indexed by the global image index
- // (test_idx_set[i]), not by the loop counter i, so that each test
- // image is paired with its own ignore list.
- correct_hits += impl::number_of_truth_hits(truth_dets[test_idx_set[i]], ignore[test_idx_set[i]], hits, overlap_tester, all_dets, missing_detections);
- total_true_targets += truth_dets[test_idx_set[i]].size();
- }
-
- }
-
- std::sort(all_dets.rbegin(), all_dets.rend());
-
-
- double precision, recall;
-
- double total_hits = all_dets.size();
-
- if (total_hits == 0)
- precision = 1;
- else
- precision = correct_hits / total_hits;
-
- if (total_true_targets == 0)
- recall = 1;
- else
- recall = correct_hits / total_true_targets;
-
- matrix<double, 1, 3> res;
- res = precision, recall, average_precision(all_dets, missing_detections);
- return res;
- }
-
- template <
- typename trainer_type,
- typename image_array_type
- >
- const matrix<double,1,3> cross_validate_object_detection_trainer (
- const trainer_type& trainer,
- const image_array_type& images,
- const std::vector<std::vector<rectangle> >& truth_dets,
- const std::vector<std::vector<rectangle> >& ignore,
- const long folds,
- const test_box_overlap& overlap_tester = test_box_overlap(),
- const double adjust_threshold = 0
- )
- {
- // convert the regular rectangles into full_object_detections.
- std::vector<std::vector<full_object_detection> > dets(truth_dets.size()); - for (unsigned long i = 0; i < truth_dets.size(); ++i) - { - for (unsigned long j = 0; j < truth_dets[i].size(); ++j) - { - dets[i].push_back(full_object_detection(truth_dets[i][j])); - } - } - - return cross_validate_object_detection_trainer(trainer, images, dets, ignore, folds, overlap_tester, adjust_threshold); - } - - template < - typename trainer_type, - typename image_array_type - > - const matrix<double,1,3> cross_validate_object_detection_trainer ( - const trainer_type& trainer, - const image_array_type& images, - const std::vector<std::vector<rectangle> >& truth_dets, - const long folds, - const test_box_overlap& overlap_tester = test_box_overlap(), - const double adjust_threshold = 0 - ) - { - const std::vector<std::vector<rectangle> > ignore(images.size()); - return cross_validate_object_detection_trainer(trainer,images,truth_dets,ignore,folds,overlap_tester,adjust_threshold); - } - - template < - typename trainer_type, - typename image_array_type - > - const matrix<double,1,3> cross_validate_object_detection_trainer ( - const trainer_type& trainer, - const image_array_type& images, - const std::vector<std::vector<full_object_detection> >& truth_dets, - const long folds, - const test_box_overlap& overlap_tester = test_box_overlap(), - const double adjust_threshold = 0 - ) - { - const std::vector<std::vector<rectangle> > ignore(images.size()); - return cross_validate_object_detection_trainer(trainer,images,truth_dets,ignore,folds,overlap_tester,adjust_threshold); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_CROSS_VALIDATE_OBJECT_DETECTION_TRaINER_Hh_ - diff --git a/ml/dlib/dlib/svm/cross_validate_object_detection_trainer_abstract.h b/ml/dlib/dlib/svm/cross_validate_object_detection_trainer_abstract.h deleted file mode 100644 index 575ed77fb..000000000 --- a/ml/dlib/dlib/svm/cross_validate_object_detection_trainer_abstract.h +++ /dev/null @@ -1,297 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_CROSS_VALIDATE_OBJECT_DETECTION_TRaINER_ABSTRACT_Hh_ -#ifdef DLIB_CROSS_VALIDATE_OBJECT_DETECTION_TRaINER_ABSTRACT_Hh_ - -#include <vector> -#include "../matrix.h" -#include "../geometry.h" -#include "../image_processing/full_object_detection_abstract.h" -#include "../dnn/layers_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename object_detector_type, - typename image_array_type - > - const matrix<double,1,3> test_object_detection_function ( - object_detector_type& detector, - const image_array_type& images, - const std::vector<std::vector<full_object_detection> >& truth_dets, - const std::vector<std::vector<rectangle> >& ignore, - const test_box_overlap& overlap_tester = test_box_overlap(), - const double adjust_threshold = 0 - ); - /*! - requires - - is_learning_problem(images,truth_dets) - - images.size() == ignore.size() - - object_detector_type == some kind of object detector function object - (e.g. object_detector) - - image_array_type must be an implementation of dlib/array/array_kernel_abstract.h - and it must contain objects which can be accepted by detector(). - ensures - - Tests the given detector against the supplied object detection problem and - returns the precision, recall, and average precision. 
Note that the task is - to predict, for each images[i], the set of object locations given by - truth_dets[i]. Additionally, any detections on image[i] that match a box in - ignore[i] are ignored. That is, detections matching a box in ignore[i] do - not count as a false alarm and similarly if any element of ignore[i] goes - undetected it does not count as a missed detection. So we say that ignore[i] - contains a set of boxes that we "don't care" if they are detected or not. - - In particular, returns a matrix M such that: - - M(0) == the precision of the detector object. This is a number - in the range [0,1] which measures the fraction of detector outputs - which correspond to a real target. A value of 1 means the detector - never produces any false alarms while a value of 0 means it only - produces false alarms. - - M(1) == the recall of the detector object. This is a number in the - range [0,1] which measures the fraction of targets found by the - detector. A value of 1 means the detector found all the targets - in truth_dets while a value of 0 means the detector didn't locate - any of the targets. - - M(2) == the average precision of the detector object. This is a number - in the range [0,1] which measures the overall quality of the detector. - We compute this by taking all the detections output by the detector and - ordering them in descending order of their detection scores. Then we use - the average_precision() routine to score the ranked listing and store the - output into M(2). - - This function considers a detector output D to match a rectangle T if and - only if overlap_tester(T,D) returns true. - - Note that you can use the adjust_threshold argument to raise or lower the - detection threshold. This value is passed into the identically named - argument to the detector object and therefore influences the number of - output detections. It can be useful, for example, to lower the detection - threshold because it results in more detections being output by the - detector, and therefore provides more information in the ranking, - possibly raising the average precision. - !*/ - - template < - typename object_detector_type, - typename image_array_type - > - const matrix<double,1,3> test_object_detection_function ( - object_detector_type& detector, - const image_array_type& images, - const std::vector<std::vector<rectangle> >& truth_dets, - const std::vector<std::vector<rectangle> >& ignore, - const test_box_overlap& overlap_tester = test_box_overlap(), - const double adjust_threshold = 0 - ); - /*! - requires - - All the requirements of the above test_object_detection_function() routine. - ensures - - converts all the rectangles in truth_dets into full_object_detection objects - via full_object_detection's rectangle constructor. Then invokes - test_object_detection_function() on the full_object_detections and returns - the results. - !*/ - - template < - typename object_detector_type, - typename image_array_type - > - const matrix<double,1,3> test_object_detection_function ( - object_detector_type& detector, - const image_array_type& images, - const std::vector<std::vector<rectangle> >& truth_dets, - const test_box_overlap& overlap_tester = test_box_overlap(), - const double adjust_threshold = 0 - ); - /*! - requires - - All the requirements of the above test_object_detection_function() routine. - ensures - - This function simply invokes test_object_detection_function() with all the - given arguments and an empty set of ignore rectangles and returns the results. 
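Example (editor's illustration, not part of the original header; the detector type and the images/truth_boxes containers are hypothetical and assumed to be filled in elsewhere):

    object_detector<image_scanner_type> detector;   // a trained detector
    dlib::array<array2d<unsigned char> > images;
    std::vector<std::vector<rectangle> > truth_boxes;

    const matrix<double,1,3> m =
        test_object_detection_function(detector, images, truth_boxes);
    // m(0) == precision, m(1) == recall, m(2) == average precision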
- !*/ - - template < - typename object_detector_type, - typename image_array_type - > - const matrix<double,1,3> test_object_detection_function ( - object_detector_type& detector, - const image_array_type& images, - const std::vector<std::vector<full_object_detection> >& truth_dets, - const test_box_overlap& overlap_tester = test_box_overlap(), - const double adjust_threshold = 0 - ); - /*! - requires - - All the requirements of the above test_object_detection_function() routine. - ensures - - This function simply invokes test_object_detection_function() with all the - given arguments and an empty set of ignore rectangles and returns the results. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename SUBNET, - typename image_array_type - > - const matrix<double,1,3> test_object_detection_function ( - loss_mmod<SUBNET>& detector, - const image_array_type& images, - const std::vector<std::vector<mmod_rect>>& truth_dets, - const test_box_overlap& overlap_tester = test_box_overlap(), - const double adjust_threshold = 0, - const test_box_overlap& overlaps_ignore_tester = test_box_overlap() - ); - /*! - requires - - is_learning_problem(images,truth_dets) - - image_array_type must be an implementation of dlib/array/array_kernel_abstract.h - and it must contain objects which can be accepted by detector(). - ensures - - This function is just like the test_object_detection_function() for - object_detector's except it runs on CNNs that use loss_mmod. - - Tests the given detector against the supplied object detection problem and - returns the precision, recall, and average precision. Note that the task is - to predict, for each images[i], the set of object locations, and their - corresponding labels, given by truth_dets[i]. Additionally, any detections - on image[i] that match a box in truth_dets[i] that are marked ignore are - ignored. That is, detections matching an ignore box, regardless of the - ignore box's label, do not count as a false alarm and similarly if any - ignored box in truth_dets goes undetected it does not count as a missed - detection. To test if a box overlaps an ignore box, we use overlaps_ignore_tester. - - In particular, returns a matrix M such that: - - M(0) == the precision of the detector object. This is a number - in the range [0,1] which measures the fraction of detector outputs - which correspond to a real target. A value of 1 means the detector - never produces any false alarms while a value of 0 means it only - produces false alarms. - - M(1) == the recall of the detector object. This is a number in the - range [0,1] which measures the fraction of targets found by the detector. - A value of 1 means the detector found all the non-ignore targets in - truth_dets while a value of 0 means the detector didn't locate any of the - targets. - - M(2) == the average precision of the detector object. This is a number - in the range [0,1] which measures the overall quality of the detector. - We compute this by taking all the detections output by the detector and - ordering them in descending order of their detection scores. Then we use - the average_precision() routine to score the ranked listing and store the - output into M(2). - - This function considers a detector output D to match a truth rectangle T if - and only if overlap_tester(T,D) returns true and the labels are identical strings. - - Note that you can use the adjust_threshold argument to raise or lower the - detection threshold. 
This value is passed into the identically named - argument to the detector object and therefore influences the number of - output detections. It can be useful, for example, to lower the detection - threshold because it results in more detections being output by the - detector, and therefore provides more information in the ranking, - possibly raising the average precision. - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type, - typename image_array_type - > - const matrix<double,1,3> cross_validate_object_detection_trainer ( - const trainer_type& trainer, - const image_array_type& images, - const std::vector<std::vector<full_object_detection> >& truth_dets, - const std::vector<std::vector<rectangle> >& ignore, - const long folds, - const test_box_overlap& overlap_tester = test_box_overlap(), - const double adjust_threshold = 0 - ); - /*! - requires - - is_learning_problem(images,truth_dets) - - images.size() == ignore.size() - - 1 < folds <= images.size() - - trainer_type == some kind of object detection trainer (e.g structural_object_detection_trainer) - - image_array_type must be an implementation of dlib/array/array_kernel_abstract.h - and it must contain objects which can be accepted by detector(). - - it is legal to call trainer.train(images, truth_dets) - ensures - - Performs k-fold cross-validation by using the given trainer to solve an - object detection problem for the given number of folds. Each fold is tested - using the output of the trainer and a matrix summarizing the results is - returned. The matrix contains the precision, recall, and average - precision of the trained detectors and is defined identically to the - test_object_detection_function() routine defined at the top of this file. - !*/ - - template < - typename trainer_type, - typename image_array_type - > - const matrix<double,1,3> cross_validate_object_detection_trainer ( - const trainer_type& trainer, - const image_array_type& images, - const std::vector<std::vector<rectangle> >& truth_dets, - const std::vector<std::vector<rectangle> >& ignore, - const long folds, - const test_box_overlap& overlap_tester = test_box_overlap(), - const double adjust_threshold = 0 - ); - /*! - requires - - all the requirements of the above cross_validate_object_detection_trainer() routine. - ensures - - converts all the rectangles in truth_dets into full_object_detection objects - via full_object_detection's rectangle constructor. Then invokes - cross_validate_object_detection_trainer() on the full_object_detections and - returns the results. - !*/ - - template < - typename trainer_type, - typename image_array_type - > - const matrix<double,1,3> cross_validate_object_detection_trainer ( - const trainer_type& trainer, - const image_array_type& images, - const std::vector<std::vector<rectangle> >& truth_dets, - const long folds, - const test_box_overlap& overlap_tester = test_box_overlap(), - const double adjust_threshold = 0 - ); - /*! - requires - - All the requirements of the above cross_validate_object_detection_trainer() routine. - ensures - - This function simply invokes cross_validate_object_detection_trainer() with all - the given arguments and an empty set of ignore rectangles and returns the results. 
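Example (editor's illustration, hypothetical names; the scanner and trainer are assumed to be configured in the usual way for structural_object_detection_trainer):

    structural_object_detection_trainer<image_scanner_type> trainer(scanner);
    const matrix<double,1,3> m =
        cross_validate_object_detection_trainer(trainer, images, truth_boxes, 3);
    // m(0) == precision, m(1) == recall, m(2) == average precision over the 3 folds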
- !*/ - - template < - typename trainer_type, - typename image_array_type - > - const matrix<double,1,3> cross_validate_object_detection_trainer ( - const trainer_type& trainer, - const image_array_type& images, - const std::vector<std::vector<full_object_detection> >& truth_dets, - const long folds, - const test_box_overlap& overlap_tester = test_box_overlap(), - const double adjust_threshold = 0 - ); - /*! - requires - - All the requirements of the above cross_validate_object_detection_trainer() routine. - ensures - - This function simply invokes cross_validate_object_detection_trainer() with all - the given arguments and an empty set of ignore rectangles and returns the results. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_CROSS_VALIDATE_OBJECT_DETECTION_TRaINER_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/svm/cross_validate_regression_trainer.h b/ml/dlib/dlib/svm/cross_validate_regression_trainer.h deleted file mode 100644 index a4c6077c9..000000000 --- a/ml/dlib/dlib/svm/cross_validate_regression_trainer.h +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_CROSS_VALIDATE_REGRESSION_TRaINER_Hh_ -#define DLIB_CROSS_VALIDATE_REGRESSION_TRaINER_Hh_ - -#include <vector> -#include "../matrix.h" -#include "../statistics.h" -#include "cross_validate_regression_trainer_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename reg_funct_type, - typename sample_type, - typename label_type - > - matrix<double,1,4> - test_regression_function ( - reg_funct_type& reg_funct, - const std::vector<sample_type>& x_test, - const std::vector<label_type>& y_test - ) - { - - // make sure requires clause is not broken - DLIB_ASSERT( is_learning_problem(x_test,y_test) == true, - "\tmatrix test_regression_function()" - << "\n\t invalid inputs were given to this function" - << "\n\t is_learning_problem(x_test,y_test): " - << is_learning_problem(x_test,y_test)); - - running_stats<double> rs, rs_mae; - running_scalar_covariance<double> rc; - - for (unsigned long i = 0; i < x_test.size(); ++i) - { - // compute error - const double output = reg_funct(x_test[i]); - const double temp = output - y_test[i]; - - rs_mae.add(std::abs(temp)); - rs.add(temp*temp); - rc.add(output, y_test[i]); - } - - matrix<double,1,4> result; - result = rs.mean(), rc.correlation(), rs_mae.mean(), rs_mae.stddev(); - return result; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type, - typename sample_type, - typename label_type - > - matrix<double,1,4> - cross_validate_regression_trainer ( - const trainer_type& trainer, - const std::vector<sample_type>& x, - const std::vector<label_type>& y, - const long folds - ) - { - - // make sure requires clause is not broken - DLIB_ASSERT(is_learning_problem(x,y) == true && - 1 < folds && folds <= static_cast<long>(x.size()), - "\tmatrix cross_validate_regression_trainer()" - << "\n\t invalid inputs were given to this function" - << "\n\t x.size(): " << x.size() - << "\n\t folds: " << folds - << "\n\t is_learning_problem(x,y): " << is_learning_problem(x,y) - ); - - - - const long num_in_test = x.size()/folds; - const long num_in_train = x.size() - num_in_test; - - running_stats<double> rs, rs_mae; - 
running_scalar_covariance<double> rc;
-
- std::vector<sample_type> x_test, x_train;
- std::vector<label_type> y_test, y_train;
-
-
- long next_test_idx = 0;
-
-
- for (long i = 0; i < folds; ++i)
- {
- x_test.clear();
- y_test.clear();
- x_train.clear();
- y_train.clear();
-
- // load up the test samples
- for (long cnt = 0; cnt < num_in_test; ++cnt)
- {
- x_test.push_back(x[next_test_idx]);
- y_test.push_back(y[next_test_idx]);
- next_test_idx = (next_test_idx + 1)%x.size();
- }
-
- // load up the training samples
- long next = next_test_idx;
- for (long cnt = 0; cnt < num_in_train; ++cnt)
- {
- x_train.push_back(x[next]);
- y_train.push_back(y[next]);
- next = (next + 1)%x.size();
- }
-
-
- try
- {
- const typename trainer_type::trained_function_type& df = trainer.train(x_train,y_train);
-
- // evaluate the trained function on the held-out test fold
- for (unsigned long j = 0; j < x_test.size(); ++j)
- {
- // compute error
- const double output = df(x_test[j]);
- const double temp = output - y_test[j];
-
- rs_mae.add(std::abs(temp));
- rs.add(temp*temp);
- rc.add(output, y_test[j]);
- }
- }
- catch (invalid_nu_error&)
- {
- // just ignore cases which result in an invalid nu
- }
-
- } // for (long i = 0; i < folds; ++i)
-
- matrix<double,1,4> result;
- result = rs.mean(), rc.correlation(), rs_mae.mean(), rs_mae.stddev();
- return result;
- }
-
-}
-
-// ----------------------------------------------------------------------------------------
-
-#endif // DLIB_CROSS_VALIDATE_REGRESSION_TRaINER_Hh_
-
-
diff --git a/ml/dlib/dlib/svm/cross_validate_regression_trainer_abstract.h b/ml/dlib/dlib/svm/cross_validate_regression_trainer_abstract.h
deleted file mode 100644
index d6298aa74..000000000
--- a/ml/dlib/dlib/svm/cross_validate_regression_trainer_abstract.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright (C) 2010 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#undef DLIB_CROSS_VALIDATE_REGRESSION_TRaINER_ABSTRACT_Hh_
-#ifdef DLIB_CROSS_VALIDATE_REGRESSION_TRaINER_ABSTRACT_Hh_
-
-#include <vector>
-#include "../matrix.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename reg_funct_type,
- typename sample_type,
- typename label_type
- >
- matrix<double,1,4>
- test_regression_function (
- reg_funct_type& reg_funct,
- const std::vector<sample_type>& x_test,
- const std::vector<label_type>& y_test
- );
- /*!
- requires
- - is_learning_problem(x_test, y_test)
- - reg_funct_type == some kind of regression function object
- (e.g. a decision_function created by the svr_trainer )
- ensures
- - Tests reg_funct against the given samples in x_test and target values in
- y_test and returns a matrix M summarizing the results. Specifically:
- - M(0) == the mean squared error.
- The MSE is given by: the mean over all i of pow(reg_funct(x_test[i]) - y_test[i], 2.0)
- - M(1) == the correlation between reg_funct(x_test[i]) and y_test[i].
- This is a number between -1 and 1.
- - M(2) == the mean absolute error.
- This is given by: the mean over all i of abs(reg_funct(x_test[i]) - y_test[i])
- - M(3) == the standard deviation of the absolute error.
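Example (editor's illustration; df is assumed to be a regression function such as a decision_function produced by svr_trainer, and x_test/y_test are hypothetical containers):

    const matrix<double,1,4> m = test_regression_function(df, x_test, y_test);
    // m(0) == MSE, m(1) == correlation, m(2) == MAE, m(3) == stddev of |error|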
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename trainer_type,
- typename sample_type,
- typename label_type
- >
- matrix<double,1,4>
- cross_validate_regression_trainer (
- const trainer_type& trainer,
- const std::vector<sample_type>& x,
- const std::vector<label_type>& y,
- const long folds
- );
- /*!
- requires
- - is_learning_problem(x,y)
- - 1 < folds <= x.size()
- - trainer_type == some kind of regression trainer object (e.g. svr_trainer)
- ensures
- - Performs k-fold cross validation by using the given trainer to solve a
- regression problem for the given number of folds. Each fold is tested using
- the output of the trainer. A matrix M summarizing the results is returned.
- In what follows, df denotes the function trained on the folds which don't
- contain sample i. Specifically:
- - M(0) == the mean squared error.
- The MSE is given by: the mean over all i of pow(df(x[i]) - y[i], 2.0)
- - M(1) == the correlation between a predicted y value and its true value.
- This is a number between -1 and 1.
- - M(2) == the mean absolute error.
- This is given by: the mean over all i of abs(df(x[i]) - y[i])
- - M(3) == the standard deviation of the absolute error.
- !*/
-
-}
-
-// ----------------------------------------------------------------------------------------
-
-#endif // DLIB_CROSS_VALIDATE_REGRESSION_TRaINER_ABSTRACT_Hh_
-
-
-
diff --git a/ml/dlib/dlib/svm/cross_validate_sequence_labeler.h b/ml/dlib/dlib/svm/cross_validate_sequence_labeler.h
deleted file mode 100644
index 75c4e363a..000000000
--- a/ml/dlib/dlib/svm/cross_validate_sequence_labeler.h
+++ /dev/null
@@ -1,152 +0,0 @@
-// Copyright (C) 2011 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_Hh_
-#define DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_Hh_
-
-#include "cross_validate_sequence_labeler_abstract.h"
-#include <vector>
-#include "../matrix.h"
-#include "svm.h"
-
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename sequence_labeler_type,
- typename sequence_type
- >
- const matrix<double> test_sequence_labeler (
- const sequence_labeler_type& labeler,
- const std::vector<sequence_type>& samples,
- const std::vector<std::vector<unsigned long> >& labels
- )
- {
- // make sure requires clause is not broken
- DLIB_ASSERT( is_sequence_labeling_problem(samples, labels) == true,
- "\tmatrix test_sequence_labeler()"
- << "\n\t invalid inputs were given to this function"
- << "\n\t is_sequence_labeling_problem(samples, labels): "
- << is_sequence_labeling_problem(samples, labels));
-
- matrix<double> res(labeler.num_labels(), labeler.num_labels());
- res = 0;
-
- std::vector<unsigned long> pred;
- for (unsigned long i = 0; i < samples.size(); ++i)
- {
- labeler.label_sequence(samples[i], pred);
-
- for (unsigned long j = 0; j < pred.size(); ++j)
- {
- const unsigned long truth = labels[i][j];
- if (truth >= static_cast<unsigned long>(res.nr()))
- {
- // ignore labels the labeler doesn't know about.
- continue; - } - - res(truth, pred[j]) += 1; - } - } - - return res; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type, - typename sequence_type - > - const matrix<double> cross_validate_sequence_labeler ( - const trainer_type& trainer, - const std::vector<sequence_type>& samples, - const std::vector<std::vector<unsigned long> >& labels, - const long folds - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_sequence_labeling_problem(samples,labels) == true && - 1 < folds && folds <= static_cast<long>(samples.size()), - "\tmatrix cross_validate_sequence_labeler()" - << "\n\t invalid inputs were given to this function" - << "\n\t samples.size(): " << samples.size() - << "\n\t folds: " << folds - << "\n\t is_sequence_labeling_problem(samples,labels): " << is_sequence_labeling_problem(samples,labels) - ); - -#ifdef ENABLE_ASSERTS - for (unsigned long i = 0; i < labels.size(); ++i) - { - for (unsigned long j = 0; j < labels[i].size(); ++j) - { - // make sure requires clause is not broken - DLIB_ASSERT(labels[i][j] < trainer.num_labels(), - "\t matrix cross_validate_sequence_labeler()" - << "\n\t The labels are invalid." - << "\n\t labels[i][j]: " << labels[i][j] - << "\n\t trainer.num_labels(): " << trainer.num_labels() - << "\n\t i: " << i - << "\n\t j: " << j - ); - } - } -#endif - - - - - const long num_in_test = samples.size()/folds; - const long num_in_train = samples.size() - num_in_test; - - std::vector<sequence_type> x_test, x_train; - std::vector<std::vector<unsigned long> > y_test, y_train; - - - long next_test_idx = 0; - - matrix<double> res; - - - for (long i = 0; i < folds; ++i) - { - x_test.clear(); - y_test.clear(); - x_train.clear(); - y_train.clear(); - - // load up the test samples - for (long cnt = 0; cnt < num_in_test; ++cnt) - { - x_test.push_back(samples[next_test_idx]); - y_test.push_back(labels[next_test_idx]); - next_test_idx = (next_test_idx + 1)%samples.size(); - } - - // load up the training samples - long next = next_test_idx; - for (long cnt = 0; cnt < num_in_train; ++cnt) - { - x_train.push_back(samples[next]); - y_train.push_back(labels[next]); - next = (next + 1)%samples.size(); - } - - - res += test_sequence_labeler(trainer.train(x_train,y_train), x_test, y_test); - - } // for (long i = 0; i < folds; ++i) - - return res; - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_Hh_ - - diff --git a/ml/dlib/dlib/svm/cross_validate_sequence_labeler_abstract.h b/ml/dlib/dlib/svm/cross_validate_sequence_labeler_abstract.h deleted file mode 100644 index 3d2409b28..000000000 --- a/ml/dlib/dlib/svm/cross_validate_sequence_labeler_abstract.h +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_ABSTRACT_Hh_ -#ifdef DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_ABSTRACT_Hh_ - -#include <vector> -#include "../matrix.h" -#include "svm.h" - - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename sequence_labeler_type, - typename sequence_type - > - const matrix<double> test_sequence_labeler ( - const sequence_labeler_type& labeler, - const std::vector<sequence_type>& samples, - const std::vector<std::vector<unsigned long> >& labels - ); - /*! 
- requires - - is_sequence_labeling_problem(samples, labels) - - sequence_labeler_type == dlib::sequence_labeler or an object with a - compatible interface. - ensures - - Tests labeler against the given samples and labels and returns a confusion - matrix summarizing the results. - - The confusion matrix C returned by this function has the following properties. - - C.nc() == labeler.num_labels() - - C.nr() == labeler.num_labels() - - C(T,P) == the number of times a sequence element with label T was predicted - to have a label of P. - - Any samples with a label value >= labeler.num_labels() are ignored. That - is, samples with labels the labeler hasn't ever seen before are ignored. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type, - typename sequence_type - > - const matrix<double> cross_validate_sequence_labeler ( - const trainer_type& trainer, - const std::vector<sequence_type>& samples, - const std::vector<std::vector<unsigned long> >& labels, - const long folds - ); - /*! - requires - - is_sequence_labeling_problem(samples, labels) - - 1 < folds <= samples.size() - - for all valid i and j: labels[i][j] < trainer.num_labels() - - trainer_type == dlib::structural_sequence_labeling_trainer or an object - with a compatible interface. - ensures - - performs k-fold cross validation by using the given trainer to solve the - given sequence labeling problem for the given number of folds. Each fold - is tested using the output of the trainer and the confusion matrix from all - folds is summed and returned. - - The total confusion matrix is computed by running test_sequence_labeler() - on each fold and summing its output. - - The number of folds used is given by the folds argument. - - The confusion matrix C returned by this function has the following properties. - - C.nc() == trainer.num_labels() - - C.nr() == trainer.num_labels() - - C(T,P) == the number of times a sequence element with label T was predicted - to have a label of P. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_ABSTRACT_Hh_ - - - diff --git a/ml/dlib/dlib/svm/cross_validate_sequence_segmenter.h b/ml/dlib/dlib/svm/cross_validate_sequence_segmenter.h deleted file mode 100644 index 8413f9165..000000000 --- a/ml/dlib/dlib/svm/cross_validate_sequence_segmenter.h +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
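[Editor's note — usage sketch] A minimal sketch of the sequence-labeling cross-validation documented above (illustrative only; feature_extractor is the user-supplied type required by dlib's sequence labeling tools, and the samples/labels containers are hypothetical):

    structural_sequence_labeling_trainer<feature_extractor> trainer;

    std::vector<std::vector<sample_type> > samples;   // one inner vector per sequence
    std::vector<std::vector<unsigned long> > labels;  // parallel label sequences

    // 4-fold cross-validation; returns the summed confusion matrix described above.
    const matrix<double> confusion =
        cross_validate_sequence_labeler(trainer, samples, labels, 4);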
-#ifndef DLIB_CROSS_VALIDATE_SEQUENCE_sEGMENTER_Hh_
-#define DLIB_CROSS_VALIDATE_SEQUENCE_sEGMENTER_Hh_
-
-#include "cross_validate_sequence_segmenter_abstract.h"
-#include "sequence_segmenter.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- namespace impl
- {
- template <
- typename sequence_segmenter_type,
- typename sequence_type
- >
- const matrix<double,1,3> raw_metrics_test_sequence_segmenter (
- const sequence_segmenter_type& segmenter,
- const std::vector<sequence_type>& samples,
- const std::vector<std::vector<std::pair<unsigned long,unsigned long> > >& segments
- )
- {
- std::vector<std::pair<unsigned long,unsigned long> > truth;
- std::vector<std::pair<unsigned long,unsigned long> > pred;
-
- double true_hits = 0;
- double total_detections = 0;
- double total_true_segments = 0;
-
- for (unsigned long i = 0; i < samples.size(); ++i)
- {
- segmenter.segment_sequence(samples[i], pred);
- truth = segments[i];
- // sort the segments so both lists are in the same order
- std::sort(truth.begin(), truth.end());
- std::sort(pred.begin(), pred.end());
-
- total_true_segments += truth.size();
- total_detections += pred.size();
-
- unsigned long j=0,k=0;
- while (j < pred.size() && k < truth.size())
- {
- if (pred[j].first == truth[k].first &&
- pred[j].second == truth[k].second)
- {
- ++true_hits;
- ++j;
- ++k;
- }
- else if (pred[j].first < truth[k].first)
- {
- ++j;
- }
- else
- {
- ++k;
- }
- }
- }
-
- matrix<double,1,3> res;
- res = total_detections, total_true_segments, true_hits;
- return res;
- }
- }
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename sequence_segmenter_type,
- typename sequence_type
- >
- const matrix<double,1,3> test_sequence_segmenter (
- const sequence_segmenter_type& segmenter,
- const std::vector<sequence_type>& samples,
- const std::vector<std::vector<std::pair<unsigned long,unsigned long> > >& segments
- )
- {
- // make sure requires clause is not broken
- DLIB_ASSERT( is_sequence_segmentation_problem(samples, segments) == true,
- "\tmatrix test_sequence_segmenter()"
- << "\n\t invalid inputs were given to this function"
- << "\n\t is_sequence_segmentation_problem(samples, segments): "
- << is_sequence_segmentation_problem(samples, segments));
-
- const matrix<double,1,3> metrics = impl::raw_metrics_test_sequence_segmenter(segmenter, samples, segments);
-
- const double total_detections = metrics(0);
- const double total_true_segments = metrics(1);
- const double true_hits = metrics(2);
-
- const double precision = (total_detections ==0) ? 1 : true_hits/total_detections;
- const double recall = (total_true_segments==0) ? 1 : true_hits/total_true_segments;
- const double f1 = (precision+recall ==0) ?
0 : 2*precision*recall/(precision+recall); - - matrix<double,1,3> res; - res = precision, recall, f1; - return res; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type, - typename sequence_type - > - const matrix<double,1,3> cross_validate_sequence_segmenter ( - const trainer_type& trainer, - const std::vector<sequence_type>& samples, - const std::vector<std::vector<std::pair<unsigned long,unsigned long> > >& segments, - const long folds - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( is_sequence_segmentation_problem(samples, segments) == true && - 1 < folds && folds <= static_cast<long>(samples.size()), - "\tmatrix cross_validate_sequence_segmenter()" - << "\n\t invalid inputs were given to this function" - << "\n\t folds: " << folds - << "\n\t is_sequence_segmentation_problem(samples, segments): " - << is_sequence_segmentation_problem(samples, segments)); - - - const long num_in_test = samples.size()/folds; - const long num_in_train = samples.size() - num_in_test; - - std::vector<sequence_type> x_test, x_train; - std::vector<std::vector<std::pair<unsigned long,unsigned long> > > y_test, y_train; - - long next_test_idx = 0; - - matrix<double,1,3> metrics; - metrics = 0; - - for (long i = 0; i < folds; ++i) - { - x_test.clear(); - y_test.clear(); - x_train.clear(); - y_train.clear(); - - // load up the test samples - for (long cnt = 0; cnt < num_in_test; ++cnt) - { - x_test.push_back(samples[next_test_idx]); - y_test.push_back(segments[next_test_idx]); - next_test_idx = (next_test_idx + 1)%samples.size(); - } - - // load up the training samples - long next = next_test_idx; - for (long cnt = 0; cnt < num_in_train; ++cnt) - { - x_train.push_back(samples[next]); - y_train.push_back(segments[next]); - next = (next + 1)%samples.size(); - } - - - metrics += impl::raw_metrics_test_sequence_segmenter(trainer.train(x_train,y_train), x_test, y_test); - } // for (long i = 0; i < folds; ++i) - - - const double total_detections = metrics(0); - const double total_true_segments = metrics(1); - const double true_hits = metrics(2); - - const double precision = (total_detections ==0) ? 1 : true_hits/total_detections; - const double recall = (total_true_segments==0) ? 1 : true_hits/total_true_segments; - const double f1 = (precision+recall ==0) ? 0 : 2*precision*recall/(precision+recall); - - matrix<double,1,3> res; - res = precision, recall, f1; - return res; - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_CROSS_VALIDATE_SEQUENCE_sEGMENTER_Hh_ - - diff --git a/ml/dlib/dlib/svm/cross_validate_sequence_segmenter_abstract.h b/ml/dlib/dlib/svm/cross_validate_sequence_segmenter_abstract.h deleted file mode 100644 index 87e21d592..000000000 --- a/ml/dlib/dlib/svm/cross_validate_sequence_segmenter_abstract.h +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
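[Editor's note] In the implementation above, precision = true_hits/total_detections, recall = true_hits/total_true_segments, and F1 is their harmonic mean, F1 = 2*precision*recall/(precision+recall); for example, precision 0.8 and recall 0.5 give F1 ≈ 0.615. A minimal usage sketch of the cross-validation routine (illustrative only; feature_extractor and the data containers are hypothetical):

    structural_sequence_segmentation_trainer<feature_extractor> trainer;

    std::vector<sequence_type> samples;
    std::vector<std::vector<std::pair<unsigned long,unsigned long> > > segments;

    const matrix<double,1,3> m =
        cross_validate_sequence_segmenter(trainer, samples, segments, 5);
    // m(0) == precision, m(1) == recall, m(2) == F1-score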
-#undef DLIB_CROSS_VALIDATE_SEQUENCE_sEGMENTER_ABSTRACT_Hh_ -#ifdef DLIB_CROSS_VALIDATE_SEQUENCE_sEGMENTER_ABSTRACT_Hh_ - -#include "sequence_segmenter_abstract.h" -#include "../matrix.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename sequence_segmenter_type, - typename sequence_type - > - const matrix<double,1,3> test_sequence_segmenter ( - const sequence_segmenter_type& segmenter, - const std::vector<sequence_type>& samples, - const std::vector<std::vector<std::pair<unsigned long,unsigned long> > >& segments - ); - /*! - requires - - is_sequence_segmentation_problem(samples, segments) == true - - sequence_segmenter_type == dlib::sequence_segmenter or an object with a - compatible interface. - ensures - - Tests segmenter against the given samples and truth segments and returns the - precision, recall, and F1-score obtained by the segmenter. That is, the goal - of the segmenter should be to predict segments[i] given samples[i] as input. - The test_sequence_segmenter() routine therefore measures how well the - segmenter is able to perform this task. - - Returns a row matrix M with the following properties: - - M(0) == The precision of the segmenter measured against the task of - detecting the segments of each sample. This is a number in the range 0 - to 1 and represents the fraction of segments output by the segmenter - which correspond to true segments for each sample. - - M(1) == The recall of the segmenter measured against the task of - detecting the segments of each sample. This is a number in the range 0 - to 1 and represents the fraction of the true segments found by the - segmenter. - - M(2) == The F1-score for the segmenter. This is the harmonic mean of - M(0) and M(1). - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type, - typename sequence_type - > - const matrix<double,1,3> cross_validate_sequence_segmenter ( - const trainer_type& trainer, - const std::vector<sequence_type>& samples, - const std::vector<std::vector<std::pair<unsigned long,unsigned long> > >& segments, - const long folds - ); - /*! - requires - - is_sequence_segmentation_problem(samples, segments) == true - - 1 < folds <= samples.size() - - trainer_type == dlib::structural_sequence_segmentation_trainer or an object - with a compatible interface. - ensures - - Performs k-fold cross validation by using the given trainer to solve the - given sequence segmentation problem for the given number of folds. Each fold - is tested using the output of the trainer and the results from all folds are - summarized and returned. - - This function returns the precision, recall, and F1-score for the trainer. - In particular, the output is the same as the output from the - test_sequence_segmenter() routine defined above. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_CROSS_VALIDATE_SEQUENCE_sEGMENTER_ABSTRACT_Hh_ - diff --git a/ml/dlib/dlib/svm/cross_validate_track_association_trainer.h b/ml/dlib/dlib/svm/cross_validate_track_association_trainer.h deleted file mode 100644 index dac519b7a..000000000 --- a/ml/dlib/dlib/svm/cross_validate_track_association_trainer.h +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright (C) 2014 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#ifndef DLIB_CROSS_VALIDATE_TRACK_ASSOCIATION_TrAINER_Hh_ -#define DLIB_CROSS_VALIDATE_TRACK_ASSOCIATION_TrAINER_Hh_ - -#include "cross_validate_track_association_trainer_abstract.h" -#include "structural_track_association_trainer.h" - -namespace dlib -{ -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template < - typename track_association_function, - typename detection_type, - typename label_type - > - void test_track_association_function ( - const track_association_function& assoc, - const std::vector<std::vector<labeled_detection<detection_type,label_type> > >& samples, - unsigned long& total_dets, - unsigned long& correctly_associated_dets - ) - { - const typename track_association_function::association_function_type& f = assoc.get_assignment_function(); - - typedef typename detection_type::track_type track_type; - using namespace impl; - - dlib::rand rnd; - std::vector<track_type> tracks; - std::map<label_type,long> track_idx; // tracks[track_idx[id]] == track with ID id. - - for (unsigned long j = 0; j < samples.size(); ++j) - { - std::vector<labeled_detection<detection_type,label_type> > dets = samples[j]; - // Shuffle the order of the detections so we can be sure that there isn't - // anything funny going on like the detections always coming in the same - // order relative to their labels and the association function just gets - // lucky by picking the same assignment ordering every time. So this way - // we know the assignment function really is doing something rather than - // just being lucky. - randomize_samples(dets, rnd); - - total_dets += dets.size(); - std::vector<long> assignments = f(get_unlabeled_dets(dets), tracks); - std::vector<bool> updated_track(tracks.size(), false); - // now update all the tracks with the detections that associated to them. - for (unsigned long k = 0; k < assignments.size(); ++k) - { - // If the detection is associated to tracks[assignments[k]] - if (assignments[k] != -1) - { - tracks[assignments[k]].update_track(dets[k].det); - updated_track[assignments[k]] = true; - - // if this detection was supposed to go to this track - if (track_idx.count(dets[k].label) && track_idx[dets[k].label]==assignments[k]) - ++correctly_associated_dets; - - track_idx[dets[k].label] = assignments[k]; - } - else - { - track_type new_track; - new_track.update_track(dets[k].det); - tracks.push_back(new_track); - - // if this detection was supposed to go to a new track - if (track_idx.count(dets[k].label) == 0) - ++correctly_associated_dets; - - track_idx[dets[k].label] = tracks.size()-1; - } - } - - // Now propagate all the tracks that didn't get any detections. 
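// [Editor's note] propagate_track() advances a track's internal state through a
// frame in which it received no associated detection, so unmatched tracks keep
// predicting forward rather than being frozen or dropped at this point.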
- for (unsigned long k = 0; k < updated_track.size(); ++k) - { - if (!updated_track[k]) - tracks[k].propagate_track(); - } - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename track_association_function, - typename detection_type, - typename label_type - > - double test_track_association_function ( - const track_association_function& assoc, - const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples - ) - { - unsigned long total_dets = 0; - unsigned long correctly_associated_dets = 0; - - for (unsigned long i = 0; i < samples.size(); ++i) - { - impl::test_track_association_function(assoc, samples[i], total_dets, correctly_associated_dets); - } - - return (double)correctly_associated_dets/(double)total_dets; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type, - typename detection_type, - typename label_type - > - double cross_validate_track_association_trainer ( - const trainer_type& trainer, - const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples, - const long folds - ) - { - const long num_in_test = samples.size()/folds; - const long num_in_train = samples.size() - num_in_test; - - std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > > samples_train; - - long next_test_idx = 0; - unsigned long total_dets = 0; - unsigned long correctly_associated_dets = 0; - - for (long i = 0; i < folds; ++i) - { - samples_train.clear(); - - // load up the training samples - long next = (next_test_idx + num_in_test)%samples.size(); - for (long cnt = 0; cnt < num_in_train; ++cnt) - { - samples_train.push_back(samples[next]); - next = (next + 1)%samples.size(); - } - - const track_association_function<detection_type>& df = trainer.train(samples_train); - for (long cnt = 0; cnt < num_in_test; ++cnt) - { - impl::test_track_association_function(df, samples[next_test_idx], total_dets, correctly_associated_dets); - next_test_idx = (next_test_idx + 1)%samples.size(); - } - } - - return (double)correctly_associated_dets/(double)total_dets; - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_CROSS_VALIDATE_TRACK_ASSOCIATION_TrAINER_Hh_ - - diff --git a/ml/dlib/dlib/svm/cross_validate_track_association_trainer_abstract.h b/ml/dlib/dlib/svm/cross_validate_track_association_trainer_abstract.h deleted file mode 100644 index 76b985600..000000000 --- a/ml/dlib/dlib/svm/cross_validate_track_association_trainer_abstract.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (C) 2014 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_CROSS_VALIDATE_TRACK_ASSOCIATION_TrAINER_ABSTRACT_Hh_ -#ifdef DLIB_CROSS_VALIDATE_TRACK_ASSOCIATION_TrAINER_ABSTRACT_Hh_ - -#include "structural_track_association_trainer_abstract.h" -#include "svm_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename track_association_function, - typename detection_type, - typename label_type - > - double test_track_association_function ( - const track_association_function& assoc, - const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples - ); - /*! 
- requires
- - is_track_association_problem(samples)
- - track_association_function == an instantiation of the dlib::track_association_function
- template or an object with a compatible interface.
- ensures
- - Tests assoc against the given samples and returns the fraction of detections
- which were correctly associated to their tracks. That is, if assoc produces
- perfect tracks when used then this function returns a value of 1. Similarly,
- if 5% of the detections were associated to the incorrect track then the
- return value is 0.95.
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename trainer_type,
- typename detection_type,
- typename label_type
- >
- double cross_validate_track_association_trainer (
- const trainer_type& trainer,
- const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples,
- const long folds
- );
- /*!
- requires
- - is_track_association_problem(samples)
- - 1 < folds <= samples.size()
- - trainer_type == dlib::structural_track_association_trainer or an object with
- a compatible interface.
- ensures
- - Performs k-fold cross validation by using the given trainer to solve the
- given track association learning problem for the given number of folds. Each
- fold is tested using the output of the trainer and the fraction of
- correctly associated detections is returned (i.e. this function returns the
- same measure of track association quality as test_track_association_function()).
- - The number of folds used is given by the folds argument.
- !*/
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_CROSS_VALIDATE_TRACK_ASSOCIATION_TrAINER_ABSTRACT_Hh_
-
-
diff --git a/ml/dlib/dlib/svm/empirical_kernel_map.h b/ml/dlib/dlib/svm/empirical_kernel_map.h
deleted file mode 100644
index 7a91e591a..000000000
--- a/ml/dlib/dlib/svm/empirical_kernel_map.h
+++ /dev/null
@@ -1,429 +0,0 @@
-// Copyright (C) 2009 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_EMPIRICAL_KERNEl_MAP_H_
-#define DLIB_EMPIRICAL_KERNEl_MAP_H_
-
-#include "../matrix.h"
-#include "empirical_kernel_map_abstract.h"
-#include "linearly_independent_subset_finder.h"
-#include <vector>
-#include "../algs.h"
-#include "kernel_matrix.h"
-#include "function.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- template <typename kernel_type, typename EXP>
- const decision_function<kernel_type> convert_to_decision_function (
- const projection_function<kernel_type>& project_funct,
- const matrix_exp<EXP>& vect
- )
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(project_funct.out_vector_size() > 0 && is_vector(vect) &&
- project_funct.out_vector_size() == vect.size() && project_funct.weights.nc() == project_funct.basis_vectors.size(),
- "\t const decision_function convert_to_decision_function()"
- << "\n\t Invalid inputs to this function."
- << "\n\t project_funct.out_vector_size(): " << project_funct.out_vector_size() - << "\n\t project_funct.weights.nc(): " << project_funct.weights.nc() - << "\n\t project_funct.basis_vectors.size(): " << project_funct.basis_vectors.size() - << "\n\t is_vector(vect): " << is_vector(vect) - << "\n\t vect.size(): " << vect.size() - ); - - return decision_function<kernel_type>(trans(project_funct.weights)*vect, - 0, - project_funct.kernel_function, - project_funct.basis_vectors); - } - -// ---------------------------------------------------------------------------------------- - - template <typename kern_type> - class empirical_kernel_map - { - public: - - struct empirical_kernel_map_error : public error - { - empirical_kernel_map_error(const std::string& message): error(message) {} - }; - - typedef kern_type kernel_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - - void clear ( - ) - { - empirical_kernel_map().swap(*this); - } - - template <typename T> - void load( - const kernel_type& kernel_, - const T& basis_samples - ) - { - load_impl(kernel_, mat(basis_samples)); - } - - void load( - const linearly_independent_subset_finder<kernel_type>& lisf - ) - { - if (lisf.size() == 0) - { - std::ostringstream sout; - sout << "An empty linearly_independent_subset_finder was supplied to the\n" - << "empirical_kernel_map::load() function. One reason this might occur\n" - << "is if your dataset contains only zero vectors (or vectors \n" - << "approximately zero).\n"; - clear(); - throw empirical_kernel_map_error(sout.str()); - } - - kernel = lisf.get_kernel(); - weights = trans(chol(lisf.get_inv_kernel_marix())); - basis.resize(lisf.size()); - for (unsigned long i = 0; i < basis.size(); ++i) - basis[i] = lisf[i]; - - } - - const kernel_type get_kernel ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(out_vector_size() > 0, - "\tconst kernel_type empirical_kernel_map::get_kernel()" - << "\n\t You have to load this object with a kernel before you can call this function" - << "\n\t this: " << this - ); - - return kernel; - } - - long out_vector_size ( - ) const - { - return weights.nr(); - } - - unsigned long basis_size ( - ) const - { - return basis.size(); - } - - const sample_type& operator[] ( - unsigned long idx - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT( idx < basis_size(), - "\t const sample_type& empirical_kernel_map::operator[](idx)" - << "\n\t Invalid inputs to this function." - << "\n\t basis_size(): " << basis_size() - << "\n\t this: " << this - ); - - return basis[idx]; - } - - template <typename EXP> - const decision_function<kernel_type> convert_to_decision_function ( - const matrix_exp<EXP>& vect - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(out_vector_size() != 0 && is_vector(vect) && out_vector_size() == vect.size(), - "\t const decision_function empirical_kernel_map::convert_to_decision_function()" - << "\n\t Invalid inputs to this function." 
- << "\n\t out_vector_size(): " << out_vector_size() - << "\n\t is_vector(vect): " << is_vector(vect) - << "\n\t vect.size(): " << vect.size() - << "\n\t this: " << this - ); - - return decision_function<kernel_type>(trans(weights)*vect, 0, kernel, mat(basis)); - } - - template <typename EXP> - const distance_function<kernel_type> convert_to_distance_function ( - const matrix_exp<EXP>& vect - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(out_vector_size() != 0 && is_vector(vect) && out_vector_size() == vect.size(), - "\t const distance_function empirical_kernel_map::convert_to_distance_function()" - << "\n\t Invalid inputs to this function." - << "\n\t out_vector_size(): " << out_vector_size() - << "\n\t is_vector(vect): " << is_vector(vect) - << "\n\t vect.size(): " << vect.size() - << "\n\t this: " << this - ); - - return distance_function<kernel_type>(trans(weights)*vect, dot(vect,vect), kernel, mat(basis)); - } - - const projection_function<kernel_type> get_projection_function ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(out_vector_size() != 0, - "\tconst projection_function empirical_kernel_map::get_projection_function()" - << "\n\t You have to load this object with data before you can call this function" - << "\n\t this: " << this - ); - - return projection_function<kernel_type>(weights, kernel, mat(basis)); - } - - const matrix<scalar_type,0,0,mem_manager_type> get_transformation_to ( - const empirical_kernel_map& target - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(out_vector_size() != 0 && - target.out_vector_size() != 0 && - get_kernel() == target.get_kernel(), - "\t const matrix empirical_kernel_map::get_transformation_to(target)" - << "\n\t Invalid inputs were given to this function" - << "\n\t out_vector_size(): " << out_vector_size() - << "\n\t target.out_vector_size(): " << target.out_vector_size() - << "\n\t get_kernel()==target.get_kernel(): " << (get_kernel()==target.get_kernel()) - << "\n\t this: " << this - ); - - return target.weights * kernel_matrix(target.get_kernel(),target.basis, basis)*trans(weights); - } - - void get_transformation_to ( - const empirical_kernel_map& target, - matrix<scalar_type, 0, 0, mem_manager_type>& tmat, - projection_function<kernel_type>& partial_projection - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(out_vector_size() != 0 && - target.out_vector_size() != 0 && - get_kernel() == target.get_kernel() && - basis_size() < target.basis_size(), - "\t void empirical_kernel_map::get_transformation_to(target, tmat, partial_projection)" - << "\n\t Invalid inputs were given to this function" - << "\n\t out_vector_size(): " << out_vector_size() - << "\n\t target.out_vector_size(): " << target.out_vector_size() - << "\n\t basis_size(): " << basis_size() - << "\n\t target.basis_size(): " << target.basis_size() - << "\n\t get_kernel()==target.get_kernel(): " << (get_kernel()==target.get_kernel()) - << "\n\t this: " << this - ); - -#ifdef ENABLE_ASSERTS - for (unsigned long i = 0; i < basis_size(); ++i) - { - DLIB_ASSERT(dlib::equal((*this)[i], target[i]), - "\t const matrix empirical_kernel_map::get_transformation_to(target, tmat, partial_projection)" - << "\n\t target must contain a superset of the basis vectors in *this" - << "\n\t i: " << i - << "\n\t this: " << this - ); - } -#endif - - const unsigned long num1 = basis.size(); - const unsigned long num2 = target.basis.size(); - - tmat = colm(target.weights, 
range(0,num1-1))*kernel_matrix(kernel, basis)*trans(weights); - - empirical_kernel_map temp_ekm; - temp_ekm.load(kernel, rowm(mat(target.basis), range(num1,num2-1))); - - partial_projection = temp_ekm.get_projection_function(); - - partial_projection.weights = colm(target.weights,range(num1,num2-1))* - kernel_matrix(kernel, temp_ekm.basis)* - trans(temp_ekm.weights)* - partial_projection.weights; - } - - const matrix<scalar_type,0,1,mem_manager_type>& project ( - const sample_type& samp - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(out_vector_size() != 0, - "\tconst matrix empirical_kernel_map::project()" - << "\n\t You have to load this object with data before you can call this function" - << "\n\t this: " << this - ); - - temp1 = kernel_matrix(kernel, basis, samp); - temp2 = weights*temp1; - return temp2; - } - - const matrix<scalar_type,0,1,mem_manager_type>& project ( - const sample_type& samp, - scalar_type& projection_error - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(out_vector_size() != 0, - "\tconst matrix empirical_kernel_map::project()" - << "\n\t You have to load this object with data before you can call this function" - << "\n\t this: " << this - ); - - temp1 = kernel_matrix(kernel, basis, samp); - temp2 = weights*temp1; - // This value should never be negative (it measures squared distance) but I'm putting the abs() - // here just for good measure since rounding error might push it slightly negative. - projection_error = std::abs( kernel(samp,samp) - dot(temp2,temp2)); - - return temp2; - } - - void swap ( - empirical_kernel_map& item - ) - { - basis.swap(item.basis); - weights.swap(item.weights); - std::swap(kernel, item.kernel); - - temp1.swap(item.temp1); - temp2.swap(item.temp2); - } - - friend void serialize ( - const empirical_kernel_map& item, - std::ostream& out - ) - { - serialize(item.basis, out); - serialize(item.weights, out); - serialize(item.kernel, out); - } - - friend void deserialize ( - empirical_kernel_map& item, - std::istream& in - ) - { - deserialize(item.basis, in); - deserialize(item.weights, in); - deserialize(item.kernel, in); - } - - private: - - template <typename T> - void load_impl( - const kernel_type& kernel_, - const T& basis_samples - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(basis_samples.size() > 0 && is_vector(basis_samples), - "\tvoid empirical_kernel_map::load(kernel,basis_samples)" - << "\n\t You have to give a non-empty set of basis_samples and it must be a vector" - << "\n\t basis_samples.size(): " << basis_samples.size() - << "\n\t is_vector(basis_samples): " << is_vector(basis_samples) - << "\n\t this: " << this - ); - - // clear out the weights before we begin. This way if an exception throws - // this object will already be in the right state. - weights.set_size(0,0); - kernel = kernel_; - basis.clear(); - basis.reserve(basis_samples.size()); - - // find out the value of the largest norm of the elements in basis_samples. 
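- // (This relies on the kernel identity k(x,x) == dot(phi(x),phi(x)): the
- // diagonal of the kernel matrix computed below therefore holds the squared
- // norms of the basis samples in the kernel-induced feature space.)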
- const scalar_type max_norm = max(diag(kernel_matrix(kernel, basis_samples))); - // we will consider anything less than or equal to this number to be 0 - const scalar_type eps = max_norm*100*std::numeric_limits<scalar_type>::epsilon(); - - // Copy all the basis_samples into basis but make sure we don't copy any samples - // that have length 0 - for (long i = 0; i < basis_samples.size(); ++i) - { - const scalar_type norm = kernel(basis_samples(i), basis_samples(i)); - if (norm > eps) - { - basis.push_back(basis_samples(i)); - } - } - - if (basis.size() == 0) - { - clear(); - throw empirical_kernel_map_error("All basis_samples given to empirical_kernel_map::load() were zero vectors"); - } - - matrix<scalar_type,0,0,mem_manager_type> K(kernel_matrix(kernel, basis)), U,W,V; - - if (svd2(false,true,K,U,W,V)) - { - clear(); - throw empirical_kernel_map_error("While loading empirical_kernel_map with data, SVD failed to converge."); - } - - - // now count how many elements of W are non-zero - const long num_not_zero = static_cast<long>(sum(W>eps)); - - // Really, this should never happen. But I'm checking for good measure. - if (num_not_zero == 0) - { - clear(); - throw empirical_kernel_map_error("While loading empirical_kernel_map with data, SVD failed"); - } - - weights.set_size(num_not_zero, basis.size()); - - // now fill the weights matrix with the output of the SVD - long counter = 0; - for (long i =0; i < W.size(); ++i) - { - double val = W(i); - if (val > eps) - { - val = std::sqrt(val); - set_rowm(weights,counter) = rowm(trans(V),i)/val; - ++counter; - } - } - - } - - - std::vector<sample_type> basis; - matrix<scalar_type,0,0,mem_manager_type> weights; - kernel_type kernel; - - // These members don't contribute to the logical state of this object. They are - // just here so that they don't have to be reallocated every time the project() function - // is called. - mutable matrix<scalar_type,0,1,mem_manager_type> temp1, temp2; - - }; - - template <typename kernel_type> - void swap ( - empirical_kernel_map<kernel_type>& a, - empirical_kernel_map<kernel_type>& b - ) { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_EMPIRICAL_KERNEl_MAP_H_ - diff --git a/ml/dlib/dlib/svm/empirical_kernel_map_abstract.h b/ml/dlib/dlib/svm/empirical_kernel_map_abstract.h deleted file mode 100644 index 8fc413447..000000000 --- a/ml/dlib/dlib/svm/empirical_kernel_map_abstract.h +++ /dev/null @@ -1,430 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_EMPIRICAL_KERNEl_MAP_ABSTRACT_H_ -#ifdef DLIB_EMPIRICAL_KERNEl_MAP_ABSTRACT_H_ - -#include <vector> -#include "../matrix.h" -#include "kernel_abstract.h" -#include "function_abstract.h" -#include "linearly_independent_subset_finder_abstract.h" -#include <vector> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type, - typename EXP - > - const decision_function<kernel_type> convert_to_decision_function ( - const projection_function<kernel_type>& project_funct, - const matrix_exp<EXP>& vect - ); - /*! 
- requires
- - is_vector(vect) == true
- - vect.size() == project_funct.out_vector_size()
- - project_funct.out_vector_size() > 0
- - project_funct.weights.nc() == project_funct.basis_vectors.size()
- ensures
- - This function interprets the given vector as a point in the kernel feature space defined
- by the given projection function. The return value of this function is a decision
- function, DF, that represents the given vector in the following sense:
- - for all possible sample_type objects, S, it is the case that DF(S) == dot(project_funct(S), vect)
- (i.e. the returned decision function computes dot products, in kernel feature space,
- between vect and any argument you give it. Note also that this equality is exact, even
- for sample_type objects not in the span of the basis_vectors.)
- - DF.kernel_function == project_funct.kernel_function
- - DF.b == 0
- - DF.basis_vectors == project_funct.basis_vectors.
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename kern_type
- >
- class empirical_kernel_map
- {
- /*!
- REQUIREMENTS ON kern_type
- - must be a kernel function object as defined in dlib/svm/kernel_abstract.h
-
- INITIAL VALUE
- - out_vector_size() == 0
- - basis_size() == 0
-
- WHAT THIS OBJECT REPRESENTS
- This object represents a map from objects of sample_type (the kind of object
- a kernel function operates on) to finite dimensional column vectors which
- represent points in the kernel feature space defined by whatever kernel
- is used with this object.
-
- To use the empirical_kernel_map you supply it with a particular kernel and a set of
- basis samples. After that you can present it with new samples and it will project
- them into the part of kernel feature space spanned by your basis samples.
-
- This means the empirical_kernel_map is a tool you can use to very easily kernelize
- any algorithm that operates on column vectors. All you have to do is select a
- set of basis samples and then use the empirical_kernel_map to project all your
- data points into the part of kernel feature space spanned by those basis samples.
- Then just run your normal algorithm on the output vectors and it will be effectively
- kernelized.
-
- Regarding methods to select a set of basis samples, if you are working with only a
- few thousand samples then you can just use all of them as basis samples.
- Alternatively, the linearly_independent_subset_finder often works well for
- selecting a basis set. I also find that picking a random subset typically works
- well.
-
-
- The empirical kernel map is something that has been around in the kernel methods
- literature for a long time but is seemingly not well known. Anyway, one of the
- best books on the subject is the following:
- Learning with Kernels: Support Vector Machines, Regularization, Optimization,
- and Beyond by Bernhard Schölkopf, Alexander J. Smola
- The authors discuss the empirical kernel map as well as many other interesting
- topics.
- !*/
-
- public:
-
- typedef kern_type kernel_type;
- typedef typename kernel_type::sample_type sample_type;
- typedef typename kernel_type::scalar_type scalar_type;
- typedef typename kernel_type::mem_manager_type mem_manager_type;
-
- struct empirical_kernel_map_error : public error;
- /*!
- This is an exception class used to indicate a failure to create a
- kernel map from data given by the user.
- !*/
-
- empirical_kernel_map (
- );
- /*!
- ensures
- - this object is properly initialized
- !*/
-
- void clear (
- );
- /*!
- ensures - - this object has its initial value - !*/ - - template <typename T> - void load( - const kernel_type& kernel, - const T& basis_samples - ); - /*! - requires - - T must be a dlib::matrix type or something convertible to a matrix via mat() - (e.g. a std::vector) - - is_vector(basis_samples) == true - - basis_samples.size() > 0 - - kernel must be capable of operating on the elements of basis_samples. That is, - expressions such as kernel(basis_samples(0), basis_samples(0)) should make sense. - ensures - - 0 < #out_vector_size() <= basis_samples.size() - - #basis_size() == basis_samples.size() - - #get_kernel() == kernel - - This function constructs a map between normal sample_type objects and the - subspace of the kernel feature space defined by the given kernel and the - given set of basis samples. So after this function has been called you - will be able to project sample_type objects into kernel feature space - and obtain the resulting vector as a regular column matrix. - - The basis samples are loaded into this object in the order in which they - are stored in basis_samples. That is: - - for all valid i: (*this)[i] == basis_samples(i) - throws - - empirical_kernel_map_error - This exception is thrown if we are unable to create a kernel map. - If this happens then this object will revert back to its initial value. - !*/ - - void load( - const linearly_independent_subset_finder<kernel_type>& lisf - ); - /*! - ensures - - #out_vector_size() == lisf.dictionary_size() - - #basis_size() == lisf.dictionary_size() - - #get_kernel() == lisf.get_kernel() - - Uses the dictionary vectors from lisf as a basis set. Thus, this function - constructs a map between normal sample_type objects and the subspace of - the kernel feature space defined by the given kernel and the given set - of basis samples. So after this function has been called you will be - able to project sample_type objects into kernel feature space and obtain - the resulting vector as a regular column matrix. - - The basis samples are loaded into this object in the order in which they - are stored in lisf. That is: - - for all valid i: (*this)[i] == lisf[i] - throws - - empirical_kernel_map_error - This exception is thrown if we are unable to create a kernel map. - E.g. if the lisf.size() == 0. - If this happens then this object will revert back to its initial value. - !*/ - - const kernel_type get_kernel ( - ) const; - /*! - requires - - out_vector_size() != 0 - ensures - - returns a copy of the kernel used by this object - !*/ - - long out_vector_size ( - ) const; - /*! - ensures - - if (this object has been loaded with basis samples) then - - returns the dimensionality of the vectors output by the project() function. - - else - - returns 0 - !*/ - - unsigned long basis_size ( - ) const; - /*! - ensures - - returns the number of basis vectors in projection_functions created - by this object. This is also equal to the number of basis vectors - given to the load() function. - !*/ - - const sample_type& operator[] ( - unsigned long idx - ) const; - /*! - requires - - idx < basis_size() - ensures - - returns a const reference to the idx'th basis vector contained inside - this object. - !*/ - - const matrix<scalar_type,0,1,mem_manager_type>& project ( - const sample_type& sample - ) const; - /*! - requires - - out_vector_size() != 0 - ensures - - takes the given sample and projects it into the kernel feature space - of out_vector_size() dimensions defined by this kernel map and - returns the resulting vector. 
- - in more precise terms, this function returns a vector such that: - - The returned vector will contain out_vector_size() elements. - - for any sample_type object S, the following equality is approximately true: - - get_kernel()(sample,S) == dot(project(sample), project(S)). - - The approximation error in the above equality will be zero (within rounding error) - if both sample_type objects involved are within the span of the set of basis - samples given to the load() function. If they are not then there will be some - approximation error. Note that all the basis samples are always within their - own span. So the equality is always exact for the samples given to the load() - function. - !*/ - - const matrix<scalar_type,0,1,mem_manager_type>& project ( - const sample_type& samp, - scalar_type& projection_error - ) const; - /*! - requires - - out_vector_size() != 0 - ensures - - This function returns project(samp) - (i.e. it returns the same thing as the above project() function) - - #projection_error == the square of the distance between the point samp - gets projected onto and samp's true image in kernel feature space. - That is, this value is equal to: - pow(convert_to_distance_function(project(samp))(samp),2) - !*/ - - template <typename EXP> - const decision_function<kernel_type> convert_to_decision_function ( - const matrix_exp<EXP>& vect - ) const; - /*! - requires - - is_vector(vect) == true - - vect.size() == out_vector_size() - - out_vector_size() != 0 - ensures - - This function interprets the given vector as a point in the kernel feature space defined - by this empirical_kernel_map. The return value of this function is a decision - function, DF, that represents the given vector in the following sense: - - for all possible sample_type objects, S, it is the case that DF(S) == dot(project(S), vect) - (i.e. the returned decision function computes dot products, in kernel feature space, - between vect and any argument you give it. Note also that this equality is exact, even - for sample_type objects not in the span of the basis samples.) - - DF.kernel_function == get_kernel() - - DF.b == 0 - - DF.basis_vectors == these will be the basis samples given to the previous call to load(). Note - that it is possible for there to be fewer basis_vectors than basis samples given to load(). - - DF.basis_vectors.size() == basis_size() - !*/ - - template <typename EXP> - const distance_function<kernel_type> convert_to_distance_function ( - const matrix_exp<EXP>& vect - ) const - /*! - requires - - is_vector(vect) == true - - vect.size() == out_vector_size() - - out_vector_size() != 0 - ensures - - This function interprets the given vector as a point in the kernel feature space defined - by this empirical_kernel_map. The return value of this function is a distance - function, DF, that represents the given vector in the following sense: - - for any sample_type object S, the following equality is approximately true: - - DF(S) == length(project(S) - vect) - (i.e. the returned distance function computes distances, in kernel feature space, - between vect and any argument you give it. ) - - The approximation error in the above equality will be zero (within rounding error) - if S is within the span of the set of basis samples given to the load() function. - If it is not then there will be some approximation error. Note that all the basis - samples are always within their own span. So the equality is always exact for the - samples given to the load() function. 
Note further that the distance computed - by DF(S) is always the correct distance in kernel feature space between vect and - the true projection of S. That is, the above equality is approximate only because - of potential error in the project() function, not in DF(S). - - DF.kernel_function == get_kernel() - - DF.b == dot(vect,vect) - - DF.basis_vectors == these will be the basis samples given to the previous call to load(). Note - that it is possible for there to be fewer basis_vectors than basis samples given to load(). - - DF.basis_vectors.size() == basis_size() - !*/ - - const projection_function<kernel_type> get_projection_function ( - ) const; - /*! - requires - - out_vector_size() != 0 - ensures - - returns a projection_function, PF, that computes the same projection as project(). - That is, calling PF() on any sample will produce the same output vector as calling - this->project() on that sample. - - PF.basis_vectors.size() == basis_size() - !*/ - - const matrix<scalar_type,0,0,mem_manager_type> get_transformation_to ( - const empirical_kernel_map& target - ) const; - /*! - requires - - get_kernel() == target.get_kernel() - - out_vector_size() != 0 - - target.out_vector_size() != 0 - ensures - - A point in the kernel feature space defined by the kernel get_kernel() typically - has different representations with respect to different empirical_kernel_maps. - This function lets you obtain a transformation matrix that will allow you - to project between these different representations. That is, this function returns - a matrix M with the following properties: - - M maps vectors represented according to *this into the representation used by target. - - M.nr() == target.out_vector_size() - - M.nc() == this->out_vector_size() - - Let V be a vector of this->out_vector_size() length. Then define two distance_functions - DF1 = this->convert_to_distance_function(V) - DF2 = target.convert_to_distance_function(M*V) - - Then DF1(DF2) == 0 // i.e. the distance between these two points should be 0 - - That is, DF1 and DF2 both represent the same point in kernel feature space. Note - that the above equality is only approximate. If the vector V represents a point in - kernel space that isn't in the span of the basis samples used by target then the - equality is approximate. However, if it is in their span then the equality will - be exact. For example, if target's basis samples are a superset of the basis samples - used by *this then the equality will always be exact (within rounding error). - !*/ - - void get_transformation_to ( - const empirical_kernel_map& target, - matrix<scalar_type, 0, 0, mem_manager_type>& tmat, - projection_function<kernel_type>& partial_projection - ) const; - /*! - requires - - get_kernel() == target.get_kernel() - - out_vector_size() != 0 - - target.out_vector_size() != 0 - - basis_size() < target.basis_size() - - for all i < basis_size(): (*this)[i] == target[i] - i.e. target must contain a superset of the basis vectors contained in *this. Moreover, - it must contain them in the same order. - ensures - - The single argument version of get_transformation_to() allows you to project - vectors from one empirical_kernel_map representation to another. This version - provides a somewhat different capability. Assuming target's basis vectors form a - superset of *this's basis vectors then this form of get_transformation_to() allows - you to reuse a vector from *this ekm to speed up the projection performed by target. - The defining relation is given below. 
- - for any sample S: - - target.project(S) == #tmat * this->project(S) + #partial_projection(S) - (this is always true to within rounding error for any S) - - #partial_projection.basis_vectors.size() == target.basis_vectors.size() - this->basis_vectors.size() - - #tmat.nr() == target.out_vector_size() - - #tmat.nc() == this->out_vector_size() - !*/ - - void swap ( - empirical_kernel_map& item - ); - /*! - ensures - - swaps the state of *this and item - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type - > - void swap ( - empirical_kernel_map<kernel_type>& a, - empirical_kernel_map<kernel_type>& b - ) { a.swap(b); } - /*! - provides a global swap function - !*/ - - template < - typename kernel_type - > - void serialize ( - const empirical_kernel_map<kernel_type>& item, - std::ostream& out - ); - /*! - provides serialization support for empirical_kernel_map objects - !*/ - - template < - typename kernel_type - > - void deserialize ( - empirical_kernel_map<kernel_type>& item, - std::istream& in - ); - /*! - provides serialization support for empirical_kernel_map objects - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_EMPIRICAL_KERNEl_MAP_ABSTRACT_H_ - diff --git a/ml/dlib/dlib/svm/feature_ranking.h b/ml/dlib/dlib/svm/feature_ranking.h deleted file mode 100644 index f6324fe3d..000000000 --- a/ml/dlib/dlib/svm/feature_ranking.h +++ /dev/null @@ -1,477 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_KERNEL_FEATURE_RANKINg_H_ -#define DLIB_KERNEL_FEATURE_RANKINg_H_ - -#include <vector> -#include <limits> - -#include "feature_ranking_abstract.h" -#include "kcentroid.h" -#include "../optimization.h" -#include "../statistics.h" -#include <iostream> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type, - typename sample_matrix_type, - typename label_matrix_type - > - matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features_impl ( - const kcentroid<kernel_type>& kc, - const sample_matrix_type& samples, - const label_matrix_type& labels - ) - { - /* - This function ranks features by doing recursive feature elimination - - */ - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::mem_manager_type mm; - - - // make sure requires clause is not broken - DLIB_ASSERT(is_binary_classification_problem(samples, labels) == true, - "\tmatrix rank_features()" - << "\n\t you have given invalid arguments to this function" - ); - - matrix<scalar_type,0,2,mm> results(samples(0).nr(), 2); - matrix<scalar_type,sample_matrix_type::type::NR,1,mm> mask(samples(0).nr()); - set_all_elements(mask,1); - - // figure out what the separation is between the two centroids when all the features are - // present. 
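- // (This full-feature separation acts as the baseline: the per-feature scores
- // recorded in the loop below are normalized at the end by the largest
- // separation observed, and the full-feature value becomes the final row of
- // the returned ranking.)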
- scalar_type first_separation; - { - kcentroid<kernel_type> c1(kc); - kcentroid<kernel_type> c2(kc); - // find the centers of each class - for (long s = 0; s < samples.size(); ++s) - { - if (labels(s) < 0) - { - c1.train(samples(s)); - } - else - { - c2.train(samples(s)); - } - - } - first_separation = c1(c2); - } - - - using namespace std; - - for (long i = results.nr()-1; i >= 0; --i) - { - long worst_feature_idx = 0; - scalar_type worst_feature_score = -std::numeric_limits<scalar_type>::infinity(); - - // figure out which feature to remove next - for (long j = 0; j < mask.size(); ++j) - { - // skip features we have already removed - if (mask(j) == 0) - continue; - - kcentroid<kernel_type> c1(kc); - kcentroid<kernel_type> c2(kc); - - // temporarily remove this feature from the working set of features - mask(j) = 0; - - // find the centers of each class - for (long s = 0; s < samples.size(); ++s) - { - if (labels(s) < 0) - { - c1.train(pointwise_multiply(samples(s),mask)); - } - else - { - c2.train(pointwise_multiply(samples(s),mask)); - } - - } - - // find the distance between the two centroids and use that - // as the score - const double score = c1(c2); - - if (score > worst_feature_score) - { - worst_feature_score = score; - worst_feature_idx = j; - } - - // add this feature back to the working set of features - mask(j) = 1; - - } - - // now that we know what the next worst feature is record it - mask(worst_feature_idx) = 0; - results(i,0) = worst_feature_idx; - results(i,1) = worst_feature_score; - } - - // now normalize the results - const scalar_type max_separation = std::max(max(colm(results,1)), first_separation); - set_colm(results,1) = colm(results,1)/max_separation; - for (long r = 0; r < results.nr()-1; ++r) - { - results(r,1) = results(r+1,1); - } - results(results.nr()-1,1) = first_separation/max_separation; - - return results; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type, - typename sample_matrix_type, - typename label_matrix_type - > - matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features ( - const kcentroid<kernel_type>& kc, - const sample_matrix_type& samples, - const label_matrix_type& labels - ) - { - return rank_features_impl(kc, mat(samples), mat(labels)); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type, - typename sample_matrix_type, - typename label_matrix_type - > - matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features_impl ( - const kcentroid<kernel_type>& kc, - const sample_matrix_type& samples, - const label_matrix_type& labels, - const long num_features - ) - { - /* - This function ranks features by doing recursive feature addition - - */ - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::mem_manager_type mm; - - // make sure requires clause is not broken - DLIB_ASSERT(is_binary_classification_problem(samples, labels) == true, - "\tmatrix rank_features()" - << "\n\t you have given invalid arguments to this function" - ); - DLIB_ASSERT(0 < num_features && num_features <= samples(0).nr(), - "\tmatrix rank_features()" - << "\n\t you have given invalid arguments to this function" - << "\n\t num_features: " << num_features - << "\n\t samples(0).nr(): " << samples(0).nr() - ); - - matrix<scalar_type,0,2,mm> results(num_features, 2); - 
matrix<scalar_type,sample_matrix_type::type::NR,1,mm> mask(samples(0).nr()); - set_all_elements(mask,0); - - using namespace std; - - for (long i = 0; i < results.nr(); ++i) - { - long best_feature_idx = 0; - scalar_type best_feature_score = -std::numeric_limits<scalar_type>::infinity(); - - // figure out which feature to add next - for (long j = 0; j < mask.size(); ++j) - { - // skip features we have already added - if (mask(j) == 1) - continue; - - kcentroid<kernel_type> c1(kc); - kcentroid<kernel_type> c2(kc); - - // temporarily add this feature to the working set of features - mask(j) = 1; - - // find the centers of each class - for (long s = 0; s < samples.size(); ++s) - { - if (labels(s) < 0) - { - c1.train(pointwise_multiply(samples(s),mask)); - } - else - { - c2.train(pointwise_multiply(samples(s),mask)); - } - - } - - // find the distance between the two centroids and use that - // as the score - const double score = c1(c2); - - if (score > best_feature_score) - { - best_feature_score = score; - best_feature_idx = j; - } - - // take this feature back out of the working set of features - mask(j) = 0; - - } - - // now that we know what the next best feature is record it - mask(best_feature_idx) = 1; - results(i,0) = best_feature_idx; - results(i,1) = best_feature_score; - } - - // now normalize the results - set_colm(results,1) = colm(results,1)/max(colm(results,1)); - - return results; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type, - typename sample_matrix_type, - typename label_matrix_type - > - matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features ( - const kcentroid<kernel_type>& kc, - const sample_matrix_type& samples, - const label_matrix_type& labels, - const long num_features - ) - { - if (mat(samples).nr() > 0 && num_features == mat(samples)(0).nr()) - { - // if we are going to rank them all then might as well do the recursive feature elimination version - return rank_features_impl(kc, mat(samples), mat(labels)); - } - else - { - return rank_features_impl(kc, mat(samples), mat(labels), num_features); - } - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - namespace rank_features_helpers - { - template < - typename K, - typename sample_matrix_type, - typename label_matrix_type - > - typename K::scalar_type centroid_gap ( - const kcentroid<K>& kc, - const sample_matrix_type& samples, - const label_matrix_type& labels - ) - { - kcentroid<K> kc1(kc); - kcentroid<K> kc2(kc); - - // toss all the samples into our kcentroids - for (long i = 0; i < samples.size(); ++i) - { - if (labels(i) > 0) - kc1.train(samples(i)); - else - kc2.train(samples(i)); - } - - // now return the separation between the mean of these two centroids - return kc1(kc2); - } - - template < - typename sample_matrix_type, - typename label_matrix_type - > - class test - { - typedef typename sample_matrix_type::type sample_type; - typedef typename sample_type::type scalar_type; - typedef typename sample_type::mem_manager_type mem_manager_type; - - public: - test ( - const sample_matrix_type& samples_, - const label_matrix_type& labels_, - unsigned long num_sv_, - bool verbose_ - ) : samples(samples_), labels(labels_), num_sv(num_sv_), 
verbose(verbose_) - { - } - - double operator() ( - double gamma - ) const - { - using namespace std; - - // we are doing the optimization in log space so don't forget to convert back to normal space - gamma = std::exp(gamma); - - typedef radial_basis_kernel<sample_type> kernel_type; - // Make a kcentroid and find out what the gap is at the current gamma. Try to pick a reasonable - // tolerance. - const double tolerance = std::min(gamma*0.01, 0.01); - const kernel_type kern(gamma); - kcentroid<kernel_type> kc(kern, tolerance, num_sv); - scalar_type temp = centroid_gap(kc, samples, labels); - - if (verbose) - { - cout << "\rChecking goodness of gamma = " << gamma << ". Goodness = " - << temp << " " << flush; - } - return temp; - } - - const sample_matrix_type& samples; - const label_matrix_type& labels; - unsigned long num_sv; - bool verbose; - - }; - - template < - typename sample_matrix_type, - typename label_matrix_type - > - double find_gamma_with_big_centroid_gap_impl ( - const sample_matrix_type& samples, - const label_matrix_type& labels, - double initial_gamma, - unsigned long num_sv, - bool verbose - ) - { - using namespace std; - - if (verbose) - { - cout << endl; - } - - test<sample_matrix_type, label_matrix_type> funct(samples, labels, num_sv, verbose); - double best_gamma = std::log(initial_gamma); - double goodness = find_max_single_variable(funct, best_gamma, -15, 15, 1e-3, 100); - - if (verbose) - { - cout << "\rBest gamma = " << std::exp(best_gamma) << ". Goodness = " - << goodness << " " << endl; - } - - return std::exp(best_gamma); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename sample_matrix_type, - typename label_matrix_type - > - double find_gamma_with_big_centroid_gap ( - const sample_matrix_type& samples, - const label_matrix_type& labels, - double initial_gamma = 0.1, - unsigned long num_sv = 40 - ) - { - DLIB_ASSERT(initial_gamma > 0 && num_sv > 0 && is_binary_classification_problem(samples, labels), - "\t double find_gamma_with_big_centroid_gap()" - << "\n\t initial_gamma: " << initial_gamma - << "\n\t num_sv: " << num_sv - << "\n\t is_binary_classification_problem(): " << is_binary_classification_problem(samples, labels) - ); - - return rank_features_helpers::find_gamma_with_big_centroid_gap_impl(mat(samples), - mat(labels), - initial_gamma, - num_sv, - false); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename sample_matrix_type, - typename label_matrix_type - > - double verbose_find_gamma_with_big_centroid_gap ( - const sample_matrix_type& samples, - const label_matrix_type& labels, - double initial_gamma = 0.1, - unsigned long num_sv = 40 - ) - { - DLIB_ASSERT(initial_gamma > 0 && num_sv > 0 && is_binary_classification_problem(samples, labels), - "\t double verbose_find_gamma_with_big_centroid_gap()" - << "\n\t initial_gamma: " << initial_gamma - << "\n\t num_sv: " << num_sv - << "\n\t is_binary_classification_problem(): " << is_binary_classification_problem(samples, labels) - ); - - return rank_features_helpers::find_gamma_with_big_centroid_gap_impl(mat(samples), - mat(labels), - initial_gamma, - num_sv, - true); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type - > - double compute_mean_squared_distance ( - const vector_type& samples - ) - { - running_stats<double> rs; - for (unsigned long i = 0; i < samples.size(); ++i) 
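- // (Each unordered pair of samples is visited exactly once in the loop below.
- // A common use of the returned value, e.g. when picking a width for a
- // radial_basis_kernel, is gamma = 1.0/compute_mean_squared_distance(samples).)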
- {
- for (unsigned long j = i+1; j < samples.size(); ++j)
- {
- rs.add(length_squared(samples[i] - samples[j]));
- }
- }
-
- return rs.mean();
- }
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_KERNEL_FEATURE_RANKINg_H_
-
-
diff --git a/ml/dlib/dlib/svm/feature_ranking_abstract.h b/ml/dlib/dlib/svm/feature_ranking_abstract.h
deleted file mode 100644
index 5a6fd3bb9..000000000
--- a/ml/dlib/dlib/svm/feature_ranking_abstract.h
+++ /dev/null
@@ -1,136 +0,0 @@
-// Copyright (C) 2008 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#undef DLIB_KERNEL_FEATURE_RANKINg_ABSTRACT_H_
-#ifdef DLIB_KERNEL_FEATURE_RANKINg_ABSTRACT_H_
-
-#include <vector>
-#include <limits>
-
-#include "svm_abstract.h"
-#include "kcentroid_abstract.h"
-#include "../is_kind.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename kernel_type,
- typename sample_matrix_type,
- typename label_matrix_type
- >
- matrix<typename kernel_type::scalar_type> rank_features (
- const kcentroid<kernel_type>& kc,
- const sample_matrix_type& samples,
- const label_matrix_type& labels,
- const long num_features = samples(0).nr()
- );
- /*!
- requires
- - sample_matrix_type == a matrix or something convertible to a matrix via mat()
- - label_matrix_type == a matrix or something convertible to a matrix via mat()
- - is_binary_classification_problem(samples, labels) == true
- - kc.train(samples(0)) must be a valid expression. This means that
- kc must use a kernel type that is capable of operating on the
- contents of the samples matrix
- - 0 < num_features <= samples(0).nr()
- ensures
- - Let Class1 denote the centroid of all the samples with labels that are < 0
- - Let Class2 denote the centroid of all the samples with labels that are > 0
- - finds a ranking of the features where the best features come first. This
- function does this by computing the distance between the centroids of the
- Class1 and Class2 samples in kernel defined feature space.
- Good features are then ones that result in the biggest separation between
- the two centroids of Class1 and Class2.
- - Uses the kc object to compute the centroids of the two classes
- - returns a ranking matrix R where:
- - R.nr() == num_features
- - R.nc() == 2
- - R(i,0) == the index of the ith best feature according to our ranking.
- (e.g. samples(n)(R(0,0)) is the best feature from sample(n) and
- samples(n)(R(1,0)) is the second best, samples(n)(R(2,0)) the
- third best and so on)
- - R(i,1) == a number that indicates how much separation exists between
- the two centroids when features 0 through i are used.
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename sample_matrix_type,
- typename label_matrix_type
- >
- double find_gamma_with_big_centroid_gap (
- const sample_matrix_type& samples,
- const label_matrix_type& labels,
- double initial_gamma = 0.1,
- unsigned long num_sv = 40
- );
- /*!
- requires
- - initial_gamma > 0
- - num_sv > 0
- - is_binary_classification_problem(samples, labels) == true
- ensures
- - This is a function that tries to pick a reasonable default value for the gamma
- parameter of the radial_basis_kernel. It picks the parameter that gives the
- largest separation between the centroids, in kernel feature space, of two classes
- of data.
It does this using the kcentroid object and it sets the kcentroid up
- to use num_sv dictionary vectors.
- - This function does a search for the best gamma and the search starts with
- the value given by initial_gamma. Better initial guesses will give
- better results since the routine may get stuck in a local maximum.
- - returns the value of gamma that results in the largest separation.
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename sample_matrix_type,
- typename label_matrix_type
- >
- double verbose_find_gamma_with_big_centroid_gap (
- const sample_matrix_type& samples,
- const label_matrix_type& labels,
- double initial_gamma = 0.1,
- unsigned long num_sv = 40
- );
- /*!
- requires
- - initial_gamma > 0
- - num_sv > 0
- - is_binary_classification_problem(samples, labels) == true
- ensures
- - This function does exactly the same thing as the above find_gamma_with_big_centroid_gap()
- except that it is also verbose in the sense that it will print status messages to
- standard output during its processing.
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename vector_type
- >
- double compute_mean_squared_distance (
- const vector_type& samples
- );
- /*!
- requires
- - vector_type is something with an interface compatible with std::vector.
- Additionally, it must in turn contain dlib::matrix types which contain
- scalars such as float or double values.
- - for all valid i: is_vector(samples[i]) == true
- ensures
- - computes the average value of the squares of all the pairwise
- distances between every element of samples.
- !*/
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_KERNEL_FEATURE_RANKINg_ABSTRACT_H_
-
-
-
diff --git a/ml/dlib/dlib/svm/function.h b/ml/dlib/dlib/svm/function.h
deleted file mode 100644
index f5a62a9f7..000000000
--- a/ml/dlib/dlib/svm/function.h
+++ /dev/null
@@ -1,882 +0,0 @@
-// Copyright (C) 2007 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
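-
-// The decision_function defined below evaluates the usual kernel expansion
-//
-// f(x) = (sum over i of alpha(i)*K(x, basis_vectors(i))) - b
-//
-// As an intuition check (illustrative numbers, not taken from this file): with
-// two basis vectors, alpha = [1,-1] and b = 0, f(x) reduces to
-// K(x,v1) - K(x,v2), so the sign of f(x) says which basis vector x is more
-// similar to under K.
-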
-#ifndef DLIB_SVm_FUNCTION -#define DLIB_SVm_FUNCTION - -#include "function_abstract.h" -#include <cmath> -#include <limits> -#include <sstream> -#include "../matrix.h" -#include "../algs.h" -#include "../serialize.h" -#include "../rand.h" -#include "../statistics.h" -#include "kernel_matrix.h" -#include "kernel.h" -#include "sparse_kernel.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - struct decision_function - { - typedef K kernel_type; - typedef typename K::scalar_type scalar_type; - typedef typename K::scalar_type result_type; - typedef typename K::sample_type sample_type; - typedef typename K::mem_manager_type mem_manager_type; - - typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; - typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type; - - scalar_vector_type alpha; - scalar_type b; - K kernel_function; - sample_vector_type basis_vectors; - - decision_function ( - ) : b(0), kernel_function(K()) {} - - decision_function ( - const decision_function& d - ) : - alpha(d.alpha), - b(d.b), - kernel_function(d.kernel_function), - basis_vectors(d.basis_vectors) - {} - - decision_function ( - const scalar_vector_type& alpha_, - const scalar_type& b_, - const K& kernel_function_, - const sample_vector_type& basis_vectors_ - ) : - alpha(alpha_), - b(b_), - kernel_function(kernel_function_), - basis_vectors(basis_vectors_) - {} - - result_type operator() ( - const sample_type& x - ) const - { - result_type temp = 0; - for (long i = 0; i < alpha.nr(); ++i) - temp += alpha(i) * kernel_function(x,basis_vectors(i)); - - return temp - b; - } - }; - - template < - typename K - > - void serialize ( - const decision_function<K>& item, - std::ostream& out - ) - { - try - { - serialize(item.alpha, out); - serialize(item.b, out); - serialize(item.kernel_function, out); - serialize(item.basis_vectors, out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type decision_function"); - } - } - - template < - typename K - > - void deserialize ( - decision_function<K>& item, - std::istream& in - ) - { - try - { - deserialize(item.alpha, in); - deserialize(item.b, in); - deserialize(item.kernel_function, in); - deserialize(item.basis_vectors, in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type decision_function"); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename function_type - > - struct probabilistic_function - { - typedef typename function_type::scalar_type scalar_type; - typedef typename function_type::result_type result_type; - typedef typename function_type::sample_type sample_type; - typedef typename function_type::mem_manager_type mem_manager_type; - - scalar_type alpha; - scalar_type beta; - function_type decision_funct; - - probabilistic_function ( - ) : alpha(0), beta(0), decision_funct(function_type()) {} - - probabilistic_function ( - const probabilistic_function& d - ) : - alpha(d.alpha), - beta(d.beta), - decision_funct(d.decision_funct) - {} - - probabilistic_function ( - const scalar_type a_, - const scalar_type b_, - const function_type& decision_funct_ - ) : - alpha(a_), - beta(b_), - decision_funct(decision_funct_) - {} - - result_type operator() ( - const sample_type& x - ) const - { - result_type f = decision_funct(x); - return 1/(1 + 
std::exp(alpha*f + beta)); - } - }; - - template < - typename function_type - > - void serialize ( - const probabilistic_function<function_type>& item, - std::ostream& out - ) - { - try - { - serialize(item.alpha, out); - serialize(item.beta, out); - serialize(item.decision_funct, out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type probabilistic_function"); - } - } - - template < - typename function_type - > - void deserialize ( - probabilistic_function<function_type>& item, - std::istream& in - ) - { - try - { - deserialize(item.alpha, in); - deserialize(item.beta, in); - deserialize(item.decision_funct, in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type probabilistic_function"); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - struct probabilistic_decision_function - { - typedef K kernel_type; - typedef typename K::scalar_type scalar_type; - typedef typename K::scalar_type result_type; - typedef typename K::sample_type sample_type; - typedef typename K::mem_manager_type mem_manager_type; - - scalar_type alpha; - scalar_type beta; - decision_function<K> decision_funct; - - probabilistic_decision_function ( - ) : alpha(0), beta(0), decision_funct(decision_function<K>()) {} - - probabilistic_decision_function ( - const probabilistic_function<decision_function<K> >& d - ) : - alpha(d.alpha), - beta(d.beta), - decision_funct(d.decision_funct) - {} - - probabilistic_decision_function ( - const probabilistic_decision_function& d - ) : - alpha(d.alpha), - beta(d.beta), - decision_funct(d.decision_funct) - {} - - probabilistic_decision_function ( - const scalar_type a_, - const scalar_type b_, - const decision_function<K>& decision_funct_ - ) : - alpha(a_), - beta(b_), - decision_funct(decision_funct_) - {} - - result_type operator() ( - const sample_type& x - ) const - { - result_type f = decision_funct(x); - return 1/(1 + std::exp(alpha*f + beta)); - } - }; - - template < - typename K - > - void serialize ( - const probabilistic_decision_function<K>& item, - std::ostream& out - ) - { - try - { - serialize(item.alpha, out); - serialize(item.beta, out); - serialize(item.decision_funct, out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type probabilistic_decision_function"); - } - } - - template < - typename K - > - void deserialize ( - probabilistic_decision_function<K>& item, - std::istream& in - ) - { - try - { - deserialize(item.alpha, in); - deserialize(item.beta, in); - deserialize(item.decision_funct, in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type probabilistic_decision_function"); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class distance_function - { - public: - typedef K kernel_type; - typedef typename K::scalar_type scalar_type; - typedef typename K::scalar_type result_type; - typedef typename K::sample_type sample_type; - typedef typename K::mem_manager_type mem_manager_type; - - typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; - typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type; - - - distance_function ( - ) : b(0), kernel_function(K()) {} - - explicit distance_function ( - const kernel_type& 
kern - ) : b(0), kernel_function(kern) {} - - distance_function ( - const kernel_type& kern, - const sample_type& samp - ) : - alpha(ones_matrix<scalar_type>(1,1)), - b(kern(samp,samp)), - kernel_function(kern) - { - basis_vectors.set_size(1,1); - basis_vectors(0) = samp; - } - - distance_function ( - const decision_function<K>& f - ) : - alpha(f.alpha), - b(trans(f.alpha)*kernel_matrix(f.kernel_function,f.basis_vectors)*f.alpha), - kernel_function(f.kernel_function), - basis_vectors(f.basis_vectors) - { - // make sure requires clause is not broken - DLIB_ASSERT(f.alpha.size() == f.basis_vectors.size(), - "\t distance_function(f)" - << "\n\t The supplied decision_function is invalid." - << "\n\t f.alpha.size(): " << f.alpha.size() - << "\n\t f.basis_vectors.size(): " << f.basis_vectors.size() - ); - } - - distance_function ( - const distance_function& d - ) : - alpha(d.alpha), - b(d.b), - kernel_function(d.kernel_function), - basis_vectors(d.basis_vectors) - { - } - - distance_function ( - const scalar_vector_type& alpha_, - const scalar_type& b_, - const K& kernel_function_, - const sample_vector_type& basis_vectors_ - ) : - alpha(alpha_), - b(b_), - kernel_function(kernel_function_), - basis_vectors(basis_vectors_) - { - // make sure requires clause is not broken - DLIB_ASSERT(alpha_.size() == basis_vectors_.size(), - "\t distance_function()" - << "\n\t The supplied arguments are invalid." - << "\n\t alpha_.size(): " << alpha_.size() - << "\n\t basis_vectors_.size(): " << basis_vectors_.size() - ); - } - - distance_function ( - const scalar_vector_type& alpha_, - const K& kernel_function_, - const sample_vector_type& basis_vectors_ - ) : - alpha(alpha_), - b(trans(alpha)*kernel_matrix(kernel_function_,basis_vectors_)*alpha), - kernel_function(kernel_function_), - basis_vectors(basis_vectors_) - { - // make sure requires clause is not broken - DLIB_ASSERT(alpha_.size() == basis_vectors_.size(), - "\t distance_function()" - << "\n\t The supplied arguments are invalid." 
- << "\n\t alpha_.size(): " << alpha_.size() - << "\n\t basis_vectors_.size(): " << basis_vectors_.size() - ); - } - - const scalar_vector_type& get_alpha ( - ) const { return alpha; } - - const scalar_type& get_squared_norm ( - ) const { return b; } - - const K& get_kernel( - ) const { return kernel_function; } - - const sample_vector_type& get_basis_vectors ( - ) const { return basis_vectors; } - - result_type operator() ( - const sample_type& x - ) const - { - result_type temp = 0; - for (long i = 0; i < alpha.nr(); ++i) - temp += alpha(i) * kernel_function(x,basis_vectors(i)); - - temp = b + kernel_function(x,x) - 2*temp; - if (temp > 0) - return std::sqrt(temp); - else - return 0; - } - - result_type operator() ( - const distance_function& x - ) const - { - result_type temp = 0; - for (long i = 0; i < alpha.nr(); ++i) - for (long j = 0; j < x.alpha.nr(); ++j) - temp += alpha(i)*x.alpha(j) * kernel_function(basis_vectors(i), x.basis_vectors(j)); - - temp = b + x.b - 2*temp; - if (temp > 0) - return std::sqrt(temp); - else - return 0; - } - - distance_function operator* ( - const scalar_type& val - ) const - { - return distance_function(val*alpha, - val*val*b, - kernel_function, - basis_vectors); - } - - distance_function operator/ ( - const scalar_type& val - ) const - { - return distance_function(alpha/val, - b/val/val, - kernel_function, - basis_vectors); - } - - distance_function operator+ ( - const distance_function& rhs - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(get_kernel() == rhs.get_kernel(), - "\t distance_function distance_function::operator+()" - << "\n\t You can only add two distance_functions together if they use the same kernel." - ); - - if (alpha.size() == 0) - return rhs; - else if (rhs.alpha.size() == 0) - return *this; - else - return distance_function(join_cols(alpha, rhs.alpha), - b + rhs.b + 2*trans(alpha)*kernel_matrix(kernel_function,basis_vectors,rhs.basis_vectors)*rhs.alpha, - kernel_function, - join_cols(basis_vectors, rhs.basis_vectors)); - } - - distance_function operator- ( - const distance_function& rhs - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(get_kernel() == rhs.get_kernel(), - "\t distance_function distance_function::operator-()" - << "\n\t You can only subtract two distance_functions if they use the same kernel." 
- ); - - if (alpha.size() == 0 && rhs.alpha.size() == 0) - return distance_function(kernel_function); - else if (alpha.size() != 0 && rhs.alpha.size() == 0) - return *this; - else if (alpha.size() == 0 && rhs.alpha.size() != 0) - return -1*rhs; - else - return distance_function(join_cols(alpha, -rhs.alpha), - b + rhs.b - 2*trans(alpha)*kernel_matrix(kernel_function,basis_vectors,rhs.basis_vectors)*rhs.alpha, - kernel_function, - join_cols(basis_vectors, rhs.basis_vectors)); - } - - private: - - scalar_vector_type alpha; - scalar_type b; - K kernel_function; - sample_vector_type basis_vectors; - - }; - - template < - typename K - > - distance_function<K> operator* ( - const typename K::scalar_type& val, - const distance_function<K>& df - ) { return df*val; } - - template < - typename K - > - void serialize ( - const distance_function<K>& item, - std::ostream& out - ) - { - try - { - serialize(item.alpha, out); - serialize(item.b, out); - serialize(item.kernel_function, out); - serialize(item.basis_vectors, out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type distance_function"); - } - } - - template < - typename K - > - void deserialize ( - distance_function<K>& item, - std::istream& in - ) - { - try - { - deserialize(item.alpha, in); - deserialize(item.b, in); - deserialize(item.kernel_function, in); - deserialize(item.basis_vectors, in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type distance_function"); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename function_type, - typename normalizer_type = vector_normalizer<typename function_type::sample_type> - > - struct normalized_function - { - typedef typename function_type::result_type result_type; - typedef typename function_type::sample_type sample_type; - typedef typename function_type::mem_manager_type mem_manager_type; - - normalizer_type normalizer; - function_type function; - - normalized_function ( - ){} - - normalized_function ( - const normalized_function& f - ) : - normalizer(f.normalizer), - function(f.function) - {} - - const std::vector<result_type> get_labels( - ) const { return function.get_labels(); } - - unsigned long number_of_classes ( - ) const { return function.number_of_classes(); } - - normalized_function ( - const vector_normalizer<sample_type>& normalizer_, - const function_type& funct - ) : normalizer(normalizer_), function(funct) {} - - result_type operator() ( - const sample_type& x - ) const { return function(normalizer(x)); } - }; - - template < - typename function_type, - typename normalizer_type - > - void serialize ( - const normalized_function<function_type,normalizer_type>& item, - std::ostream& out - ) - { - try - { - serialize(item.normalizer, out); - serialize(item.function, out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type normalized_function"); - } - } - - template < - typename function_type, - typename normalizer_type - > - void deserialize ( - normalized_function<function_type,normalizer_type>& item, - std::istream& in - ) - { - try - { - deserialize(item.normalizer, in); - deserialize(item.function, in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type normalized_function"); - } - } - -// 
---------------------------------------------------------------------------------------- - - template < - typename K - > - struct projection_function - { - typedef K kernel_type; - typedef typename K::scalar_type scalar_type; - typedef typename K::sample_type sample_type; - typedef typename K::mem_manager_type mem_manager_type; - - typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; - typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type; - typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type; - typedef scalar_vector_type result_type; - - scalar_matrix_type weights; - K kernel_function; - sample_vector_type basis_vectors; - - projection_function ( - ) {} - - projection_function ( - const projection_function& f - ) : weights(f.weights), kernel_function(f.kernel_function), basis_vectors(f.basis_vectors) {} - - projection_function ( - const scalar_matrix_type& weights_, - const K& kernel_function_, - const sample_vector_type& basis_vectors_ - ) : weights(weights_), kernel_function(kernel_function_), basis_vectors(basis_vectors_) {} - - long out_vector_size ( - ) const { return weights.nr(); } - - const result_type& operator() ( - const sample_type& x - ) const - { - // Run the x sample through all the basis functions we have and then - // multiply it by the weights matrix and return the result. Note that - // the temp vectors are here to avoid reallocating their memory every - // time this function is called. - temp1 = kernel_matrix(kernel_function, basis_vectors, x); - temp2 = weights*temp1; - return temp2; - } - - private: - mutable result_type temp1, temp2; - }; - - template < - typename K - > - void serialize ( - const projection_function<K>& item, - std::ostream& out - ) - { - try - { - serialize(item.weights, out); - serialize(item.kernel_function, out); - serialize(item.basis_vectors, out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type projection_function"); - } - } - - template < - typename K - > - void deserialize ( - projection_function<K>& item, - std::istream& in - ) - { - try - { - deserialize(item.weights, in); - deserialize(item.kernel_function, in); - deserialize(item.basis_vectors, in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type projection_function"); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename K, - typename result_type_ = typename K::scalar_type - > - struct multiclass_linear_decision_function - { - typedef result_type_ result_type; - - typedef K kernel_type; - typedef typename K::scalar_type scalar_type; - typedef typename K::sample_type sample_type; - typedef typename K::mem_manager_type mem_manager_type; - - typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; - typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type; - - // You are getting a compiler error on this line because you supplied a non-linear kernel - // to the multiclass_linear_decision_function object. You have to use one of the linear - // kernels with this object. 
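- // (Concretely, the assertion below is what enforces this requirement: only
 - // linear_kernel and sparse_linear_kernel are admitted, because predict() evaluates
 - // dot(rowm(weights,i), x) - b(i) directly against the raw sample, which is only
 - // meaningful when the samples themselves live in the kernel's input space.)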
- COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value || - is_same_type<K, sparse_linear_kernel<sample_type> >::value )); - - - scalar_matrix_type weights; - scalar_vector_type b; - std::vector<result_type> labels; - - const std::vector<result_type>& get_labels( - ) const { return labels; } - - unsigned long number_of_classes ( - ) const { return labels.size(); } - - std::pair<result_type, scalar_type> predict ( - const sample_type& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(weights.size() > 0 && - weights.nr() == (long)number_of_classes() && - weights.nr() == b.size(), - "\t pair<result_type,scalar_type> multiclass_linear_decision_function::predict(x)" - << "\n\t This object must be properly initialized before you can use it." - << "\n\t weights.size(): " << weights.size() - << "\n\t weights.nr(): " << weights.nr() - << "\n\t number_of_classes(): " << number_of_classes() - ); - - // Rather than doing something like, best_idx = index_of_max(weights*x-b) - // we do the following somewhat more complex thing because this supports - // both sparse and dense samples. - scalar_type best_val = dot(rowm(weights,0),x) - b(0); - unsigned long best_idx = 0; - - for (unsigned long i = 1; i < labels.size(); ++i) - { - scalar_type temp = dot(rowm(weights,i),x) - b(i); - if (temp > best_val) - { - best_val = temp; - best_idx = i; - } - } - - return std::make_pair(labels[best_idx], best_val); - } - - result_type operator() ( - const sample_type& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(weights.size() > 0 && - weights.nr() == (long)number_of_classes() && - weights.nr() == b.size(), - "\t result_type multiclass_linear_decision_function::operator()(x)" - << "\n\t This object must be properly initialized before you can use it." - << "\n\t weights.size(): " << weights.size() - << "\n\t weights.nr(): " << weights.nr() - << "\n\t number_of_classes(): " << number_of_classes() - ); - - return predict(x).first; - } - }; - - template < - typename K, - typename result_type_ - > - void serialize ( - const multiclass_linear_decision_function<K,result_type_>& item, - std::ostream& out - ) - { - try - { - serialize(item.weights, out); - serialize(item.b, out); - serialize(item.labels, out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type multiclass_linear_decision_function"); - } - } - - template < - typename K, - typename result_type_ - > - void deserialize ( - multiclass_linear_decision_function<K,result_type_>& item, - std::istream& in - ) - { - try - { - deserialize(item.weights, in); - deserialize(item.b, in); - deserialize(item.labels, in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type multiclass_linear_decision_function"); - } - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_FUNCTION - - diff --git a/ml/dlib/dlib/svm/function_abstract.h b/ml/dlib/dlib/svm/function_abstract.h deleted file mode 100644 index 783a68c50..000000000 --- a/ml/dlib/dlib/svm/function_abstract.h +++ /dev/null @@ -1,997 +0,0 @@ -// Copyright (C) 2007 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
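Before the abstract documentation that follows, here is a minimal illustrative sketch (not part of the library) of how the concrete objects defined above in function.h compose: it hand-builds a decision_function over a linear_kernel and wraps it in a probabilistic_decision_function. The alpha/beta calibration constants are invented for illustration; in practice they would come from a trainer such as dlib's train_probabilistic_decision_function().

    #include <dlib/svm.h>
    #include <iostream>

    int main()
    {
        typedef dlib::matrix<double,2,1> sample_type;
        typedef dlib::linear_kernel<sample_type> kernel_type;

        // A decision_function with one basis vector of weight 1 computes
        // f(x) = dot(x, basis_vectors(0)) - b.
        dlib::decision_function<kernel_type> df;
        df.alpha = dlib::ones_matrix<double>(1,1);
        df.b = 0.5;
        df.basis_vectors.set_size(1);
        df.basis_vectors(0) = 1, -1;

        sample_type x;
        x = 2, 0;
        std::cout << "raw score f(x): " << df(x) << "\n";  // dot((1,-1),(2,0)) - 0.5 == 1.5

        // Platt-style calibration: P(y == +1 | x) = 1/(1 + exp(alpha*f(x) + beta)).
        // These alpha/beta values are made up for the sketch.
        dlib::probabilistic_decision_function<kernel_type> pdf(-2.0, 0.0, df);
        std::cout << "P(+1): " << pdf(x) << "\n";          // 1/(1+exp(-3)) ~= 0.95
    }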
-#undef DLIB_SVm_FUNCTION_ABSTRACT_ -#ifdef DLIB_SVm_FUNCTION_ABSTRACT_ - -#include <cmath> -#include <limits> -#include <sstream> -#include "../matrix/matrix_abstract.h" -#include "../algs.h" -#include "../serialize.h" -#include "../statistics/statistics_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - struct decision_function - { - /*! - REQUIREMENTS ON K - K must be a kernel function object type as defined at the - top of dlib/svm/kernel_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object represents a classification or regression function that was - learned by a kernel based learning algorithm. Therefore, it is a function - object that takes a sample object and returns a scalar value. - - THREAD SAFETY - It is always safe to use distinct instances of this object in different - threads. However, when a single instance is shared between threads then - the following rules apply: - It is safe to call operator() on this object from multiple threads so - long as the kernel, K, is also threadsafe. This is because operator() - is a read-only operation. However, any operation that modifies a - decision_function is not threadsafe. - !*/ - - typedef K kernel_type; - typedef typename K::scalar_type scalar_type; - typedef typename K::scalar_type result_type; - typedef typename K::sample_type sample_type; - typedef typename K::mem_manager_type mem_manager_type; - - typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; - typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type; - - scalar_vector_type alpha; - scalar_type b; - K kernel_function; - sample_vector_type basis_vectors; - - decision_function ( - ); - /*! - ensures - - #b == 0 - - #alpha.nr() == 0 - - #basis_vectors.nr() == 0 - !*/ - - decision_function ( - const decision_function& f - ); - /*! - ensures - - #*this is a copy of f - !*/ - - decision_function ( - const scalar_vector_type& alpha_, - const scalar_type& b_, - const K& kernel_function_, - const sample_vector_type& basis_vectors_ - ) : alpha(alpha_), b(b_), kernel_function(kernel_function_), basis_vectors(basis_vectors_) {} - /*! - ensures - - populates the decision function with the given basis vectors, weights(i.e. alphas), - b term, and kernel function. - !*/ - - result_type operator() ( - const sample_type& x - ) const - /*! - ensures - - evaluates this sample according to the decision - function contained in this object. - !*/ - { - result_type temp = 0; - for (long i = 0; i < alpha.nr(); ++i) - temp += alpha(i) * kernel_function(x,basis_vectors(i)); - - return temp - b; - } - }; - - template < - typename K - > - void serialize ( - const decision_function<K>& item, - std::ostream& out - ); - /*! - provides serialization support for decision_function - !*/ - - template < - typename K - > - void deserialize ( - decision_function<K>& item, - std::istream& in - ); - /*! - provides serialization support for decision_function - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename function_type - > - struct probabilistic_function - { - /*! - REQUIREMENTS ON function_type - - function_type must be a function object with an overloaded - operator() similar to the other function objects defined in - this file. The operator() should return a scalar type such as - double or float. 
- - WHAT THIS OBJECT REPRESENTS - This object represents a binary decision function that returns an - estimate of the probability that a given sample is in the +1 class. - - THREAD SAFETY - It is always safe to use distinct instances of this object in different - threads. However, when a single instance is shared between threads then - the following rules apply: - It is safe to call operator() on this object from multiple threads so - long as decision_funct is also threadsafe. This is because operator() - is a read-only operation. However, any operation that modifies a - probabilistic_function is not threadsafe. - !*/ - - typedef typename function_type::scalar_type scalar_type; - typedef typename function_type::result_type result_type; - typedef typename function_type::sample_type sample_type; - typedef typename function_type::mem_manager_type mem_manager_type; - - scalar_type alpha; - scalar_type beta; - function_type decision_funct; - - probabilistic_function ( - ); - /*! - ensures - - #alpha == 0 - - #beta == 0 - - #decision_funct has its initial value - !*/ - - probabilistic_function ( - const probabilistic_function& f - ); - /*! - ensures - - #*this is a copy of f - !*/ - - probabilistic_function ( - const scalar_type a, - const scalar_type b, - const function_type& decision_funct_ - ) : alpha(a), beta(b), decision_funct(decision_funct_) {} - /*! - ensures - - populates the probabilistic decision function with the given alpha, beta, - and decision function. - !*/ - - result_type operator() ( - const sample_type& x - ) const - /*! - ensures - - returns a number P such that: - - 0 <= P <= 1 - - P represents the probability that sample x is from - the class +1 - !*/ - { - // Evaluate the normal decision function - result_type f = decision_funct(x); - // Now basically normalize the output so that it is a properly - // conditioned probability of x being in the +1 class given - // the output of the decision function. - return 1/(1 + std::exp(alpha*f + beta)); - } - }; - - template < - typename function_type - > - void serialize ( - const probabilistic_function<function_type>& item, - std::ostream& out - ); - /*! - provides serialization support for probabilistic_function - !*/ - - template < - typename function_type - > - void deserialize ( - probabilistic_function<function_type>& item, - std::istream& in - ); - /*! - provides serialization support for probabilistic_function - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - struct probabilistic_decision_function - { - /*! - REQUIREMENTS ON K - K must be a kernel function object type as defined at the - top of dlib/svm/kernel_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object represents a binary decision function that returns an - estimate of the probability that a given sample is in the +1 class. - - Note that this object is essentially just a copy of - probabilistic_function but with the template argument - changed from being a function type to a kernel type. Therefore, this - type is just a convenient version of probabilistic_function - for the case where the decision function is a dlib::decision_function<K>. - - THREAD SAFETY - It is always safe to use distinct instances of this object in different - threads. However, when a single instance is shared between threads then - the following rules apply: - It is safe to call operator() on this object from multiple threads so - long as the kernel, K, is also threadsafe. 
This is because operator() - is a read-only operation. However, any operation that modifies a - probabilistic_decision_function is not threadsafe. - !*/ - - typedef K kernel_type; - typedef typename K::scalar_type scalar_type; - typedef typename K::scalar_type result_type; - typedef typename K::sample_type sample_type; - typedef typename K::mem_manager_type mem_manager_type; - - scalar_type alpha; - scalar_type beta; - decision_function<K> decision_funct; - - probabilistic_decision_function ( - ); - /*! - ensures - - #alpha == 0 - - #beta == 0 - - #decision_funct has its initial value - !*/ - - probabilistic_decision_function ( - const probabilistic_decision_function& f - ); - /*! - ensures - - #*this is a copy of f - !*/ - - probabilistic_decision_function ( - const probabilistic_function<decision_function<K> >& d - ); - /*! - ensures - - #*this is a copy of f - !*/ - - probabilistic_decision_function ( - const scalar_type a, - const scalar_type b, - const decision_function<K>& decision_funct_ - ) : alpha(a), beta(b), decision_funct(decision_funct_) {} - /*! - ensures - - populates the probabilistic decision function with the given alpha, beta, - and decision_function. - !*/ - - result_type operator() ( - const sample_type& x - ) const - /*! - ensures - - returns a number P such that: - - 0 <= P <= 1 - - P represents the probability that sample x is from - the class +1 - !*/ - { - // Evaluate the normal decision function - result_type f = decision_funct(x); - // Now basically normalize the output so that it is a properly - // conditioned probability of x being in the +1 class given - // the output of the decision function. - return 1/(1 + std::exp(alpha*f + beta)); - } - }; - - template < - typename K - > - void serialize ( - const probabilistic_decision_function<K>& item, - std::ostream& out - ); - /*! - provides serialization support for probabilistic_decision_function - !*/ - - template < - typename K - > - void deserialize ( - probabilistic_decision_function<K>& item, - std::istream& in - ); - /*! - provides serialization support for probabilistic_decision_function - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class distance_function - { - /*! - REQUIREMENTS ON K - K must be a kernel function object type as defined at the - top of dlib/svm/kernel_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object represents a point in kernel induced feature space. - You may use this object to find the distance from the point it - represents to points in input space as well as other points - represented by distance_functions. - - Specifically, if O() is the feature mapping associated with - the kernel used by this object. Then this object represents - the point: - sum alpha(i)*O(basis_vectors(i)) - - I.e. It represents a linear combination of the basis vectors where - the weights of the linear combination are stored in the alpha vector. - - THREAD SAFETY - It is always safe to use distinct instances of this object in different - threads. However, when a single instance is shared between threads then - the following rules apply: - It is safe to call the const members of this object from multiple - threads so long as the kernel, K, is also threadsafe. This is because - the const members are purely read-only operations. However, any - operation that modifies a distance_function is not threadsafe. 
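As a concrete expansion of the distance computation: if *this represents the point c1 == sum_i get_alpha()(i)*O(get_basis_vectors()(i)) and x represents c2 defined the same way from x.get_alpha() and x.get_basis_vectors(), then operator()(x) returns sqrt(<c1,c1> + <c2,c2> - 2*<c1,c2>), where the cross term <c1,c2> expands, via the kernel trick, to sum_i sum_j get_alpha()(i)*x.get_alpha()(j)*K(get_basis_vectors()(i), x.get_basis_vectors()(j)).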
- !*/ - - public: - typedef K kernel_type; - typedef typename K::scalar_type scalar_type; - typedef typename K::scalar_type result_type; - typedef typename K::sample_type sample_type; - typedef typename K::mem_manager_type mem_manager_type; - - typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; - typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type; - - distance_function ( - ); - /*! - ensures - - #get_squared_norm() == 0 - - #get_alpha().size() == 0 - - #get_basis_vectors().size() == 0 - - #get_kernel() == K() (i.e. the default value of the kernel) - !*/ - - explicit distance_function ( - const kernel_type& kern - ); - /*! - ensures - - #get_squared_norm() == 0 - - #get_alpha().size() == 0 - - #get_basis_vectors().size() == 0 - - #get_kernel() == kern - !*/ - - distance_function ( - const kernel_type& kern, - const sample_type& samp - ); - /*! - ensures - - This object represents the point in kernel feature space which - corresponds directly to the given sample. In particular this means - that: - - #get_kernel() == kern - - #get_alpha() == a vector of length 1 which contains the value 1 - - #get_basis_vectors() == a vector of length 1 which contains samp - !*/ - - distance_function ( - const decision_function<K>& f - ); - /*! - ensures - - Every decision_function represents a point in kernel feature space along - with a bias value. This constructor discards the bias value and creates - a distance_function which represents the point associated with the given - decision_function f. In particular, this means: - - #get_alpha() == f.alpha - - #get_kernel() == f.kernel_function - - #get_basis_vectors() == f.basis_vectors - !*/ - - distance_function ( - const distance_function& f - ); - /*! - requires - - f is a valid distance_function. In particular, this means that - f.alpha.size() == f.basis_vectors.size() - ensures - - #*this is a copy of f - !*/ - - distance_function ( - const scalar_vector_type& alpha, - const scalar_type& squared_norm, - const K& kernel_function, - const sample_vector_type& basis_vectors - ); - /*! - requires - - alpha.size() == basis_vectors.size() - - squared_norm == trans(alpha)*kernel_matrix(kernel_function,basis_vectors)*alpha - (Basically, squared_norm needs to be set properly for this object to make sense. - You should prefer to use the following constructor which computes squared_norm for - you. This version is provided just in case you already know squared_norm and - don't want to spend CPU cycles to recompute it.) - ensures - - populates the distance function with the given basis vectors, weights(i.e. alphas), - squared_norm value, and kernel function. I.e. - - #get_alpha() == alpha - - #get_squared_norm() == squared_norm - - #get_kernel() == kernel_function - - #get_basis_vectors() == basis_vectors - !*/ - - distance_function ( - const scalar_vector_type& alpha, - const K& kernel_function, - const sample_vector_type& basis_vectors - ); - /*! - requires - - alpha.size() == basis_vectors.size() - ensures - - populates the distance function with the given basis vectors, weights(i.e. alphas), - and kernel function. The correct b value is computed automatically. I.e. - - #get_alpha() == alpha - - #get_squared_norm() == trans(alpha)*kernel_matrix(kernel_function,basis_vectors)*alpha - (i.e. get_squared_norm() will be automatically set to the correct value) - - #get_kernel() == kernel_function - - #get_basis_vectors() == basis_vectors - !*/ - - const scalar_vector_type& get_alpha ( - ) const; - /*! 
- ensures - - returns the set of weights on each basis vector in this object - !*/ - - const scalar_type& get_squared_norm ( - ) const; - /*! - ensures - - returns the squared norm of the point represented by this object. This value is - equal to the following expression: - trans(get_alpha()) * kernel_matrix(get_kernel(),get_basis_vectors()) * get_alpha() - !*/ - - const K& get_kernel( - ) const; - /*! - ensures - - returns the kernel used by this object. - !*/ - - const sample_vector_type& get_basis_vectors ( - ) const; - /*! - ensures - - returns the set of basis vectors contained in this object - !*/ - - result_type operator() ( - const sample_type& x - ) const; - /*! - ensures - - Let O(x) represent the point x projected into kernel induced feature space. - - let c == sum_over_i get_alpha()(i)*O(get_basis_vectors()(i)) == the point in kernel space that - this object represents. That is, c is the weighted sum of basis vectors. - - Then this object returns the distance between the point O(x) and c in kernel - space. - !*/ - - result_type operator() ( - const distance_function& x - ) const; - /*! - requires - - kernel_function == x.kernel_function - ensures - - returns the distance between the points in kernel space represented by *this and x. - !*/ - - distance_function operator* ( - const scalar_type& val - ) const; - /*! - ensures - - multiplies the point represented by *this by val and returns the result. In - particular, this function returns a distance_function DF such that: - - DF.get_basis_vectors() == get_basis_vectors() - - DF.get_kernel() == get_kernel() - - DF.get_alpha() == get_alpha() * val - !*/ - - distance_function operator/ ( - const scalar_type& val - ) const; - /*! - ensures - - divides the point represented by *this by val and returns the result. In - particular, this function returns a distance_function DF such that: - - DF.get_basis_vectors() == get_basis_vectors() - - DF.get_kernel() == get_kernel() - - DF.get_alpha() == get_alpha() / val - !*/ - - distance_function operator+ ( - const distance_function& rhs - ) const; - /*! - requires - - get_kernel() == rhs.get_kernel() - ensures - - returns a distance function DF such that: - - DF represents the sum of the point represented by *this and rhs - - DF.get_basis_vectors().size() == get_basis_vectors().size() + rhs.get_basis_vectors().size() - - DF.get_basis_vectors() contains all the basis vectors in both *this and rhs. - - DF.get_kernel() == get_kernel() - - DF.alpha == join_cols(get_alpha(), rhs.get_alpha()) - !*/ - - distance_function operator- ( - const distance_function& rhs - ) const; - /*! - requires - - get_kernel() == rhs.get_kernel() - ensures - - returns a distance function DF such that: - - DF represents the difference of the point represented by *this and rhs (i.e. *this - rhs) - - DF.get_basis_vectors().size() == get_basis_vectors().size() + rhs.get_basis_vectors().size() - - DF.get_basis_vectors() contains all the basis vectors in both *this and rhs. - - DF.get_kernel() == get_kernel() - - DF.alpha == join_cols(get_alpha(), -1 * rhs.get_alpha()) - !*/ - }; - - template < - typename K - > - distance_function<K> operator* ( - const typename K::scalar_type& val, - const distance_function<K>& df - ) { return df*val; } - /*! - ensures - - multiplies the point represented by df by val and returns the result. This - function just allows multiplication syntax of the form val*df. - !*/ - - template < - typename K - > - void serialize ( - const distance_function<K>& item, - std::ostream& out - ); - /*!
- provides serialization support for distance_function - !*/ - - template < - typename K - > - void deserialize ( - distance_function<K>& item, - std::istream& in - ); - /*! - provides serialization support for distance_function - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename function_type, - typename normalizer_type = vector_normalizer<typename function_type::sample_type> - > - struct normalized_function - { - /*! - REQUIREMENTS ON function_type - - function_type must be a function object with an overloaded - operator() similar to the other function objects defined in - this file. - - REQUIREMENTS ON normalizer_type - - normalizer_type must be a function object with an overloaded - operator() that takes a sample_type and returns a sample_type. - - WHAT THIS OBJECT REPRESENTS - This object represents a container for another function - object and an instance of a normalizer function. - - It automatically normalizes all inputs before passing them - off to the contained function object. - !*/ - - typedef typename function_type::result_type result_type; - typedef typename function_type::sample_type sample_type; - typedef typename function_type::mem_manager_type mem_manager_type; - - normalizer_type normalizer; - function_type function; - - normalized_function ( - ); - /*! - ensures - - the members of this object have their default values - !*/ - - normalized_function ( - const normalized_function& f - ); - /*! - ensures - - #*this is a copy of f - !*/ - - normalized_function ( - const vector_normalizer<sample_type>& normalizer_, - const function_type& funct - ) : normalizer(normalizer_), function(funct) {} - /*! - ensures - - populates this object with the vector_normalizer and function object - !*/ - - const std::vector<result_type> get_labels( - ) const; - /*! - ensures - - returns function.get_labels() - !*/ - - unsigned long number_of_classes ( - ) const; - /*! - ensures - - returns function.number_of_classes() - !*/ - - result_type operator() ( - const sample_type& x - ) const - /*! - ensures - - returns function(normalizer(x)) - !*/ - }; - - template < - typename function_type, - typename normalizer_type - > - void serialize ( - const normalized_function<function_type, normalizer_type>& item, - std::ostream& out - ); - /*! - provides serialization support for normalized_function - !*/ - - template < - typename function_type, - typename normalizer_type - > - void deserialize ( - normalized_function<function_type, normalizer_type>& item, - std::istream& in - ); - /*! - provides serialization support for normalized_function - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - struct projection_function - { - /*! - REQUIREMENTS ON K - K must be a kernel function object type as defined at the - top of dlib/svm/kernel_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object represents a function that takes a data sample and projects - it into kernel feature space. The result is a real valued column vector that - represents a point in a kernel feature space. - - THREAD SAFETY - It is always safe to use distinct instances of this object in different - threads. However, when a single instance is shared between threads then - the following rules apply: - Instances of this object have a mutable cache which is used by const - member functions. 
Therefore, it is not safe to use one instance of - this object from multiple threads (unless protected by a mutex). - !*/ - - typedef K kernel_type; - typedef typename K::scalar_type scalar_type; - typedef typename K::sample_type sample_type; - typedef typename K::mem_manager_type mem_manager_type; - - typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; - typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type; - typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type; - typedef scalar_vector_type result_type; - - scalar_matrix_type weights; - K kernel_function; - sample_vector_type basis_vectors; - - projection_function ( - ); - /*! - ensures - - #weights.size() == 0 - - #basis_vectors.size() == 0 - !*/ - - projection_function ( - const projection_function& f - ); - /*! - ensures - - #*this is a copy of f - !*/ - - projection_function ( - const scalar_matrix_type& weights_, - const K& kernel_function_, - const sample_vector_type& basis_vectors_ - ) : weights(weights_), kernel_function(kernel_function_), basis_vectors(basis_vectors_) {} - /*! - ensures - - populates the projection function with the given basis vectors, weights, - and kernel function. - !*/ - - long out_vector_size ( - ) const; - /*! - ensures - - returns weights.nr() - (i.e. returns the dimensionality of the vectors output by this projection_function.) - !*/ - - const result_type& operator() ( - const sample_type& x - ) const - /*! - requires - - weights.nc() == basis_vectors.size() - - out_vector_size() > 0 - ensures - - Takes the given x sample and projects it onto part of the kernel feature - space spanned by the basis_vectors. The exact projection arithmetic is - defined below. - !*/ - { - // Run the x sample through all the basis functions we have and then - // multiply it by the weights matrix and return the result. Note that - // the temp vectors are here to avoid reallocating their memory every - // time this function is called. - temp1 = kernel_matrix(kernel_function, basis_vectors, x); - temp2 = weights*temp1; - return temp2; - } - - private: - mutable result_type temp1, temp2; - }; - - template < - typename K - > - void serialize ( - const projection_function<K>& item, - std::ostream& out - ); - /*! - provides serialization support for projection_function - !*/ - - template < - typename K - > - void deserialize ( - projection_function<K>& item, - std::istream& in - ); - /*! - provides serialization support for projection_function - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename K, - typename result_type_ = typename K::scalar_type - > - struct multiclass_linear_decision_function - { - /*! - REQUIREMENTS ON K - K must be either linear_kernel or sparse_linear_kernel. - - WHAT THIS OBJECT REPRESENTS - This object represents a multiclass classifier built out of a set of - binary classifiers. Each binary classifier is used to vote for the - correct multiclass label using a one vs. all strategy. Therefore, - if you have N classes then there will be N binary classifiers inside - this object. Additionally, this object is linear in the sense that - each of these binary classifiers is a simple linear plane. - - THREAD SAFETY - It is always safe to use distinct instances of this object in different - threads. However, when a single instance is shared between threads then - the following rules apply: - It is safe to call the const member functions of this object from - multiple threads. 
This is because the const members are purely - read-only operations. However, any operation that modifies a - multiclass_linear_decision_function is not threadsafe. - !*/ - - typedef result_type_ result_type; - - typedef K kernel_type; - typedef typename K::scalar_type scalar_type; - typedef typename K::sample_type sample_type; - typedef typename K::mem_manager_type mem_manager_type; - - typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; - typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type; - - scalar_matrix_type weights; - scalar_vector_type b; - std::vector<result_type> labels; - - const std::vector<result_type>& get_labels( - ) const { return labels; } - /*! - ensures - - returns a vector containing all the labels which can be - predicted by this object. - !*/ - - unsigned long number_of_classes ( - ) const; - /*! - ensures - - returns get_labels().size() - (i.e. returns the number of different labels/classes predicted by - this object) - !*/ - - std::pair<result_type, scalar_type> predict ( - const sample_type& x - ) const; - /*! - requires - - weights.size() > 0 - - weights.nr() == number_of_classes() == b.size() - - if (x is a dense vector, i.e. a dlib::matrix) then - - is_vector(x) == true - - x.size() == weights.nc() - (i.e. it must be legal to multiply weights with x) - ensures - - Returns the predicted label for the x sample and also it's score. - In particular, it returns the following: - std::make_pair(labels[index_of_max(weights*x-b)], max(weights*x-b)) - !*/ - - result_type operator() ( - const sample_type& x - ) const; - /*! - requires - - weights.size() > 0 - - weights.nr() == number_of_classes() == b.size() - - if (x is a dense vector, i.e. a dlib::matrix) then - - is_vector(x) == true - - x.size() == weights.nc() - (i.e. it must be legal to multiply weights with x) - ensures - - Returns the predicted label for the x sample. In particular, it returns - the following: - labels[index_of_max(weights*x-b)] - Or in other words, this function returns predict(x).first - !*/ - }; - - template < - typename K, - typename result_type_ - > - void serialize ( - const multiclass_linear_decision_function<K,result_type_>& item, - std::ostream& out - ); - /*! - provides serialization support for multiclass_linear_decision_function - !*/ - - template < - typename K, - typename result_type_ - > - void deserialize ( - multiclass_linear_decision_function<K,result_type_>& item, - std::istream& in - ); - /*! - provides serialization support for multiclass_linear_decision_function - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_FUNCTION_ABSTRACT_ - - - diff --git a/ml/dlib/dlib/svm/kcentroid.h b/ml/dlib/dlib/svm/kcentroid.h deleted file mode 100644 index 5f380486a..000000000 --- a/ml/dlib/dlib/svm/kcentroid.h +++ /dev/null @@ -1,614 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_KCENTROId_ -#define DLIB_KCENTROId_ - -#include <vector> - -#include "kcentroid_abstract.h" -#include "../matrix.h" -#include "function.h" -#include "../std_allocator.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template <typename kernel_type> - class kcentroid - { - /*! - This object represents a weighted sum of sample points in a kernel induced - feature space. 
It can be used to kernelize any algorithm that requires only - the ability to perform vector addition, subtraction, scalar multiplication, - and inner products. It uses the sparsification technique described in the - paper The Kernel Recursive Least Squares Algorithm by Yaakov Engel. - - To understand the code it would also be useful to consult page 114 of the book - Kernel Methods for Pattern Analysis by Taylor and Cristianini as well as page 554 - (particularly equation 18.31) of the book Learning with Kernels by Scholkopf and - Smola. Everything you really need to know is in the Engel paper. But the other - books help give more perspective on the issues involved. - - - INITIAL VALUE - - min_strength == 0 - - min_vect_idx == 0 - - K_inv.size() == 0 - - K.size() == 0 - - dictionary.size() == 0 - - bias == 0 - - bias_is_stale == false - - CONVENTION - - max_dictionary_size() == my_max_dictionary_size - - get_kernel() == kernel - - - K.nr() == dictionary.size() - - K.nc() == dictionary.size() - - for all valid r,c: - - K(r,c) == kernel(dictionary[r], dictionary[c]) - - K_inv == inv(K) - - - if (dictionary.size() == my_max_dictionary_size && my_remove_oldest_first == false) then - - for all valid 0 < i < dictionary.size(): - - Let STRENGTHS[i] == the delta you would get for dictionary[i] (i.e. Approximately - Linearly Dependent value) if you removed dictionary[i] from this object and then - tried to add it back in. - - min_strength == the minimum value from STRENGTHS - - min_vect_idx == the index of the element in STRENGTHS with the smallest value - - !*/ - - public: - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - - kcentroid ( - ) : - my_remove_oldest_first(false), - my_tolerance(0.001), - my_max_dictionary_size(1000000), - bias(0), - bias_is_stale(false) - { - clear_dictionary(); - } - - explicit kcentroid ( - const kernel_type& kernel_, - scalar_type tolerance_ = 0.001, - unsigned long max_dictionary_size_ = 1000000, - bool remove_oldest_first_ = false - ) : - my_remove_oldest_first(remove_oldest_first_), - kernel(kernel_), - my_tolerance(tolerance_), - my_max_dictionary_size(max_dictionary_size_), - bias(0), - bias_is_stale(false) - { - // make sure requires clause is not broken - DLIB_ASSERT(tolerance_ > 0 && max_dictionary_size_ > 1, - "\tkcentroid::kcentroid()" - << "\n\t You have to give a positive tolerance" - << "\n\t this: " << this - << "\n\t tolerance_: " << tolerance_ - << "\n\t max_dictionary_size_: " << max_dictionary_size_ - ); - - clear_dictionary(); - } - - scalar_type tolerance() const - { - return my_tolerance; - } - - unsigned long max_dictionary_size() const - { - return my_max_dictionary_size; - } - - bool remove_oldest_first ( - ) const - { - return my_remove_oldest_first; - } - - const kernel_type& get_kernel ( - ) const - { - return kernel; - } - - void clear_dictionary () - { - dictionary.clear(); - alpha.clear(); - - min_strength = 0; - min_vect_idx = 0; - K_inv.set_size(0,0); - K.set_size(0,0); - samples_seen = 0; - bias = 0; - bias_is_stale = false; - } - - scalar_type operator() ( - const kcentroid& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(x.get_kernel() == get_kernel(), - "\tscalar_type kcentroid::operator()(const kcentroid& x)" - << "\n\tYou can only compare two kcentroid objects if they use the same kernel" - << "\n\tthis: " << this - ); - - // make sure the bias terms are up to date - 
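- // ("bias" caches the squared norm of the point each kcentroid represents,
 - // i.e. trans(alpha)*K*alpha. It is flagged stale whenever alpha changes and
 - // recomputed lazily by refresh_bias(); see that function below.)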
refresh_bias(); - x.refresh_bias(); - - scalar_type temp = x.bias + bias - 2*inner_product(x); - - if (temp > 0) - return std::sqrt(temp); - else - return 0; - } - - scalar_type inner_product ( - const sample_type& x - ) const - { - scalar_type temp = 0; - for (unsigned long i = 0; i < alpha.size(); ++i) - temp += alpha[i]*kernel(dictionary[i], x); - return temp; - } - - scalar_type inner_product ( - const kcentroid& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(x.get_kernel() == get_kernel(), - "\tscalar_type kcentroid::inner_product(const kcentroid& x)" - << "\n\tYou can only compare two kcentroid objects if they use the same kernel" - << "\n\tthis: " << this - ); - - scalar_type temp = 0; - for (unsigned long i = 0; i < alpha.size(); ++i) - { - for (unsigned long j = 0; j < x.alpha.size(); ++j) - { - temp += alpha[i]*x.alpha[j]*kernel(dictionary[i], x.dictionary[j]); - } - } - return temp; - } - - scalar_type squared_norm ( - ) const - { - refresh_bias(); - return bias; - } - - scalar_type operator() ( - const sample_type& x - ) const - { - // make sure the bias terms are up to date - refresh_bias(); - - const scalar_type kxx = kernel(x,x); - - scalar_type temp = kxx + bias - 2*inner_product(x); - if (temp > 0) - return std::sqrt(temp); - else - return 0; - } - - scalar_type samples_trained ( - ) const - { - return samples_seen; - } - - scalar_type test_and_train ( - const sample_type& x - ) - { - ++samples_seen; - const scalar_type xscale = 1/samples_seen; - const scalar_type cscale = 1-xscale; - return train_and_maybe_test(x,cscale,xscale,true); - } - - void train ( - const sample_type& x - ) - { - ++samples_seen; - const scalar_type xscale = 1/samples_seen; - const scalar_type cscale = 1-xscale; - train_and_maybe_test(x,cscale,xscale,false); - } - - scalar_type test_and_train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ) - { - ++samples_seen; - return train_and_maybe_test(x,cscale,xscale,true); - } - - void scale_by ( - scalar_type cscale - ) - { - for (unsigned long i = 0; i < alpha.size(); ++i) - { - alpha[i] = cscale*alpha[i]; - } - } - - void train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ) - { - ++samples_seen; - train_and_maybe_test(x,cscale,xscale,false); - } - - void swap ( - kcentroid& item - ) - { - exchange(min_strength, item.min_strength); - exchange(min_vect_idx, item.min_vect_idx); - exchange(my_remove_oldest_first, item.my_remove_oldest_first); - - exchange(kernel, item.kernel); - dictionary.swap(item.dictionary); - alpha.swap(item.alpha); - K_inv.swap(item.K_inv); - K.swap(item.K); - exchange(my_tolerance, item.my_tolerance); - exchange(samples_seen, item.samples_seen); - exchange(bias, item.bias); - a.swap(item.a); - k.swap(item.k); - exchange(bias_is_stale, item.bias_is_stale); - exchange(my_max_dictionary_size, item.my_max_dictionary_size); - } - - unsigned long dictionary_size ( - ) const { return dictionary.size(); } - - friend void serialize(const kcentroid& item, std::ostream& out) - { - serialize(item.min_strength, out); - serialize(item.min_vect_idx, out); - serialize(item.my_remove_oldest_first, out); - - serialize(item.kernel, out); - serialize(item.dictionary, out); - serialize(item.alpha, out); - serialize(item.K_inv, out); - serialize(item.K, out); - serialize(item.my_tolerance, out); - serialize(item.samples_seen, out); - serialize(item.bias, out); - serialize(item.bias_is_stale, out); - serialize(item.my_max_dictionary_size, out); - } - - friend void 
deserialize(kcentroid& item, std::istream& in) - { - deserialize(item.min_strength, in); - deserialize(item.min_vect_idx, in); - deserialize(item.my_remove_oldest_first, in); - - deserialize(item.kernel, in); - deserialize(item.dictionary, in); - deserialize(item.alpha, in); - deserialize(item.K_inv, in); - deserialize(item.K, in); - deserialize(item.my_tolerance, in); - deserialize(item.samples_seen, in); - deserialize(item.bias, in); - deserialize(item.bias_is_stale, in); - deserialize(item.my_max_dictionary_size, in); - } - - distance_function<kernel_type> get_distance_function ( - ) const - { - refresh_bias(); - return distance_function<kernel_type>(mat(alpha), - bias, - kernel, - mat(dictionary)); - } - - private: - - void refresh_bias ( - ) const - { - if (bias_is_stale) - { - bias_is_stale = false; - // recompute the bias term - bias = sum(pointwise_multiply(K, mat(alpha)*trans(mat(alpha)))); - } - } - - scalar_type train_and_maybe_test ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale, - bool do_test - ) - { - scalar_type test_result = 0; - const scalar_type kx = kernel(x,x); - if (alpha.size() == 0) - { - // just ignore this sample if it is the zero vector (or really close to being zero) - if (std::abs(kx) > std::numeric_limits<scalar_type>::epsilon()) - { - // set initial state since this is the first training example we have seen - - K_inv.set_size(1,1); - K_inv(0,0) = 1/kx; - K.set_size(1,1); - K(0,0) = kx; - - alpha.push_back(xscale); - dictionary.push_back(x); - } - else - { - // the distance from an empty kcentroid and the zero vector is zero by definition. - return 0; - } - } - else - { - // fill in k - k.set_size(alpha.size()); - for (long r = 0; r < k.nr(); ++r) - k(r) = kernel(x,dictionary[r]); - - if (do_test) - { - refresh_bias(); - test_result = std::sqrt(kx + bias - 2*trans(mat(alpha))*k); - } - - // compute the error we would have if we approximated the new x sample - // with the dictionary. That is, do the ALD test from the KRLS paper. - a = K_inv*k; - scalar_type delta = kx - trans(k)*a; - - // if this new vector isn't approximately linearly dependent on the vectors - // in our dictionary. - if (delta > min_strength && delta > my_tolerance) - { - bool need_to_update_min_strength = false; - if (dictionary.size() >= my_max_dictionary_size) - { - // We need to remove one of the old members of the dictionary before - // we proceed with adding a new one. 
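- // Which vector gets evicted depends on remove_oldest_first(): either the
 - // oldest dictionary vector (FIFO, suited to slowly drifting input
 - // distributions) or the most linearly dependent one, whose index is
 - // tracked in min_vect_idx. Either way, once a slot is free the code below
 - // adds x and grows K_inv by the partitioned-inverse identity (equation
 - // 3.14 of the KRLS paper):
 - //   inv([K k; k' kxx]) == [K_inv + a*a'/delta, -a/delta; -a'/delta, 1/delta]
 - // where a == K_inv*k and delta == kxx - trans(k)*a.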
- long idx_to_remove; - if (my_remove_oldest_first) - { - // remove the oldest one - idx_to_remove = 0; - } - else - { - // if we have never computed the min_strength then we should compute it - if (min_strength == 0) - recompute_min_strength(); - - // select the dictionary vector that is most linearly dependent for removal - idx_to_remove = min_vect_idx; - need_to_update_min_strength = true; - } - - remove_dictionary_vector(idx_to_remove); - - // recompute these guys since they were computed with the old - // kernel matrix - k = remove_row(k,idx_to_remove); - a = K_inv*k; - delta = kx - trans(k)*a; - } - - // add x to the dictionary - dictionary.push_back(x); - - - // update K_inv by computing the new one in the temp matrix (equation 3.14) - matrix<scalar_type,0,0,mem_manager_type> temp(K_inv.nr()+1, K_inv.nc()+1); - // update the middle part of the matrix - set_subm(temp, get_rect(K_inv)) = K_inv + a*trans(a)/delta; - // update the right column of the matrix - set_subm(temp, 0, K_inv.nr(),K_inv.nr(),1) = -a/delta; - // update the bottom row of the matrix - set_subm(temp, K_inv.nr(), 0, 1, K_inv.nr()) = trans(-a/delta); - // update the bottom right corner of the matrix - temp(K_inv.nr(), K_inv.nc()) = 1/delta; - // put temp into K_inv - temp.swap(K_inv); - - - - // update K (the kernel matrix) - temp.set_size(K.nr()+1, K.nc()+1); - set_subm(temp, get_rect(K)) = K; - // update the right column of the matrix - set_subm(temp, 0, K.nr(),K.nr(),1) = k; - // update the bottom row of the matrix - set_subm(temp, K.nr(), 0, 1, K.nr()) = trans(k); - temp(K.nr(), K.nc()) = kx; - // put temp into K - temp.swap(K); - - - // now update the alpha vector - for (unsigned long i = 0; i < alpha.size(); ++i) - { - alpha[i] *= cscale; - } - alpha.push_back(xscale); - - - if (need_to_update_min_strength) - { - // now we have to recompute the min_strength in this case - recompute_min_strength(); - } - } - else - { - // update the alpha vector so that this new sample has been added into - // the mean vector we are accumulating - for (unsigned long i = 0; i < alpha.size(); ++i) - { - alpha[i] = cscale*alpha[i] + xscale*a(i); - } - } - } - - bias_is_stale = true; - - return test_result; - } - - void remove_dictionary_vector ( - long i - ) - /*! - requires - - 0 <= i < dictionary.size() - ensures - - #dictionary.size() == dictionary.size() - 1 - - #alpha.size() == alpha.size() - 1 - - updates the K_inv matrix so that it is still a proper inverse of the - kernel matrix - - also removes the necessary row and column from the K matrix - - uses the this->a variable so after this function runs that variable - will contain a different value. - !*/ - { - // remove the dictionary vector - dictionary.erase(dictionary.begin()+i); - - // remove the i'th vector from the inverse kernel matrix. This formula is basically - // just the reverse of the way K_inv is updated by equation 3.14 during normal training. - K_inv = removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i); - - // now compute the updated alpha values to take account that we just removed one of - // our dictionary vectors - a = (K_inv*remove_row(K,i)*mat(alpha)); - - // now copy over the new alpha values - alpha.resize(alpha.size()-1); - for (unsigned long k = 0; k < alpha.size(); ++k) - { - alpha[k] = a(k); - } - - // update the K matrix as well - K = removerc(K,i,i); - } - - void recompute_min_strength ( - ) - /*! 
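- Does one pass over the dictionary: for each vector i it forms the inverse of
 - K with row/column i removed (a rank-one downdate of K_inv) and measures the
 - ALD delta, K(i,i) - trans(k_i)*a, that vector i would have if it were
 - removed and then re-added.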
- ensures - - recomputes the min_strength and min_vect_idx values - so that they are correct with respect to the CONVENTION - - uses the this->a variable so after this function runs that variable - will contain a different value. - !*/ - { - min_strength = std::numeric_limits<scalar_type>::max(); - - // here we loop over each dictionary vector and compute what its delta would be if - // we were to remove it from the dictionary and then try to add it back in. - for (unsigned long i = 0; i < dictionary.size(); ++i) - { - // compute a = K_inv*k but where dictionary vector i has been removed - a = (removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i)) * - (remove_row(colm(K,i),i)); - scalar_type delta = K(i,i) - trans(remove_row(colm(K,i),i))*a; - - if (delta < min_strength) - { - min_strength = delta; - min_vect_idx = i; - } - } - } - - - - typedef std_allocator<sample_type, mem_manager_type> alloc_sample_type; - typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type; - typedef std::vector<sample_type,alloc_sample_type> dictionary_vector_type; - typedef std::vector<scalar_type,alloc_scalar_type> alpha_vector_type; - - - scalar_type min_strength; - unsigned long min_vect_idx; - bool my_remove_oldest_first; - - kernel_type kernel; - dictionary_vector_type dictionary; - alpha_vector_type alpha; - - matrix<scalar_type,0,0,mem_manager_type> K_inv; - matrix<scalar_type,0,0,mem_manager_type> K; - - scalar_type my_tolerance; - unsigned long my_max_dictionary_size; - scalar_type samples_seen; - mutable scalar_type bias; - mutable bool bias_is_stale; - - - // temp variables here just so we don't have to reconstruct them over and over. Thus, - // they aren't really part of the state of this object. - matrix<scalar_type,0,1,mem_manager_type> a; - matrix<scalar_type,0,1,mem_manager_type> k; - - }; - -// ---------------------------------------------------------------------------------------- - - template <typename kernel_type> - void swap(kcentroid<kernel_type>& a, kcentroid<kernel_type>& b) - { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_KCENTROId_ - diff --git a/ml/dlib/dlib/svm/kcentroid_abstract.h b/ml/dlib/dlib/svm/kcentroid_abstract.h deleted file mode 100644 index 44b94c813..000000000 --- a/ml/dlib/dlib/svm/kcentroid_abstract.h +++ /dev/null @@ -1,339 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_KCENTROId_ABSTRACT_ -#ifdef DLIB_KCENTROId_ABSTRACT_ - -#include "../algs.h" -#include "../serialize.h" -#include "kernel_abstract.h" - -namespace dlib -{ - - template < - typename kernel_type - > - class kcentroid - { - /*! - REQUIREMENTS ON kernel_type - is a kernel function object as defined in dlib/svm/kernel_abstract.h - - INITIAL VALUE - - dictionary_size() == 0 - - samples_trained() == 0 - - WHAT THIS OBJECT REPRESENTS - This object represents a weighted sum of sample points in a kernel induced - feature space. It can be used to kernelize any algorithm that requires only - the ability to perform vector addition, subtraction, scalar multiplication, - and inner products. - - An example use of this object is as an online algorithm for recursively estimating - the centroid of a sequence of training points. This object then allows you to - compute the distance between the centroid and any test points. 
So you can use - this object to predict how similar a test point is to the data this object has - been trained on (larger distances from the centroid indicate dissimilarity/anomalous - points). - - Also note that the algorithm internally keeps a set of "dictionary vectors" - that are used to represent the centroid. You can force the algorithm to use - no more than a set number of vectors by setting the 3rd constructor argument - to whatever you want. - - This object uses the sparsification technique described in the paper The - Kernel Recursive Least Squares Algorithm by Yaakov Engel. This technique - allows us to keep the number of dictionary vectors down to a minimum. In fact, - the object has a user selectable tolerance parameter that controls the trade off - between accuracy and number of stored dictionary vectors. - !*/ - - public: - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - - kcentroid ( - ); - /*! - ensures - - this object is properly initialized - - #tolerance() == 0.001 - - #get_kernel() == kernel_type() (i.e. whatever the kernel's default value is) - - #max_dictionary_size() == 1000000 - - #remove_oldest_first() == false - !*/ - - explicit kcentroid ( - const kernel_type& kernel_, - scalar_type tolerance_ = 0.001, - unsigned long max_dictionary_size_ = 1000000, - bool remove_oldest_first_ = false - ); - /*! - requires - - tolerance > 0 - - max_dictionary_size_ > 1 - ensures - - this object is properly initialized - - #tolerance() == tolerance_ - - #get_kernel() == kernel_ - - #max_dictionary_size() == max_dictionary_size_ - - #remove_oldest_first() == remove_oldest_first_ - !*/ - - const kernel_type& get_kernel ( - ) const; - /*! - ensures - - returns a const reference to the kernel used by this object - !*/ - - unsigned long max_dictionary_size( - ) const; - /*! - ensures - - returns the maximum number of dictionary vectors this object will - use at a time. That is, dictionary_size() will never be greater - than max_dictionary_size(). - !*/ - - bool remove_oldest_first ( - ) const; - /*! - ensures - - When the maximum dictionary size is reached this object sometimes - needs to discard dictionary vectors when new samples are added via - one of the train functions. When this happens this object chooses - the dictionary vector to discard based on the setting of the - remove_oldest_first() parameter. - - if (remove_oldest_first() == true) then - - This object discards the oldest dictionary vectors when necessary. - This is an appropriate mode when using this object in an online - setting and the input training samples come from a slowly - varying distribution. - - else (remove_oldest_first() == false) then - - This object discards the most linearly dependent dictionary vectors - when necessary. This it the default behavior and should be used - in most cases. - !*/ - - unsigned long dictionary_size ( - ) const; - /*! - ensures - - returns the number of basis vectors in the dictionary. These are - the basis vectors used by this object to represent a point in kernel - feature space. - !*/ - - scalar_type samples_trained ( - ) const; - /*! - ensures - - returns the number of samples this object has been trained on so far - !*/ - - scalar_type tolerance( - ) const; - /*! - ensures - - returns the tolerance to use for the approximately linearly dependent - test used for sparsification (see the KRLS paper for details). 
This is - a number which governs how accurately this object will approximate the - centroid it is learning. Smaller values generally result in a more - accurate estimate while also resulting in a bigger set of vectors in - the dictionary. Bigger tolerances values result in a less accurate - estimate but also in less dictionary vectors. (Note that in any case, - the max_dictionary_size() limits the number of dictionary vectors no - matter the setting of the tolerance) - - The exact meaning of the tolerance parameter is the following: - Imagine that we have an empirical_kernel_map that contains all - the current dictionary vectors. Then the tolerance is the minimum - projection error (as given by empirical_kernel_map::project()) required - to cause us to include a new vector in the dictionary. So each time - you call train() the kcentroid basically just computes the projection - error for that new sample and if it is larger than the tolerance - then that new sample becomes part of the dictionary. - !*/ - - void clear_dictionary ( - ); - /*! - ensures - - clears out all learned data (e.g. #dictionary_size() == 0) - - #samples_seen() == 0 - !*/ - - scalar_type operator() ( - const kcentroid& x - ) const; - /*! - requires - - x.get_kernel() == get_kernel() - ensures - - returns the distance in kernel feature space between this centroid and the - centroid represented by x. - !*/ - - scalar_type operator() ( - const sample_type& x - ) const; - /*! - ensures - - returns the distance in kernel feature space between the sample x and the - current estimate of the centroid of the training samples given - to this object so far. - !*/ - - scalar_type inner_product ( - const sample_type& x - ) const; - /*! - ensures - - returns the inner product of the given x point and the current - estimate of the centroid of the training samples given to this object - so far. - !*/ - - scalar_type inner_product ( - const kcentroid& x - ) const; - /*! - requires - - x.get_kernel() == get_kernel() - ensures - - returns the inner product between x and this centroid object. - !*/ - - scalar_type squared_norm ( - ) const; - /*! - ensures - - returns the squared norm of the centroid vector represented by this - object. I.e. returns this->inner_product(*this) - !*/ - - void train ( - const sample_type& x - ); - /*! - ensures - - adds the sample x into the current estimate of the centroid - - also note that calling this function is equivalent to calling - train(x, samples_trained()/(samples_trained()+1.0, 1.0/(samples_trained()+1.0). - That is, this function finds the normal unweighted centroid of all training points. - !*/ - - void train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ); - /*! - ensures - - adds the sample x into the current estimate of the centroid but - uses a user given scale. That is, this function performs: - - new_centroid = cscale*old_centroid + xscale*x - - This function allows you to weight different samples however - you want. - !*/ - - void scale_by ( - scalar_type cscale - ); - /*! - ensures - - multiplies the current centroid vector by the given scale value. - This function is equivalent to calling train(some_x_value, cscale, 0). - So it performs: - - new_centroid == cscale*old_centroid - !*/ - - scalar_type test_and_train ( - const sample_type& x - ); - /*! - ensures - - calls train(x) - - returns (*this)(x) - - The reason this function exists is because train() and operator() - both compute some of the same things. 
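(For the unweighted train(x) described above, the scales work out to cscale = samples_trained()/(samples_trained()+1.0) and xscale = 1.0/(samples_trained()+1.0), i.e. the running-mean weights.) The shared-work point can be made concrete with a short sketch (kc and x as in the earlier example; kc2 is an identically trained copy):

    // One call: update the centroid, then score the just-trained sample.
    double score1 = kc.test_and_train(x);

    // Equivalent two-step form: the kernel sums over the dictionary are
    // evaluated once per call, so the work is done twice.
    kc2.train(x);
    double score2 = kc2(x);                   // score1 == score2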
So this function is more efficient - than calling both individually. - !*/ - - scalar_type test_and_train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ); - /*! - ensures - - calls train(x,cscale,xscale) - - returns (*this)(x) - - The reason this function exists is because train() and operator() - both compute some of the same things. So this function is more efficient - than calling both individually. - !*/ - - void swap ( - kcentroid& item - ); - /*! - ensures - - swaps *this with item - !*/ - - distance_function<kernel_type> get_distance_function ( - ) const; - /*! - ensures - - returns a distance function F that represents the point learned - by this object so far. I.e. it is the case that: - - for all x: F(x) == (*this)(x) - !*/ - - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type - > - void swap( - kcentroid<kernel_type>& a, - kcentroid<kernel_type>& b - ) { a.swap(b); } - /*! - provides a global swap function - !*/ - - template < - typename kernel_type - > - void serialize ( - const kcentroid<kernel_type>& item, - std::ostream& out - ); - /*! - provides serialization support for kcentroid objects - !*/ - - template < - typename kernel_type - > - void deserialize ( - kcentroid<kernel_type>& item, - std::istream& in - ); - /*! - provides serialization support for kcentroid objects - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_KCENTROId_ABSTRACT_ - diff --git a/ml/dlib/dlib/svm/kcentroid_overloads.h b/ml/dlib/dlib/svm/kcentroid_overloads.h deleted file mode 100644 index 9c39f3d78..000000000 --- a/ml/dlib/dlib/svm/kcentroid_overloads.h +++ /dev/null @@ -1,1324 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
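A quick round-trip sketch of the serialize()/deserialize() support declared at the end of the abstract above (the file name is illustrative and the types are those of the earlier example):

    #include <fstream>

    void save_and_restore(kcentroid<kernel_type>& kc)
    {
        std::ofstream fout("kc.dat", std::ios::binary);
        serialize(kc, fout);      // writes the full trained state
        fout.close();

        std::ifstream fin("kc.dat", std::ios::binary);
        deserialize(kc, fin);     // restores an equivalent object
    }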
-#ifndef DLIB_KCENTROId_OVERLOADS_ -#define DLIB_KCENTROId_OVERLOADS_ - -#include "kcentroid_abstract.h" -#include "sparse_kernel.h" -#include "sparse_vector.h" -#include <map> - -namespace dlib -{ - /* - This file contains optimized overloads of the kcentroid object for the following - linear cases: - kcentroid<linear_kernel<T>> - kcentroid<sparse_linear_kernel<T>> - kcentroid<offset_kernel<linear_kernel<T>>> - kcentroid<offset_kernel<sparse_linear_kernel<T>>> - */ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// Overloads for when kernel_type == linear_kernel -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template <typename T> - class kcentroid<linear_kernel<T> > - { - - - typedef linear_kernel<T> kernel_type; - public: - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - - - explicit kcentroid ( - const kernel_type& kernel_, - scalar_type tolerance_ = 0.001, - unsigned long max_dictionary_size_ = 1000000, - bool remove_oldest_first_ = false - ) : - my_remove_oldest_first(remove_oldest_first_), - kernel(kernel_), - my_tolerance(tolerance_), - my_max_dictionary_size(max_dictionary_size_) - { - // make sure requires clause is not broken - DLIB_ASSERT(tolerance_ >= 0 && max_dictionary_size_ > 0, - "\tkcentroid::kcentroid()" - << "\n\t You have to give a positive tolerance" - << "\n\t this: " << this - << "\n\t tolerance_: " << tolerance_ - << "\n\t max_dictionary_size_: " << max_dictionary_size_ - ); - - clear_dictionary(); - } - - scalar_type tolerance() const { return my_tolerance; } - unsigned long max_dictionary_size() const { return my_max_dictionary_size; } - bool remove_oldest_first () const { return my_remove_oldest_first; } - const kernel_type& get_kernel () const { return kernel; } - scalar_type samples_trained () const { return samples_seen; } - - void clear_dictionary () - { - samples_seen = 0; - set_all_elements(w, 0); - alpha = 0; - } - - scalar_type operator() ( - const kcentroid& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(x.get_kernel() == get_kernel(), - "\tscalar_type kcentroid::operator()(const kcentroid& x)" - << "\n\tYou can only compare two kcentroid objects if they use the same kernel" - << "\n\tthis: " << this - ); - - if (w.size() > 0) - { - if (x.w.size() > 0) - return length(alpha*w - x.alpha*x.w); - else - return alpha*length(w); - } - else - { - if (x.w.size() > 0) - return x.alpha*length(x.w); - else - return 0; - } - } - - scalar_type inner_product ( - const sample_type& x - ) const - { - if (w.size() > 0) - return alpha*trans(w)*x; - else - return 0; - } - - scalar_type inner_product ( - const kcentroid& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(x.get_kernel() == get_kernel(), - "\tscalar_type kcentroid::inner_product(const kcentroid& x)" - << "\n\tYou can only compare two kcentroid objects if they use the same kernel" - << "\n\tthis: " << this - ); - - if (w.size() > 0 && x.w.size() > 0) - return alpha*x.alpha*trans(w)*x.w; - else - return 0; - } - - scalar_type squared_norm ( - ) const - { - if (w.size() > 0) - return alpha*alpha*trans(w)*w; - else - return 0; - } - - scalar_type 
operator() ( - const sample_type& x - ) const - { - if (w.size() > 0) - return length(x-alpha*w); - else - return length(x); - } - - scalar_type test_and_train ( - const sample_type& x - ) - { - ++samples_seen; - const scalar_type xscale = 1/samples_seen; - const scalar_type cscale = 1-xscale; - - do_train(x, cscale, xscale); - - return (*this)(x); - } - - void train ( - const sample_type& x - ) - { - ++samples_seen; - const scalar_type xscale = 1/samples_seen; - const scalar_type cscale = 1-xscale; - - do_train(x, cscale, xscale); - } - - scalar_type test_and_train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ) - { - ++samples_seen; - - do_train(x, cscale, xscale); - - return (*this)(x); - } - - void scale_by ( - scalar_type cscale - ) - { - alpha *= cscale; - } - - void train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ) - { - ++samples_seen; - do_train(x, cscale, xscale); - } - - void swap ( - kcentroid& item - ) - { - exchange(my_remove_oldest_first, item.my_remove_oldest_first); - exchange(kernel, item.kernel); - exchange(w, item.w); - exchange(alpha, item.alpha); - exchange(my_tolerance, item.my_tolerance); - exchange(my_max_dictionary_size, item.my_max_dictionary_size); - exchange(samples_seen, item.samples_seen); - } - - unsigned long dictionary_size ( - ) const - { - if (samples_seen > 0) - return 1; - else - return 0; - } - - friend void serialize(const kcentroid& item, std::ostream& out) - { - serialize(item.my_remove_oldest_first, out); - serialize(item.kernel, out); - serialize(item.w, out); - serialize(item.alpha, out); - serialize(item.my_tolerance, out); - serialize(item.my_max_dictionary_size, out); - serialize(item.samples_seen, out); - } - - friend void deserialize(kcentroid& item, std::istream& in) - { - deserialize(item.my_remove_oldest_first, in); - deserialize(item.kernel, in); - deserialize(item.w, in); - deserialize(item.alpha, in); - deserialize(item.my_tolerance, in); - deserialize(item.my_max_dictionary_size, in); - deserialize(item.samples_seen, in); - } - - distance_function<kernel_type> get_distance_function ( - ) const - { - if (samples_seen > 0) - { - typename distance_function<kernel_type>::sample_vector_type temp_basis_vectors; - typename distance_function<kernel_type>::scalar_vector_type temp_alpha; - - temp_basis_vectors.set_size(1); - temp_basis_vectors(0) = w; - temp_alpha.set_size(1); - temp_alpha(0) = alpha; - - return distance_function<kernel_type>(temp_alpha, squared_norm(), kernel, temp_basis_vectors); - } - else - { - return distance_function<kernel_type>(kernel); - } - } - - private: - - void do_train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ) - { - set_size_of_w(x); - - const scalar_type temp = cscale*alpha; - - if (temp != 0) - { - w = w + xscale*x/temp; - alpha = temp; - } - else - { - w = cscale*alpha*w + xscale*x; - alpha = 1; - } - } - - void set_size_of_w ( - const sample_type& x - ) - { - if (x.size() != w.size()) - { - w.set_size(x.nr(), x.nc()); - set_all_elements(w, 0); - alpha = 0; - } - } - - bool my_remove_oldest_first; - - kernel_type kernel; - - sample_type w; - scalar_type alpha; - - - scalar_type my_tolerance; - unsigned long my_max_dictionary_size; - scalar_type samples_seen; - - }; - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// Overloads for when kernel_type == offset_kernel<linear_kernel> -// 
---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template <typename T> - class kcentroid<offset_kernel<linear_kernel<T> > > - { - - /*! - INITIAL VALUE - - x_extra == sqrt(kernel.offset) - - CONVENTION - - x_extra == sqrt(kernel.offset) - - w_extra == the value of the extra dimension tacked onto the - end of the w vector - !*/ - - typedef offset_kernel<linear_kernel<T> > kernel_type; - public: - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - - - explicit kcentroid ( - const kernel_type& kernel_, - scalar_type tolerance_ = 0.001, - unsigned long max_dictionary_size_ = 1000000, - bool remove_oldest_first_ = false - ) : - my_remove_oldest_first(remove_oldest_first_), - kernel(kernel_), - my_tolerance(tolerance_), - my_max_dictionary_size(max_dictionary_size_) - { - // make sure requires clause is not broken - DLIB_ASSERT(tolerance_ >= 0 && max_dictionary_size_ > 0, - "\tkcentroid::kcentroid()" - << "\n\t You have to give a positive tolerance" - << "\n\t this: " << this - << "\n\t tolerance_: " << tolerance_ - << "\n\t max_dictionary_size_: " << max_dictionary_size_ - ); - - x_extra = std::sqrt(kernel.offset); - - clear_dictionary(); - } - - scalar_type tolerance() const { return my_tolerance; } - unsigned long max_dictionary_size() const { return my_max_dictionary_size; } - bool remove_oldest_first () const { return my_remove_oldest_first; } - const kernel_type& get_kernel () const { return kernel; } - scalar_type samples_trained () const { return samples_seen; } - - void clear_dictionary () - { - samples_seen = 0; - set_all_elements(w, 0); - alpha = 0; - w_extra = x_extra; - } - - scalar_type operator() ( - const kcentroid& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(x.get_kernel() == get_kernel(), - "\tscalar_type kcentroid::operator()(const kcentroid& x)" - << "\n\tYou can only compare two kcentroid objects if they use the same kernel" - << "\n\tthis: " << this - ); - - if (w.size() > 0) - { - if (x.w.size() > 0) - { - scalar_type temp1 = length_squared(alpha*w - x.alpha*x.w); - scalar_type temp2 = alpha*w_extra - x.alpha*x.w_extra; - return std::sqrt(temp1 + temp2*temp2); - } - else - { - return alpha*std::sqrt(length_squared(w) + w_extra*w_extra); - } - } - else - { - if (x.w.size() > 0) - return x.alpha*std::sqrt(length_squared(x.w) + x.w_extra*x.w_extra); - else - return 0; - } - } - - scalar_type inner_product ( - const sample_type& x - ) const - { - if (w.size() > 0) - return alpha*(trans(w)*x + w_extra*x_extra); - else - return 0; - } - - scalar_type inner_product ( - const kcentroid& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(x.get_kernel() == get_kernel(), - "\tscalar_type kcentroid::inner_product(const kcentroid& x)" - << "\n\tYou can only compare two kcentroid objects if they use the same kernel" - << "\n\tthis: " << this - ); - - if (w.size() > 0 && x.w.size() > 0) - return alpha*x.alpha*(trans(w)*x.w + w_extra*x.w_extra); - else - return 0; - } - - scalar_type squared_norm ( - ) const - { - if (w.size() > 0) - return alpha*alpha*(trans(w)*w + w_extra*w_extra); - else - return 0; - } - - scalar_type operator() ( - const sample_type& x - ) const - { - if (w.size() > 0) - { - scalar_type temp1 = length_squared(x-alpha*w); - scalar_type temp2 = x_extra 
- alpha*w_extra; - return std::sqrt(temp1 + temp2*temp2); - } - else - { - return std::sqrt(length_squared(x) + x_extra*x_extra); - } - } - - scalar_type test_and_train ( - const sample_type& x - ) - { - ++samples_seen; - const scalar_type xscale = 1/samples_seen; - const scalar_type cscale = 1-xscale; - - do_train(x, cscale, xscale); - - return (*this)(x); - } - - void train ( - const sample_type& x - ) - { - ++samples_seen; - const scalar_type xscale = 1/samples_seen; - const scalar_type cscale = 1-xscale; - - do_train(x, cscale, xscale); - } - - scalar_type test_and_train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ) - { - ++samples_seen; - - do_train(x, cscale, xscale); - - return (*this)(x); - } - - void scale_by ( - scalar_type cscale - ) - { - alpha *= cscale; - w_extra *= cscale; - } - - void train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ) - { - ++samples_seen; - do_train(x, cscale, xscale); - } - - void swap ( - kcentroid& item - ) - { - exchange(my_remove_oldest_first, item.my_remove_oldest_first); - exchange(kernel, item.kernel); - exchange(w, item.w); - exchange(alpha, item.alpha); - exchange(w_extra, item.w_extra); - exchange(x_extra, item.x_extra); - exchange(my_tolerance, item.my_tolerance); - exchange(my_max_dictionary_size, item.my_max_dictionary_size); - exchange(samples_seen, item.samples_seen); - } - - unsigned long dictionary_size ( - ) const - { - if (samples_seen > 0) - { - if (std::abs(w_extra) > std::numeric_limits<scalar_type>::epsilon()) - return 1; - else - return 2; - } - else - return 0; - } - - friend void serialize(const kcentroid& item, std::ostream& out) - { - serialize(item.my_remove_oldest_first, out); - serialize(item.kernel, out); - serialize(item.w, out); - serialize(item.alpha, out); - serialize(item.w_extra, out); - serialize(item.x_extra, out); - serialize(item.my_tolerance, out); - serialize(item.my_max_dictionary_size, out); - serialize(item.samples_seen, out); - } - - friend void deserialize(kcentroid& item, std::istream& in) - { - deserialize(item.my_remove_oldest_first, in); - deserialize(item.kernel, in); - deserialize(item.w, in); - deserialize(item.alpha, in); - deserialize(item.w_extra, in); - deserialize(item.x_extra, in); - deserialize(item.my_tolerance, in); - deserialize(item.my_max_dictionary_size, in); - deserialize(item.samples_seen, in); - } - - distance_function<kernel_type> get_distance_function ( - ) const - { - - if (samples_seen > 0) - { - typename distance_function<kernel_type>::sample_vector_type temp_basis_vectors; - typename distance_function<kernel_type>::scalar_vector_type temp_alpha; - - // What we are doing here needs a bit of explanation. The w vector - // has an implicit extra dimension tacked on to it with the value of w_extra. - // The kernel we are using takes normal vectors and implicitly tacks the value - // x_extra onto their end. So what we are doing here is scaling w so that - // the value it should have tacked onto it is x_scale. Note that we also - // adjust alpha so that the combination of alpha*w stays the same. - scalar_type scale; - - // if w_extra is basically greater than 0 - if (std::abs(w_extra) > std::numeric_limits<scalar_type>::epsilon()) - { - scale = (x_extra/w_extra); - temp_basis_vectors.set_size(1); - temp_alpha.set_size(1); - temp_basis_vectors(0) = w*scale; - temp_alpha(0) = alpha/scale; - } - else - { - // In this case w_extra is zero. 
So the only way we can get the same - // thing in the output basis vector set is by using two vectors - temp_basis_vectors.set_size(2); - temp_alpha.set_size(2); - temp_basis_vectors(0) = 2*w; - temp_alpha(0) = alpha; - temp_basis_vectors(1) = w; - temp_alpha(1) = -alpha; - } - - - return distance_function<kernel_type>(temp_alpha, squared_norm(), kernel, temp_basis_vectors); - } - else - { - return distance_function<kernel_type>(kernel); - } - } - - private: - - void do_train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ) - { - set_size_of_w(x); - - const scalar_type temp = cscale*alpha; - - if (temp != 0) - { - w = w + xscale*x/temp; - w_extra = w_extra + xscale*x_extra/temp; - alpha = temp; - } - else - { - w = cscale*alpha*w + xscale*x; - w_extra = cscale*alpha*w_extra + xscale*x_extra; - alpha = 1; - } - } - - void set_size_of_w ( - const sample_type& x - ) - { - if (x.size() != w.size()) - { - w.set_size(x.nr(), x.nc()); - set_all_elements(w, 0); - alpha = 0; - w_extra = x_extra; - } - } - - bool my_remove_oldest_first; - - kernel_type kernel; - - sample_type w; - scalar_type alpha; - - scalar_type w_extra; - scalar_type x_extra; - - - scalar_type my_tolerance; - unsigned long my_max_dictionary_size; - scalar_type samples_seen; - - }; - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// Overloads for when kernel_type == sparse_linear_kernel -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template <typename T> - class kcentroid<sparse_linear_kernel<T> > - { - - - typedef sparse_linear_kernel<T> kernel_type; - public: - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - - - explicit kcentroid ( - const kernel_type& kernel_, - scalar_type tolerance_ = 0.001, - unsigned long max_dictionary_size_ = 1000000, - bool remove_oldest_first_ = false - ) : - my_remove_oldest_first(remove_oldest_first_), - kernel(kernel_), - my_tolerance(tolerance_), - my_max_dictionary_size(max_dictionary_size_) - { - // make sure requires clause is not broken - DLIB_ASSERT(tolerance_ >= 0 && max_dictionary_size_ > 0, - "\tkcentroid::kcentroid()" - << "\n\t You have to give a positive tolerance" - << "\n\t this: " << this - << "\n\t tolerance_: " << tolerance_ - << "\n\t max_dictionary_size_: " << max_dictionary_size_ - ); - - clear_dictionary(); - } - - scalar_type tolerance() const { return my_tolerance; } - unsigned long max_dictionary_size() const { return my_max_dictionary_size; } - bool remove_oldest_first () const { return my_remove_oldest_first; } - const kernel_type& get_kernel () const { return kernel; } - scalar_type samples_trained () const { return samples_seen; } - - void clear_dictionary () - { - samples_seen = 0; - w.clear(); - alpha = 0; - } - - scalar_type operator() ( - const kcentroid& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(x.get_kernel() == get_kernel(), - "\tscalar_type kcentroid::operator()(const kcentroid& x)" - << "\n\tYou can only compare two kcentroid objects if they use the same kernel" - << "\n\tthis: " << this - ); - - return distance(alpha,w , x.alpha,x.w); - } - - scalar_type inner_product ( - const sample_type& x - ) 
const - { - return alpha*dot(w,x); - } - - scalar_type inner_product ( - const kcentroid& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(x.get_kernel() == get_kernel(), - "\tscalar_type kcentroid::inner_product(const kcentroid& x)" - << "\n\tYou can only compare two kcentroid objects if they use the same kernel" - << "\n\tthis: " << this - ); - - return alpha*x.alpha*dot(w,x.w); - } - - scalar_type squared_norm ( - ) const - { - return alpha*alpha*length_squared(w); - } - - scalar_type operator() ( - const sample_type& x - ) const - { - return distance(static_cast<scalar_type>(1), x, alpha, w); - } - - scalar_type test_and_train ( - const sample_type& x - ) - { - ++samples_seen; - const scalar_type xscale = 1/samples_seen; - const scalar_type cscale = 1-xscale; - - do_train(x, cscale, xscale); - - return (*this)(x); - } - - void train ( - const sample_type& x - ) - { - ++samples_seen; - const scalar_type xscale = 1/samples_seen; - const scalar_type cscale = 1-xscale; - - do_train(x, cscale, xscale); - } - - scalar_type test_and_train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ) - { - ++samples_seen; - - do_train(x, cscale, xscale); - - return (*this)(x); - } - - void scale_by ( - scalar_type cscale - ) - { - alpha *= cscale; - } - - void train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ) - { - ++samples_seen; - do_train(x, cscale, xscale); - } - - void swap ( - kcentroid& item - ) - { - exchange(my_remove_oldest_first, item.my_remove_oldest_first); - exchange(kernel, item.kernel); - exchange(w, item.w); - exchange(alpha, item.alpha); - exchange(my_tolerance, item.my_tolerance); - exchange(my_max_dictionary_size, item.my_max_dictionary_size); - exchange(samples_seen, item.samples_seen); - } - - unsigned long dictionary_size ( - ) const - { - if (samples_seen > 0) - return 1; - else - return 0; - } - - friend void serialize(const kcentroid& item, std::ostream& out) - { - serialize(item.my_remove_oldest_first, out); - serialize(item.kernel, out); - serialize(item.w, out); - serialize(item.alpha, out); - serialize(item.my_tolerance, out); - serialize(item.my_max_dictionary_size, out); - serialize(item.samples_seen, out); - } - - friend void deserialize(kcentroid& item, std::istream& in) - { - deserialize(item.my_remove_oldest_first, in); - deserialize(item.kernel, in); - deserialize(item.w, in); - deserialize(item.alpha, in); - deserialize(item.my_tolerance, in); - deserialize(item.my_max_dictionary_size, in); - deserialize(item.samples_seen, in); - } - - distance_function<kernel_type> get_distance_function ( - ) const - { - if (samples_seen > 0) - { - typename distance_function<kernel_type>::sample_vector_type temp_basis_vectors; - typename distance_function<kernel_type>::scalar_vector_type temp_alpha; - - temp_basis_vectors.set_size(1); - temp_basis_vectors(0) = sample_type(w.begin(), w.end()); - temp_alpha.set_size(1); - temp_alpha(0) = alpha; - - return distance_function<kernel_type>(temp_alpha, squared_norm(), kernel, temp_basis_vectors); - } - else - { - return distance_function<kernel_type>(kernel); - } - } - - private: - - void do_train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ) - { - const scalar_type temp = cscale*alpha; - - if (temp != 0) - { - // compute w += xscale*x/temp - typename sample_type::const_iterator i; - for (i = x.begin(); i != x.end(); ++i) - { - w[i->first] += xscale*(i->second)/temp; - } - - alpha = temp; - } - else - { - // first compute w = 
cscale*alpha*w - for (typename std::map<unsigned long,scalar_type>::iterator i = w.begin(); i != w.end(); ++i) - { - i->second *= cscale*alpha; - } - - // now compute w += xscale*x - for (typename sample_type::const_iterator i = x.begin(); i != x.end(); ++i) - { - w[i->first] += xscale*(i->second); - } - - alpha = 1; - } - } - - bool my_remove_oldest_first; - - kernel_type kernel; - - std::map<unsigned long,scalar_type> w; - scalar_type alpha; - - - scalar_type my_tolerance; - unsigned long my_max_dictionary_size; - scalar_type samples_seen; - - }; - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// Overloads for when kernel_type == offset_kernel<sparse_linear_kernel> -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template <typename T> - class kcentroid<offset_kernel<sparse_linear_kernel<T> > > - { - - /*! - INITIAL VALUE - - x_extra == sqrt(kernel.offset) - - CONVENTION - - x_extra == sqrt(kernel.offset) - - w_extra == the value of the extra dimension tacked onto the - end of the w vector - !*/ - - typedef offset_kernel<sparse_linear_kernel<T> > kernel_type; - public: - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - - - explicit kcentroid ( - const kernel_type& kernel_, - scalar_type tolerance_ = 0.001, - unsigned long max_dictionary_size_ = 1000000, - bool remove_oldest_first_ = false - ) : - my_remove_oldest_first(remove_oldest_first_), - kernel(kernel_), - my_tolerance(tolerance_), - my_max_dictionary_size(max_dictionary_size_) - { - // make sure requires clause is not broken - DLIB_ASSERT(tolerance_ >= 0 && max_dictionary_size_ > 0, - "\tkcentroid::kcentroid()" - << "\n\t You have to give a positive tolerance" - << "\n\t this: " << this - << "\n\t tolerance_: " << tolerance_ - << "\n\t max_dictionary_size_: " << max_dictionary_size_ - ); - - x_extra = std::sqrt(kernel.offset); - - clear_dictionary(); - } - - scalar_type tolerance() const { return my_tolerance; } - unsigned long max_dictionary_size() const { return my_max_dictionary_size; } - bool remove_oldest_first () const { return my_remove_oldest_first; } - const kernel_type& get_kernel () const { return kernel; } - scalar_type samples_trained () const { return samples_seen; } - - void clear_dictionary () - { - samples_seen = 0; - w.clear(); - alpha = 0; - w_extra = x_extra; - } - - scalar_type operator() ( - const kcentroid& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(x.get_kernel() == get_kernel(), - "\tscalar_type kcentroid::operator()(const kcentroid& x)" - << "\n\tYou can only compare two kcentroid objects if they use the same kernel" - << "\n\tthis: " << this - ); - - if (samples_seen > 0) - { - scalar_type temp1 = distance_squared(alpha,w , x.alpha,x.w); - scalar_type temp2 = alpha*w_extra - x.alpha*x.w_extra; - return std::sqrt(temp1 + temp2*temp2); - } - else - { - return 0; - } - } - - scalar_type inner_product ( - const sample_type& x - ) const - { - if (samples_seen > 0) - return alpha*(dot(w,x) + w_extra*x_extra); - else - return 0; - } - - scalar_type inner_product ( - const kcentroid& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(x.get_kernel() == 
get_kernel(), - "\tscalar_type kcentroid::inner_product(const kcentroid& x)" - << "\n\tYou can only compare two kcentroid objects if they use the same kernel" - << "\n\tthis: " << this - ); - - if (samples_seen > 0 && x.samples_seen > 0) - return alpha*x.alpha*(dot(w,x.w) + w_extra*x.w_extra); - else - return 0; - } - - scalar_type squared_norm ( - ) const - { - if (samples_seen > 0) - return alpha*alpha*(length_squared(w) + w_extra*w_extra); - else - return 0; - } - - scalar_type operator() ( - const sample_type& x - ) const - { - if (samples_seen > 0) - { - scalar_type temp1 = distance_squared(1,x,alpha,w); - scalar_type temp2 = x_extra - alpha*w_extra; - return std::sqrt(temp1 + temp2*temp2); - } - else - { - return std::sqrt(length_squared(x) + x_extra*x_extra); - } - } - - scalar_type test_and_train ( - const sample_type& x - ) - { - ++samples_seen; - const scalar_type xscale = 1/samples_seen; - const scalar_type cscale = 1-xscale; - - do_train(x, cscale, xscale); - - return (*this)(x); - } - - void train ( - const sample_type& x - ) - { - ++samples_seen; - const scalar_type xscale = 1/samples_seen; - const scalar_type cscale = 1-xscale; - - do_train(x, cscale, xscale); - } - - scalar_type test_and_train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ) - { - ++samples_seen; - - do_train(x, cscale, xscale); - - return (*this)(x); - } - - void scale_by ( - scalar_type cscale - ) - { - alpha *= cscale; - w_extra *= cscale; - } - - void train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ) - { - ++samples_seen; - do_train(x, cscale, xscale); - } - - void swap ( - kcentroid& item - ) - { - exchange(my_remove_oldest_first, item.my_remove_oldest_first); - exchange(kernel, item.kernel); - exchange(w, item.w); - exchange(alpha, item.alpha); - exchange(w_extra, item.w_extra); - exchange(x_extra, item.x_extra); - exchange(my_tolerance, item.my_tolerance); - exchange(my_max_dictionary_size, item.my_max_dictionary_size); - exchange(samples_seen, item.samples_seen); - } - - unsigned long dictionary_size ( - ) const - { - if (samples_seen > 0) - { - if (std::abs(w_extra) > std::numeric_limits<scalar_type>::epsilon()) - return 1; - else - return 2; - } - else - { - return 0; - } - } - - friend void serialize(const kcentroid& item, std::ostream& out) - { - serialize(item.my_remove_oldest_first, out); - serialize(item.kernel, out); - serialize(item.w, out); - serialize(item.alpha, out); - serialize(item.w_extra, out); - serialize(item.x_extra, out); - serialize(item.my_tolerance, out); - serialize(item.my_max_dictionary_size, out); - serialize(item.samples_seen, out); - } - - friend void deserialize(kcentroid& item, std::istream& in) - { - deserialize(item.my_remove_oldest_first, in); - deserialize(item.kernel, in); - deserialize(item.w, in); - deserialize(item.alpha, in); - deserialize(item.w_extra, in); - deserialize(item.x_extra, in); - deserialize(item.my_tolerance, in); - deserialize(item.my_max_dictionary_size, in); - deserialize(item.samples_seen, in); - } - - distance_function<kernel_type> get_distance_function ( - ) const - { - if (samples_seen > 0) - { - typename distance_function<kernel_type>::sample_vector_type temp_basis_vectors; - typename distance_function<kernel_type>::scalar_vector_type temp_alpha; - - // What we are doing here needs a bit of explanation. The w vector - // has an implicit extra dimension tacked on to it with the value of w_extra. 
- // The kernel we are using takes normal vectors and implicitly tacks the value - // x_extra onto their end. So what we are doing here is scaling w so that - // the value it should have tacked onto it is x_scale. Note that we also - // adjust alpha so that the combination of alpha*w stays the same. - scalar_type scale; - - // if w_extra is basically greater than 0 - if (std::abs(w_extra) > std::numeric_limits<scalar_type>::epsilon()) - { - scale = (x_extra/w_extra); - temp_basis_vectors.set_size(1); - temp_alpha.set_size(1); - temp_basis_vectors(0) = sample_type(w.begin(), w.end()); - dlib::scale_by(temp_basis_vectors(0), scale); - temp_alpha(0) = alpha/scale; - } - else - { - // In this case w_extra is zero. So the only way we can get the same - // thing in the output basis vector set is by using two vectors - temp_basis_vectors.set_size(2); - temp_alpha.set_size(2); - temp_basis_vectors(0) = sample_type(w.begin(), w.end()); - dlib::scale_by(temp_basis_vectors(0), 2); - temp_alpha(0) = alpha; - temp_basis_vectors(1) = sample_type(w.begin(), w.end()); - temp_alpha(1) = -alpha; - } - - return distance_function<kernel_type>(temp_alpha, squared_norm(), kernel, temp_basis_vectors); - - } - else - { - return distance_function<kernel_type>(kernel); - } - - } - - private: - - void do_train ( - const sample_type& x, - scalar_type cscale, - scalar_type xscale - ) - { - - const scalar_type temp = cscale*alpha; - - if (temp != 0) - { - // compute w += xscale*x/temp - typename sample_type::const_iterator i; - for (i = x.begin(); i != x.end(); ++i) - { - w[i->first] += xscale*(i->second)/temp; - } - - w_extra = w_extra + xscale*x_extra/temp; - alpha = temp; - } - else - { - // first compute w = cscale*alpha*w - for (typename std::map<unsigned long,scalar_type>::iterator i = w.begin(); i != w.end(); ++i) - { - i->second *= cscale*alpha; - } - - // now compute w += xscale*x - for (typename sample_type::const_iterator i = x.begin(); i != x.end(); ++i) - { - w[i->first] += xscale*(i->second); - } - - - w_extra = cscale*alpha*w_extra + xscale*x_extra; - alpha = 1; - } - } - - bool my_remove_oldest_first; - - kernel_type kernel; - - std::map<unsigned long,scalar_type> w; - scalar_type alpha; - - scalar_type w_extra; - scalar_type x_extra; - - - scalar_type my_tolerance; - unsigned long my_max_dictionary_size; - scalar_type samples_seen; - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_KCENTROId_OVERLOADS_ - - diff --git a/ml/dlib/dlib/svm/kernel.h b/ml/dlib/dlib/svm/kernel.h deleted file mode 100644 index 907420986..000000000 --- a/ml/dlib/dlib/svm/kernel.h +++ /dev/null @@ -1,569 +0,0 @@ -// Copyright (C) 2007 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_SVm_KERNEL -#define DLIB_SVm_KERNEL - -#include "kernel_abstract.h" -#include <cmath> -#include <limits> -#include <sstream> -#include "../matrix.h" -#include "../algs.h" -#include "../serialize.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < typename kernel_type > struct kernel_derivative; - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct radial_basis_kernel - { - typedef typename T::type scalar_type; - typedef T sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - // T must be capable of representing a column vector. 
- COMPILE_TIME_ASSERT(T::NC == 1 || T::NC == 0); - - radial_basis_kernel(const scalar_type g) : gamma(g) {} - radial_basis_kernel() : gamma(0.1) {} - radial_basis_kernel( - const radial_basis_kernel& k - ) : gamma(k.gamma) {} - - - const scalar_type gamma; - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const - { - const scalar_type d = trans(a-b)*(a-b); - return std::exp(-gamma*d); - } - - radial_basis_kernel& operator= ( - const radial_basis_kernel& k - ) - { - const_cast<scalar_type&>(gamma) = k.gamma; - return *this; - } - - bool operator== ( - const radial_basis_kernel& k - ) const - { - return gamma == k.gamma; - } - }; - - template < - typename T - > - void serialize ( - const radial_basis_kernel<T>& item, - std::ostream& out - ) - { - try - { - serialize(item.gamma, out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type radial_basis_kernel"); - } - } - - template < - typename T - > - void deserialize ( - radial_basis_kernel<T>& item, - std::istream& in - ) - { - typedef typename T::type scalar_type; - try - { - deserialize(const_cast<scalar_type&>(item.gamma), in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type radial_basis_kernel"); - } - } - - template < - typename T - > - struct kernel_derivative<radial_basis_kernel<T> > - { - typedef typename T::type scalar_type; - typedef T sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - kernel_derivative(const radial_basis_kernel<T>& k_) : k(k_){} - - const sample_type& operator() (const sample_type& x, const sample_type& y) const - { - // return the derivative of the rbf kernel - temp = 2*k.gamma*(x-y)*k(x,y); - return temp; - } - - const radial_basis_kernel<T>& k; - mutable sample_type temp; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct polynomial_kernel - { - typedef typename T::type scalar_type; - typedef T sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - // T must be capable of representing a column vector. 
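For reference, the expression in kernel_derivative<radial_basis_kernel> above is the gradient of the kernel in its second argument:

    \frac{\partial}{\partial y}\, e^{-\gamma\,(x-y)^\top (x-y)} \;=\; 2\gamma\,(x-y)\, k(x,y)

which is exactly the temp = 2*k.gamma*(x-y)*k(x,y) line.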
- COMPILE_TIME_ASSERT(T::NC == 1 || T::NC == 0); - - polynomial_kernel(const scalar_type g, const scalar_type c, const scalar_type d) : gamma(g), coef(c), degree(d) {} - polynomial_kernel() : gamma(1), coef(0), degree(1) {} - polynomial_kernel( - const polynomial_kernel& k - ) : gamma(k.gamma), coef(k.coef), degree(k.degree) {} - - typedef T type; - const scalar_type gamma; - const scalar_type coef; - const scalar_type degree; - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const - { - return std::pow(gamma*(trans(a)*b) + coef, degree); - } - - polynomial_kernel& operator= ( - const polynomial_kernel& k - ) - { - const_cast<scalar_type&>(gamma) = k.gamma; - const_cast<scalar_type&>(coef) = k.coef; - const_cast<scalar_type&>(degree) = k.degree; - return *this; - } - - bool operator== ( - const polynomial_kernel& k - ) const - { - return (gamma == k.gamma) && (coef == k.coef) && (degree == k.degree); - } - }; - - template < - typename T - > - void serialize ( - const polynomial_kernel<T>& item, - std::ostream& out - ) - { - try - { - serialize(item.gamma, out); - serialize(item.coef, out); - serialize(item.degree, out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type polynomial_kernel"); - } - } - - template < - typename T - > - void deserialize ( - polynomial_kernel<T>& item, - std::istream& in - ) - { - typedef typename T::type scalar_type; - try - { - deserialize(const_cast<scalar_type&>(item.gamma), in); - deserialize(const_cast<scalar_type&>(item.coef), in); - deserialize(const_cast<scalar_type&>(item.degree), in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type polynomial_kernel"); - } - } - - template < - typename T - > - struct kernel_derivative<polynomial_kernel<T> > - { - typedef typename T::type scalar_type; - typedef T sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - kernel_derivative(const polynomial_kernel<T>& k_) : k(k_){} - - const sample_type& operator() (const sample_type& x, const sample_type& y) const - { - // return the derivative of the polynomial kernel - temp = k.degree*k.gamma*x*std::pow(k.gamma*(trans(x)*y) + k.coef, k.degree-1); - return temp; - } - - const polynomial_kernel<T>& k; - mutable sample_type temp; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct sigmoid_kernel - { - typedef typename T::type scalar_type; - typedef T sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - // T must be capable of representing a column vector.
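Likewise, the polynomial derivative above is a direct chain-rule computation:

    \frac{\partial}{\partial y}\, \big(\gamma\, x^\top y + c\big)^{d} \;=\; d\,\gamma\, x\, \big(\gamma\, x^\top y + c\big)^{d-1}

matching temp = k.degree*k.gamma*x*std::pow(k.gamma*(trans(x)*y) + k.coef, k.degree-1).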
- COMPILE_TIME_ASSERT(T::NC == 1 || T::NC == 0); - - sigmoid_kernel(const scalar_type g, const scalar_type c) : gamma(g), coef(c) {} - sigmoid_kernel() : gamma(0.1), coef(-1.0) {} - sigmoid_kernel( - const sigmoid_kernel& k - ) : gamma(k.gamma), coef(k.coef) {} - - typedef T type; - const scalar_type gamma; - const scalar_type coef; - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const - { - return std::tanh(gamma*(trans(a)*b) + coef); - } - - sigmoid_kernel& operator= ( - const sigmoid_kernel& k - ) - { - const_cast<scalar_type&>(gamma) = k.gamma; - const_cast<scalar_type&>(coef) = k.coef; - return *this; - } - - bool operator== ( - const sigmoid_kernel& k - ) const - { - return (gamma == k.gamma) && (coef == k.coef); - } - }; - - template < - typename T - > - void serialize ( - const sigmoid_kernel<T>& item, - std::ostream& out - ) - { - try - { - serialize(item.gamma, out); - serialize(item.coef, out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type sigmoid_kernel"); - } - } - - template < - typename T - > - void deserialize ( - sigmoid_kernel<T>& item, - std::istream& in - ) - { - typedef typename T::type scalar_type; - try - { - deserialize(const_cast<scalar_type&>(item.gamma), in); - deserialize(const_cast<scalar_type&>(item.coef), in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type sigmoid_kernel"); - } - } - - template < - typename T - > - struct kernel_derivative<sigmoid_kernel<T> > - { - typedef typename T::type scalar_type; - typedef T sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - kernel_derivative(const sigmoid_kernel<T>& k_) : k(k_){} - - const sample_type& operator() (const sample_type& x, const sample_type& y) const - { - // return the derivative of the sigmoid kernel - temp = k.gamma*x*(1-std::pow(k(x,y),2)); - return temp; - } - - const sigmoid_kernel<T>& k; - mutable sample_type temp; - }; - -// ---------------------------------------------------------------------------------------- - - template <typename T> - struct linear_kernel - { - typedef typename T::type scalar_type; - typedef T sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - // T must be capable of representing a column vector.
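These kernel_derivative functors are easy to sanity-check with central differences. A minimal sketch, shown for the sigmoid kernel (the kernel parameters, sample values, and step size are illustrative):

    #include <dlib/svm.h>
    #include <iostream>

    int main()
    {
        using namespace dlib;
        typedef matrix<double,3,1> sample_type;
        typedef sigmoid_kernel<sample_type> kernel_type;

        kernel_type k(0.2, -1.0);
        kernel_derivative<kernel_type> der(k);

        sample_type x, y;
        x = 1.0, 2.0, 3.0;
        y = 0.5, -1.0, 2.0;

        const double eps = 1e-6;
        const sample_type analytic = der(x, y);   // copy; der() reuses a temp
        for (long i = 0; i < y.size(); ++i)
        {
            sample_type yp = y;  yp(i) += eps;
            sample_type ym = y;  ym(i) -= eps;
            const double numeric = (k(x, yp) - k(x, ym)) / (2*eps);
            std::cout << "component " << i << " error: "
                      << analytic(i) - numeric << "\n";   // should be ~1e-9 or smaller
        }
    }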
- COMPILE_TIME_ASSERT(T::NC == 1 || T::NC == 0); - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const - { - return trans(a)*b; - } - - bool operator== ( - const linear_kernel& - ) const - { - return true; - } - }; - - template < - typename T - > - void serialize ( - const linear_kernel<T>& , - std::ostream& - ){} - - template < - typename T - > - void deserialize ( - linear_kernel<T>& , - std::istream& - ){} - - template < - typename T - > - struct kernel_derivative<linear_kernel<T> > - { - typedef typename T::type scalar_type; - typedef T sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - kernel_derivative(const linear_kernel<T>& k_) : k(k_){} - - const sample_type& operator() (const sample_type& x, const sample_type& ) const - { - return x; - } - - const linear_kernel<T>& k; - }; - -// ---------------------------------------------------------------------------------------- - - template <typename T> - struct histogram_intersection_kernel - { - typedef typename T::type scalar_type; - typedef T sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const - { - scalar_type temp = 0; - for (long i = 0; i < a.size(); ++i) - { - temp += std::min(a(i), b(i)); - } - return temp; - } - - bool operator== ( - const histogram_intersection_kernel& - ) const - { - return true; - } - }; - - template < - typename T - > - void serialize ( - const histogram_intersection_kernel<T>& , - std::ostream& - ){} - - template < - typename T - > - void deserialize ( - histogram_intersection_kernel<T>& , - std::istream& - ){} - -// ---------------------------------------------------------------------------------------- - - template <typename T> - struct offset_kernel - { - typedef typename T::scalar_type scalar_type; - typedef typename T::sample_type sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - offset_kernel(const T& k, const scalar_type& offset_ - ) : kernel(k), offset(offset_) {} - offset_kernel() : kernel(T()), offset(0.01) {} - offset_kernel( - const offset_kernel& k - ) : kernel(k.kernel), offset(k.offset) {} - - const T kernel; - const scalar_type offset; - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const - { - return kernel(a,b) + offset; - } - - offset_kernel& operator= ( - const offset_kernel& k - ) - { - const_cast<T&>(kernel) = k.kernel; - const_cast<scalar_type&>(offset) = k.offset; - return *this; - } - - bool operator== ( - const offset_kernel& k - ) const - { - return k.kernel == kernel && offset == k.offset; - } - }; - - template < - typename T - > - void serialize ( - const offset_kernel<T>& item, - std::ostream& out - ) - { - try - { - serialize(item.offset, out); - serialize(item.kernel, out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type offset_kernel"); - } - } - - template < - typename T - > - void deserialize ( - offset_kernel<T>& item, - std::istream& in - ) - { - typedef typename offset_kernel<T>::scalar_type scalar_type; - try - { - deserialize(const_cast<scalar_type&>(item.offset), in); - deserialize(const_cast<T&>(item.kernel), in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type offset_kernel"); - } - } - - template < - typename T - > - struct kernel_derivative<offset_kernel<T> > - { - typedef typename T::scalar_type scalar_type; - typedef 
typename T::sample_type sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - kernel_derivative(const offset_kernel<T>& k) : der(k.kernel){} - - const sample_type operator() (const sample_type& x, const sample_type& y) const - { - return der(x,y); - } - - kernel_derivative<T> der; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_KERNEL - - diff --git a/ml/dlib/dlib/svm/kernel_abstract.h b/ml/dlib/dlib/svm/kernel_abstract.h deleted file mode 100644 index f72430eb8..000000000 --- a/ml/dlib/dlib/svm/kernel_abstract.h +++ /dev/null @@ -1,681 +0,0 @@ -// Copyright (C) 2007 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SVm_KERNEL_ABSTRACT_ -#ifdef DLIB_SVm_KERNEL_ABSTRACT_ - -#include <cmath> -#include <limits> -#include <sstream> -#include "../matrix/matrix_abstract.h" -#include "../algs.h" -#include "../serialize.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -/*!A Kernel_Function_Objects */ -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - /*! - WHAT IS A KERNEL FUNCTION OBJECT? - In the context of the dlib library documentation a kernel function object - is an object with an interface with the following properties: - - a public typedef named sample_type - - a public typedef named scalar_type which should be a float, double, or - long double type. - - an overloaded operator() that operates on two items of sample_type - and returns a scalar_type. - (e.g. scalar_type val = kernel_function(sample1,sample2); - would be a valid expression) - - a public typedef named mem_manager_type that is an implementation of - dlib/memory_manager/memory_manager_kernel_abstract.h or - dlib/memory_manager_global/memory_manager_global_kernel_abstract.h or - dlib/memory_manager_stateless/memory_manager_stateless_kernel_abstract.h - - an overloaded == operator that tells you if two kernels are - identical or not. - - THREAD SAFETY - For a kernel function to be threadsafe it means that it must be safe to - evaluate an expression like val = kernel_function(sample1,sample2) - simultaneously from multiple threads, even when the threads operate on the same - object instances (i.e. kernel_function, sample1, and sample2). The most common - way to make this safe is to ensure that the kernel function does not mutate any - data, either in itself or in its arguments. - - For examples of kernel functions see the following objects - (e.g. the radial_basis_kernel). - !*/ - - template < - typename T - > - struct radial_basis_kernel - { - /*! - REQUIREMENTS ON T - T must be a dlib::matrix object - - WHAT THIS OBJECT REPRESENTS - This object represents a radial basis function kernel - - THREAD SAFETY - This kernel is threadsafe. - !*/ - - typedef typename T::type scalar_type; - typedef T sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - const scalar_type gamma; - - radial_basis_kernel( - ); - /*! - ensures - - #gamma == 0.1 - !*/ - - radial_basis_kernel( - const radial_basis_kernel& k - ); - /*! - ensures - - #gamma == k.gamma - !*/ - - radial_basis_kernel( - const scalar_type g - ); - /*! 
- ensures - - #gamma == g - !*/ - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const; - /*! - requires - - a.nc() == 1 - - b.nc() == 1 - - a.nr() == b.nr() - ensures - - returns exp(-gamma * ||a-b||^2) - !*/ - - radial_basis_kernel& operator= ( - const radial_basis_kernel& k - ); - /*! - ensures - - #gamma = k.gamma - - returns *this - !*/ - - bool operator== ( - const radial_basis_kernel& k - ) const; - /*! - ensures - - if (k and *this are identical) then - - returns true - - else - - returns false - !*/ - - }; - - template < - typename T - > - void serialize ( - const radial_basis_kernel<T>& item, - std::ostream& out - ); - /*! - provides serialization support for radial_basis_kernel - !*/ - - template < - typename T - > - void deserialize ( - radial_basis_kernel<T>& item, - std::istream& in - ); - /*! - provides deserialization support for radial_basis_kernel - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct sigmoid_kernel - { - /*! - REQUIREMENTS ON T - T must be a dlib::matrix object - - WHAT THIS OBJECT REPRESENTS - This object represents a sigmoid kernel - - THREAD SAFETY - This kernel is threadsafe. - !*/ - - typedef typename T::type scalar_type; - typedef T sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - const scalar_type gamma; - const scalar_type coef; - - sigmoid_kernel( - ); - /*! - ensures - - #gamma == 0.1 - - #coef == -1.0 - !*/ - - sigmoid_kernel( - const sigmoid_kernel& k - ); - /*! - ensures - - #gamma == k.gamma - - #coef == k.coef - !*/ - - sigmoid_kernel( - const scalar_type g, - const scalar_type c - ); - /*! - ensures - - #gamma == g - - #coef == c - !*/ - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const; - /*! - requires - - a.nc() == 1 - - b.nc() == 1 - - a.nr() == b.nr() - ensures - - returns tanh(gamma*trans(a)*b + coef) - !*/ - - sigmoid_kernel& operator= ( - const sigmoid_kernel& k - ); - /*! - ensures - - #gamma = k.gamma - - #coef = k.coef - - returns *this - !*/ - - bool operator== ( - const sigmoid_kernel& k - ) const; - /*! - ensures - - if (k and *this are identical) then - - returns true - - else - - returns false - !*/ - }; - - template < - typename T - > - void serialize ( - const sigmoid_kernel<T>& item, - std::ostream& out - ); - /*! - provides serialization support for sigmoid_kernel - !*/ - - template < - typename T - > - void deserialize ( - sigmoid_kernel<T>& item, - std::istream& in - ); - /*! - provides deserialization support for sigmoid_kernel - !*/ - - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct polynomial_kernel - { - /*! - REQUIREMENTS ON T - T must be a dlib::matrix object - - WHAT THIS OBJECT REPRESENTS - This object represents a polynomial kernel - - THREAD SAFETY - This kernel is threadsafe. - !*/ - - typedef typename T::type scalar_type; - typedef T sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - const scalar_type gamma; - const scalar_type coef; - const scalar_type degree; - - polynomial_kernel( - ); - /*! - ensures - - #gamma == 1 - - #coef == 0 - - #degree == 1 - !*/ - - polynomial_kernel( - const polynomial_kernel& k - ); - /*! - ensures - - #gamma == k.gamma - - #coef == k.coef - - #degree == k.degree - !*/ - - polynomial_kernel( - const scalar_type g, - const scalar_type c, - const scalar_type d - ); - /*! 
- ensures - - #gamma == g - - #coef == c - - #degree == d - !*/ - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const; - /*! - requires - - a.nc() == 1 - - b.nc() == 1 - - a.nr() == b.nr() - ensures - - returns pow(gamma*trans(a)*b + coef, degree) - !*/ - - polynomial_kernel& operator= ( - const polynomial_kernel& k - ); - /*! - ensures - - #gamma = k.gamma - - #coef = k.coef - - #degree = k.degree - - returns *this - !*/ - - bool operator== ( - const polynomial_kernel& k - ) const; - /*! - ensures - - if (k and *this are identical) then - - returns true - - else - - returns false - !*/ - }; - - template < - typename T - > - void serialize ( - const polynomial_kernel<T>& item, - std::ostream& out - ); - /*! - provides serialization support for polynomial_kernel - !*/ - - template < - typename T - > - void deserialize ( - polynomial_kernel<T>& item, - std::istream& in - ); - /*! - provides deserialization support for polynomial_kernel - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct linear_kernel - { - /*! - REQUIREMENTS ON T - T must be a dlib::matrix object - - WHAT THIS OBJECT REPRESENTS - This object represents a linear function kernel - - THREAD SAFETY - This kernel is threadsafe. - !*/ - - typedef typename T::type scalar_type; - typedef T sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const; - /*! - requires - - a.nc() == 1 - - b.nc() == 1 - - a.nr() == b.nr() - ensures - - returns trans(a)*b - !*/ - - bool operator== ( - const linear_kernel& k - ) const; - /*! - ensures - - returns true - !*/ - }; - - template < - typename T - > - void serialize ( - const linear_kernel<T>& item, - std::ostream& out - ); - /*! - provides serialization support for linear_kernel - !*/ - - template < - typename T - > - void deserialize ( - linear_kernel<T>& item, - std::istream& in - ); - /*! - provides deserialization support for linear_kernel - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct histogram_intersection_kernel - { - /*! - REQUIREMENTS ON T - T must be a dlib::matrix object - - WHAT THIS OBJECT REPRESENTS - This object represents a histogram intersection kernel kernel - - THREAD SAFETY - This kernel is threadsafe. - !*/ - - typedef typename T::type scalar_type; - typedef T sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const; - /*! - requires - - is_vector(a) - - is_vector(b) - - a.size() == b.size() - - min(a) >= 0 - - min(b) >= 0 - ensures - - returns sum over all i: std::min(a(i), b(i)) - !*/ - - bool operator== ( - const histogram_intersection_kernel& k - ) const; - /*! - ensures - - returns true - !*/ - }; - - template < - typename T - > - void serialize ( - const histogram_intersection_kernel<T>& item, - std::ostream& out - ); - /*! - provides serialization support for histogram_intersection_kernel - !*/ - - template < - typename T - > - void deserialize ( - histogram_intersection_kernel<T>& item, - std::istream& in - ); - /*! - provides deserialization support for histogram_intersection_kernel - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct offset_kernel - { - /*! 
- REQUIREMENTS ON T - T must be a kernel object (e.g. radial_basis_kernel, polynomial_kernel, etc.) - - WHAT THIS OBJECT REPRESENTS - This object represents a kernel with a fixed value offset - added to it. - - THREAD SAFETY - This kernel is threadsafe. - !*/ - - typedef typename T::scalar_type scalar_type; - typedef typename T::sample_type sample_type; - typedef typename T::mem_manager_type mem_manager_type; - - const T kernel; - const scalar_type offset; - - offset_kernel( - ); - /*! - ensures - - #offset == 0.01 - !*/ - - offset_kernel( - const offset_kernel& k - ); - /*! - ensures - - #offset == k.offset - - #kernel == k.kernel - !*/ - - offset_kernel( - const T& k, - const scalar_type& off - ); - /*! - ensures - - #kernel == k - - #offset == off - !*/ - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const; - /*! - ensures - - returns kernel(a,b) + offset - !*/ - - offset_kernel& operator= ( - const offset_kernel& k - ); - /*! - ensures - - #offset == k.offset - - #kernel == k.kernel - !*/ - - bool operator== ( - const offset_kernel& k - ) const; - /*! - ensures - - if (k and *this are identical) then - - returns true - - else - - returns false - !*/ - }; - - template < - typename T - > - void serialize ( - const offset_kernel<T>& item, - std::ostream& out - ); - /*! - provides serialization support for offset_kernel - !*/ - - template < - typename T - > - void deserialize ( - offset_kernel<T>& item, - std::istream& in - ); - /*! - provides deserialization support for offset_kernel - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type - > - struct kernel_derivative - { - /*! - REQUIREMENTS ON kernel_type - kernel_type must be one of the following kernel types: - - radial_basis_kernel - - polynomial_kernel - - sigmoid_kernel - - linear_kernel - - offset_kernel - - WHAT THIS OBJECT REPRESENTS - This is a function object that computes the derivative of a kernel - function object. - - THREAD SAFETY - It is always safe to use distinct instances of this object in different - threads. However, when a single instance is shared between threads then - the following rules apply: - Instances of this object are allowed to have a mutable cache which is - used by const member functions. Therefore, it is not safe to use one - instance of this object from multiple threads (unless protected by a - mutex). - !*/ - - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - - kernel_derivative( - const kernel_type& k_ - ); - /*! - ensures - - this object will return derivatives of the kernel object k_ - - #k == k_ - !*/ - - const sample_type operator() ( - const sample_type& x, - const sample_type& y - ) const; - /*! - ensures - - returns the derivative of k with respect to y. - !*/ - - const kernel_type& k; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_KERNEL_ABSTRACT_ - - - diff --git a/ml/dlib/dlib/svm/kernel_matrix.h b/ml/dlib/dlib/svm/kernel_matrix.h deleted file mode 100644 index f6e1e0b90..000000000 --- a/ml/dlib/dlib/svm/kernel_matrix.h +++ /dev/null @@ -1,268 +0,0 @@ -// Copyright (C) 2009 Davis E. 
King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_SVm_KERNEL_MATRIX_ -#define DLIB_SVm_KERNEL_MATRIX_ - -#include <vector> -#include "kernel_matrix_abstract.h" -#include "../matrix.h" -#include "../algs.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template <typename kernel_type, typename T> - inline const typename T::type& access ( const matrix_exp<T>& m, long i) - { - return m(i); - } - - // bind to anything that looks like an array and isn't a matrix - template <typename kernel_type, typename T> - inline const typename disable_if<is_matrix<T>,typename T::type>::type& access ( const T& m, long i) - { - return m[i]; - } - - // Only use this function if T isn't a std::pair because in that case the entire vector is - // probably itself a sparse sample. - template <typename kernel_type, typename T, typename alloc> - inline typename disable_if<is_pair<T>,const T&>::type access ( const std::vector<T,alloc>& m, long i) - { - return m[i]; - } - - // Only use this function if T isn't a std::pair because in that case the entire vector is - // probably a sparse sample. - template <typename kernel_type, typename T, typename alloc> - inline typename disable_if<is_pair<T>,const T&>::type access ( const std_vector_c<T,alloc>& m, long i) - { - return m[i]; - } - - template <typename kernel_type> - inline const typename kernel_type::sample_type& access ( - const typename kernel_type::sample_type& samp, - long - ) - { - return samp; - } - - // -------------------------------------------- - - template <typename kernel_type, typename T> - inline typename disable_if<is_same_type<T,typename kernel_type::sample_type>,unsigned long>::type - size ( const T& m) - { - return m.size(); - } - - template <typename kernel_type> - inline size_t size ( - const typename kernel_type::sample_type& - ) - { - return 1; - } - - // -------------------------------------------- - - template <typename T> - typename disable_if<is_matrix<T> >::type assert_is_vector(const T&) - {} - - template <typename T> - // This funny #ifdef thing is here because gcc sometimes gives a warning - // about v being unused otherwise. -#ifdef ENABLE_ASSERTS - void assert_is_vector(const matrix_exp<T>& v) -#else - void assert_is_vector(const matrix_exp<T>& ) -#endif - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(v) == true, - "\tconst matrix_exp kernel_matrix()" - << "\n\t You have to supply this function with row or column vectors" - << "\n\t v.nr(): " << v.nr() - << "\n\t v.nc(): " << v.nc() - ); - } - - } - - template <typename K, typename vect_type1, typename vect_type2> - struct op_kern_mat - { - op_kern_mat( - const K& kern_, - const vect_type1& vect1_, - const vect_type2& vect2_ - ) : - kern(kern_), - vect1(vect1_), - vect2(vect2_) - { - // make sure the requires clauses get checked eventually - impl::assert_is_vector(vect1); - impl::assert_is_vector(vect2); - } - - const K& kern; - const vect_type1& vect1; - const vect_type2& vect2; - - typedef typename K::scalar_type type; - - const static long cost = 100; - const static long NR = (is_same_type<vect_type1,typename K::sample_type>::value) ? 1 : 0; - const static long NC = (is_same_type<vect_type2,typename K::sample_type>::value) ? 
1 : 0; - - typedef const type const_ret_type; - typedef typename K::mem_manager_type mem_manager_type; - typedef row_major_layout layout_type; - - const_ret_type apply (long r, long c ) const - { - return kern(impl::access<K>(vect1,r), impl::access<K>(vect2,c)); - } - - long nr () const { return impl::size<K>(vect1); } - long nc () const { return impl::size<K>(vect2); } - - template <typename U> bool aliases ( const matrix_exp<U>& item ) const { return alias_helper(item.ref()); } - template <typename U> bool destructively_aliases ( const matrix_exp<U>& item ) const { return alias_helper(item.ref()); } - - template <typename U> bool alias_helper ( const U& ) const { return false; } - - typedef typename K::sample_type samp_type; - - // Say we destructively alias if one of the vect* objects is actually item. - bool alias_helper (const samp_type& item ) const { return are_same(item, vect1) || are_same(item, vect2); } - template <typename U> bool are_same (const samp_type& , const U& ) const { return false; } - bool are_same (const samp_type& a, const samp_type& b) const { return (&a == &b); } - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename K, - typename V1, - typename V2 - > - const matrix_op<op_kern_mat<K,V1,V2> > kernel_matrix ( - const K& kern, - const V1& v1, - const V2& v2 - ) - { - typedef op_kern_mat<K,V1,V2> op; - return matrix_op<op>(op(kern,v1,v2)); - } - -// ---------------------------------------------------------------------------------------- - - /* - It is possible to implement the kernel_matrix() operator with just one operator - class but treating the version that takes only a single vector separately - leads to more efficient output by gcc in certain instances. - */ - template <typename K, typename vect_type1> - struct op_kern_mat_single - { - op_kern_mat_single( - const K& kern_, - const vect_type1& vect1_ - ) : - kern(kern_), - vect1(vect1_) - { - // make sure the requires clauses get checked eventually - impl::assert_is_vector(vect1); - } - - const K& kern; - const vect_type1& vect1; - - typedef typename K::scalar_type type; - - const static long cost = 100; - const static long NR = (is_same_type<vect_type1,typename K::sample_type>::value) ? 1 : 0; - const static long NC = (is_same_type<vect_type1,typename K::sample_type>::value) ? 1 : 0; - - typedef const type const_ret_type; - typedef typename K::mem_manager_type mem_manager_type; - typedef row_major_layout layout_type; - - const_ret_type apply (long r, long c ) const - { - return kern(impl::access<K>(vect1,r), impl::access<K>(vect1,c)); - } - - long nr () const { return impl::size<K>(vect1); } - long nc () const { return impl::size<K>(vect1); } - - template <typename U> bool aliases ( const matrix_exp<U>& item ) const { return alias_helper(item.ref()); } - template <typename U> bool destructively_aliases ( const matrix_exp<U>& item ) const { return alias_helper(item.ref()); } - - template <typename U> bool alias_helper ( const U& ) const { return false; } - - typedef typename K::sample_type samp_type; - - // Say we destructively alias if vect1 is actually item. 
- bool alias_helper (const samp_type& item ) const { return are_same(item, vect1); } - template <typename U> bool are_same (const samp_type& , const U& ) const { return false; } - bool are_same (const samp_type& a, const samp_type& b) const { return (&a == &b); } - }; - - template < - typename K, - typename V - > - const matrix_op<op_kern_mat_single<K,V> > kernel_matrix ( - const K& kern, - const V& v - ) - { - typedef op_kern_mat_single<K,V> op; - return matrix_op<op>(op(kern,v)); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_dest_type, - typename K, - typename V - > - inline void matrix_assign ( - matrix_dest_type& dest, - const matrix_exp<matrix_op<op_kern_mat_single<K,V> > >& src - ) - /*! - Overload matrix assignment so that when a kernel_matrix expression - gets assigned it only evaluates half the kernel matrix (since it is symmetric) - !*/ - { - for (long r = 0; r < src.nr(); ++r) - { - for (long c = r; c < src.nc(); ++c) - { - dest(r,c) = dest(c,r) = src(r,c); - } - } - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_KERNEL_MATRIX_ - diff --git a/ml/dlib/dlib/svm/kernel_matrix_abstract.h b/ml/dlib/dlib/svm/kernel_matrix_abstract.h deleted file mode 100644 index 4aa4b1ce2..000000000 --- a/ml/dlib/dlib/svm/kernel_matrix_abstract.h +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SVm_KERNEL_MATRIX_ABSTRACT_ -#ifdef DLIB_SVm_KERNEL_MATRIX_ABSTRACT_ - -#include <vector> -#include "kernel_abstract.h" -#include "../matrix/matrix_abstract.h" -#include "../algs.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type, - typename V - > - const matrix_exp kernel_matrix ( - const kernel_type& kernel, - const V& v - ); - /*! - requires - - kernel == a kernel function object as defined by the file dlib/svm/kernel_abstract.h. - This kernel must also be capable of operating on the contents of v. - - V == dlib::matrix, std::vector, dlib::std_vector_c, dlib::random_subset_selector, - dlib::linearly_independent_subset_finder, or kernel_type::sample_type. - - if (V is a dlib::matrix) then - - is_vector(v) == true - ensures - - if (V is of type kernel_type::sample_type) then - - returns a matrix R such that: - - R::type == kernel_type::scalar_type - - R.size() == 1 - - R(0,0) == kernel(v,v) - - else - - returns a matrix R such that: - - R::type == kernel_type::scalar_type - - R is a square matrix of v.size() rows by v.size() columns - - for all valid r and c: - - R(r,c) == kernel(v(r), v(c)) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type, - typename V1, - typename V2 - > - const matrix_exp kernel_matrix ( - const kernel_type& kernel, - const V1& v1, - const V2& v2 - ); - /*! - requires - - kernel == a kernel function object as defined by the file dlib/svm/kernel_abstract.h - This kernel must also be capable of operating on the contents of v1 and v2. - - V1 == dlib::matrix, std::vector, dlib::std_vector_c, dlib::random_subset_selector, - dlib::linearly_independent_subset_finder, or kernel_type::sample_type. 
- - V2 == dlib::matrix, std::vector, dlib::std_vector_c, dlib::random_subset_selector, - dlib::linearly_independent_subset_finder, or kernel_type::sample_type. - - if (V1 is a dlib::matrix) then - - is_vector(v1) == true - - if (V2 is a dlib::matrix) then - - is_vector(v2) == true - ensures - - if (V1 and V2 are of type kernel_type::sample_type) then - - returns a matrix R such that: - - R::type == kernel_type::scalar_type - - R.size() == 1 - - R(0,0) == kernel(v1,v2) - - else if (V1 is of type kernel_type::sample_type) then - - returns a matrix R such that: - - R::type == kernel_type::scalar_type - - R.nr() == 1 - - R.nc() == v2.size() - - for all valid c: - - R(0,c) == kernel(v1, v2(c)) - - else if (V2 is of type kernel_type::sample_type) then - - returns a matrix R such that: - - R::type == kernel_type::scalar_type - - R.nr() == v1.size() - - R.nc() == 1 - - for all valid r: - - R(r,0) == kernel(v1(r), v2) - - else - - returns a matrix R such that: - - R::type == kernel_type::scalar_type - - R.nr() == v1.size() - - R.nc() == v2.size() - - for all valid r and c: - - R(r,c) == kernel(v1(r), v2(c)) - - - A note about aliasing (see the examples/matrix_expressions_ex.cpp example program - for a discussion of what aliasing is in the context of the dlib::matrix): - kernel_matrix() expressions can detect aliasing of an argument if that - argument is of type kernel_type::sample_type. However, it can't detect - aliasing through std::vectors or other "list of sample type" container class - arguments. This means that it is safe to assign a kernel_matrix() expression - to a sample_type if V1 or V2 are of sample_type but not safe otherwise. However, - since the latter case results in a general n by m matrix rather than a column - or row vector you shouldn't ever be doing it anyway. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_KERNEL_MATRIX_ABSTRACT_ - diff --git a/ml/dlib/dlib/svm/kkmeans.h b/ml/dlib/dlib/svm/kkmeans.h deleted file mode 100644 index 4c72106d8..000000000 --- a/ml/dlib/dlib/svm/kkmeans.h +++ /dev/null @@ -1,654 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license.
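A minimal usage sketch of the kernel_matrix() functions documented above. This snippet is not part of the deleted headers; the gamma value and sample dimensions are illustrative.

    #include <dlib/svm.h>
    #include <vector>

    int main()
    {
        typedef dlib::matrix<double,3,1> sample_type;
        typedef dlib::radial_basis_kernel<sample_type> kernel_type;

        // make a few random 3D column-vector samples
        std::vector<sample_type> samples;
        for (int i = 0; i < 10; ++i)
            samples.push_back(dlib::randm(3,1));

        kernel_type kern(0.1);

        // 10x10 Gram matrix, K(r,c) == kern(samples[r], samples[c]).
        // The matrix_assign overload above evaluates only the upper
        // triangle and mirrors it, since this matrix is symmetric.
        dlib::matrix<double> K = dlib::kernel_matrix(kern, samples);

        // Rectangular form: a single sample against a list of samples,
        // giving a 1x10 row vector of kernel values.
        dlib::matrix<double> k = dlib::kernel_matrix(kern, samples[0], samples);
    }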
-#ifndef DLIB_KKMEANs_ -#define DLIB_KKMEANs_ - -#include <cmath> -#include <vector> - -#include "../matrix/matrix_abstract.h" -#include "../algs.h" -#include "../serialize.h" -#include "kernel.h" -#include "../array.h" -#include "kcentroid.h" -#include "kkmeans_abstract.h" -#include "../noncopyable.h" - -namespace dlib -{ - - template < - typename kernel_type - > - class kkmeans : public noncopyable - { - public: - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - - kkmeans ( - const kcentroid<kernel_type>& kc_ - ): - kc(kc_), - min_change(0.01) - { - set_number_of_centers(1); - } - - ~kkmeans() - { - } - - const kernel_type& get_kernel ( - ) const - { - return kc.get_kernel(); - } - - void set_kcentroid ( - const kcentroid<kernel_type>& kc_ - ) - { - kc = kc_; - set_number_of_centers(number_of_centers()); - } - - const kcentroid<kernel_type>& get_kcentroid ( - unsigned long i - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(i < number_of_centers(), - "\tkcentroid kkmeans::get_kcentroid(i)" - << "\n\tYou have given an invalid value for i" - << "\n\ti: " << i - << "\n\tnumber_of_centers(): " << number_of_centers() - << "\n\tthis: " << this - ); - - return *centers[i]; - } - - void set_number_of_centers ( - unsigned long num - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(num > 0, - "\tvoid kkmeans::set_number_of_centers()" - << "\n\tYou can't set the number of centers to zero" - << "\n\tthis: " << this - ); - - centers.set_max_size(num); - centers.set_size(num); - - for (unsigned long i = 0; i < centers.size(); ++i) - { - centers[i].reset(new kcentroid<kernel_type>(kc)); - } - } - - unsigned long number_of_centers ( - ) const - { - return centers.size(); - } - - template <typename T, typename U> - void train ( - const T& samples, - const U& initial_centers, - long max_iter = 1000 - ) - { - do_train(mat(samples),mat(initial_centers),max_iter); - } - - unsigned long operator() ( - const sample_type& sample - ) const - { - unsigned long label = 0; - scalar_type best_score = (*centers[0])(sample); - - // figure out which center the given sample is closest to - for (unsigned long i = 1; i < centers.size(); ++i) - { - scalar_type temp = (*centers[i])(sample); - if (temp < best_score) - { - label = i; - best_score = temp; - } - } - - return label; - } - - void set_min_change ( - scalar_type min_change_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( 0 <= min_change_ && min_change_ < 1, - "\tvoid kkmeans::set_min_change()" - << "\n\tInvalid arguments to this function" - << "\n\tthis: " << this - << "\n\tmin_change_: " << min_change_ - ); - min_change = min_change_; - } - - const scalar_type get_min_change ( - ) const - { - return min_change; - } - - void swap ( - kkmeans& item - ) - { - centers.swap(item.centers); - kc.swap(item.kc); - assignments.swap(item.assignments); - exchange(min_change, item.min_change); - } - - friend void serialize(const kkmeans& item, std::ostream& out) - { - serialize(item.centers.size(),out); - for (unsigned long i = 0; i < item.centers.size(); ++i) - { - serialize(*item.centers[i], out); - } - serialize(item.kc, out); - serialize(item.min_change, out); - } - - friend void deserialize(kkmeans& item, std::istream& in) - { - unsigned long num; - deserialize(num, in); - item.centers.resize(num); - for (unsigned long i = 0; i < item.centers.size(); ++i) - { -
std::unique_ptr<kcentroid<kernel_type> > temp(new kcentroid<kernel_type>(kernel_type())); - deserialize(*temp, in); - item.centers[i].swap(temp); - } - - deserialize(item.kc, in); - deserialize(item.min_change, in); - } - - private: - - template <typename matrix_type, typename matrix_type2> - void do_train ( - const matrix_type& samples, - const matrix_type2& initial_centers, - long max_iter = 1000 - ) - { - COMPILE_TIME_ASSERT((is_same_type<typename matrix_type::type, sample_type>::value)); - COMPILE_TIME_ASSERT((is_same_type<typename matrix_type2::type, sample_type>::value)); - - // make sure requires clause is not broken - DLIB_ASSERT(samples.nc() == 1 && initial_centers.nc() == 1 && - initial_centers.nr() == static_cast<long>(number_of_centers()), - "\tvoid kkmeans::train()" - << "\n\tInvalid arguments to this function" - << "\n\tthis: " << this - << "\n\tsamples.nc(): " << samples.nc() - << "\n\tinitial_centers.nc(): " << initial_centers.nc() - << "\n\tinitial_centers.nr(): " << initial_centers.nr() - ); - - // clear out the old data and initialize the centers - for (unsigned long i = 0; i < centers.size(); ++i) - { - centers[i]->clear_dictionary(); - centers[i]->train(initial_centers(i)); - } - - assignments.resize(samples.size()); - - bool assignment_changed = true; - - // loop until the centers stabilize - long count = 0; - const unsigned long min_num_change = static_cast<unsigned long>(min_change*samples.size()); - unsigned long num_changed = min_num_change; - while (assignment_changed && count < max_iter && num_changed >= min_num_change) - { - ++count; - assignment_changed = false; - num_changed = 0; - - // loop over all the samples and assign them to their closest centers - for (long i = 0; i < samples.size(); ++i) - { - // find the best center - unsigned long best_center = 0; - scalar_type best_score = (*centers[0])(samples(i)); - for (unsigned long c = 1; c < centers.size(); ++c) - { - scalar_type temp = (*centers[c])(samples(i)); - if (temp < best_score) - { - best_score = temp; - best_center = c; - } - } - - // if the current sample changed centers then make note of that - if (assignments[i] != best_center) - { - assignments[i] = best_center; - assignment_changed = true; - ++num_changed; - } - } - - if (assignment_changed) - { - // now clear out the old data - for (unsigned long i = 0; i < centers.size(); ++i) - centers[i]->clear_dictionary(); - - // recalculate the cluster centers - for (unsigned long i = 0; i < assignments.size(); ++i) - centers[assignments[i]]->train(samples(i)); - } - - } - - - } - - array<std::unique_ptr<kcentroid<kernel_type> > > centers; - kcentroid<kernel_type> kc; - scalar_type min_change; - - // temp variables - array<unsigned long> assignments; - }; - -// ---------------------------------------------------------------------------------------- - - template <typename kernel_type> - void swap(kkmeans<kernel_type>& a, kkmeans<kernel_type>& b) - { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - - struct dlib_pick_initial_centers_data - { - dlib_pick_initial_centers_data():idx(0), dist(std::numeric_limits<double>::infinity()){} - long idx; - double dist; - bool operator< (const dlib_pick_initial_centers_data& d) const { return dist < d.dist; } - }; - - template < - typename vector_type1, - typename vector_type2, - typename kernel_type - > - void pick_initial_centers( - long num_centers, - vector_type1& centers, - const vector_type2& samples, - const kernel_type& k, - double percentile = 0.01 
- ) - { - /* - This function is basically just a non-randomized version of the kmeans++ algorithm - described in the paper: - kmeans++: The Advantages of Careful Seeding by Arthur and Vassilvitskii - - */ - - - // make sure requires clause is not broken - DLIB_ASSERT(num_centers > 1 && 0 <= percentile && percentile < 1 && samples.size() > 1, - "\tvoid pick_initial_centers()" - << "\n\tYou passed invalid arguments to this function" - << "\n\tnum_centers: " << num_centers - << "\n\tpercentile: " << percentile - << "\n\tsamples.size(): " << samples.size() - ); - - std::vector<dlib_pick_initial_centers_data> scores(samples.size()); - std::vector<dlib_pick_initial_centers_data> scores_sorted(samples.size()); - centers.clear(); - - // pick the first sample as one of the centers - centers.push_back(samples[0]); - - const long best_idx = static_cast<long>(std::max(0.0,samples.size() - samples.size()*percentile - 1)); - - // pick the next center - for (long i = 0; i < num_centers-1; ++i) - { - // Loop over the samples and compare them to the most recent center. Store - // the distance from each sample to its closest center in scores. - const double k_cc = k(centers[i], centers[i]); - for (unsigned long s = 0; s < samples.size(); ++s) - { - // compute the distance between this sample and the current center - const double dist = k_cc + k(samples[s],samples[s]) - 2*k(samples[s], centers[i]); - - if (dist < scores[s].dist) - { - scores[s].dist = dist; - scores[s].idx = s; - } - } - - scores_sorted = scores; - - // now find the winning center and add it to centers. It is the one that is - // far away from all the other centers. - sort(scores_sorted.begin(), scores_sorted.end()); - centers.push_back(samples[scores_sorted[best_idx].idx]); - } - - } - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type1, - typename vector_type2 - > - void pick_initial_centers( - long num_centers, - vector_type1& centers, - const vector_type2& samples, - double percentile = 0.01 - ) - { - typedef typename vector_type1::value_type sample_type; - linear_kernel<sample_type> kern; - pick_initial_centers(num_centers, centers, samples, kern, percentile); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename array_type, - typename sample_type, - typename alloc - > - void find_clusters_using_kmeans ( - const array_type& samples, - std::vector<sample_type, alloc>& centers, - unsigned long max_iter = 1000 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(samples.size() > 0 && centers.size() > 0, - "\tvoid find_clusters_using_kmeans()" - << "\n\tYou passed invalid arguments to this function" - << "\n\t samples.size(): " << samples.size() - << "\n\t centers.size(): " << centers.size() - ); - -#ifdef ENABLE_ASSERTS - { - const long nr = samples[0].nr(); - const long nc = samples[0].nc(); - for (unsigned long i = 0; i < samples.size(); ++i) - { - DLIB_ASSERT(is_vector(samples[i]) && samples[i].nr() == nr && samples[i].nc() == nc, - "\tvoid find_clusters_using_kmeans()" - << "\n\t You passed invalid arguments to this function" - << "\n\t is_vector(samples[i]): " << is_vector(samples[i]) - << "\n\t samples[i].nr(): " << samples[i].nr() - << "\n\t nr: " << nr - << "\n\t samples[i].nc(): " << samples[i].nc() - << "\n\t nc: " << nc - << "\n\t i: " << i - ); - } - } -#endif - - typedef typename sample_type::type scalar_type; - - sample_type zero(centers[0]); - 
set_all_elements(zero, 0); - - std::vector<unsigned long> center_element_count; - - // tells which center a sample belongs to - std::vector<unsigned long> assignments(samples.size(), samples.size()); - - - unsigned long iter = 0; - bool centers_changed = true; - while (centers_changed && iter < max_iter) - { - ++iter; - centers_changed = false; - center_element_count.assign(centers.size(), 0); - - // loop over each sample and see which center it is closest to - for (unsigned long i = 0; i < samples.size(); ++i) - { - // find the best center for sample[i] - scalar_type best_dist = std::numeric_limits<scalar_type>::max(); - unsigned long best_center = 0; - for (unsigned long j = 0; j < centers.size(); ++j) - { - scalar_type dist = length(centers[j] - samples[i]); - if (dist < best_dist) - { - best_dist = dist; - best_center = j; - } - } - - if (assignments[i] != best_center) - { - centers_changed = true; - assignments[i] = best_center; - } - - center_element_count[best_center] += 1; - } - - // now update all the centers - centers.assign(centers.size(), zero); - for (unsigned long i = 0; i < samples.size(); ++i) - { - centers[assignments[i]] += samples[i]; - } - for (unsigned long i = 0; i < centers.size(); ++i) - { - if (center_element_count[i] != 0) - centers[i] /= center_element_count[i]; - } - } - - - } - -// ---------------------------------------------------------------------------------------- - - template < - typename array_type, - typename sample_type, - typename alloc - > - void find_clusters_using_angular_kmeans ( - const array_type& samples, - std::vector<sample_type, alloc>& centers, - unsigned long max_iter = 1000 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(samples.size() > 0 && centers.size() > 0, - "\tvoid find_clusters_using_angular_kmeans()" - << "\n\tYou passed invalid arguments to this function" - << "\n\t samples.size(): " << samples.size() - << "\n\t centers.size(): " << centers.size() - ); - -#ifdef ENABLE_ASSERTS - { - const long nr = samples[0].nr(); - const long nc = samples[0].nc(); - for (unsigned long i = 0; i < samples.size(); ++i) - { - DLIB_ASSERT(is_vector(samples[i]) && samples[i].nr() == nr && samples[i].nc() == nc, - "\tvoid find_clusters_using_angular_kmeans()" - << "\n\t You passed invalid arguments to this function" - << "\n\t is_vector(samples[i]): " << is_vector(samples[i]) - << "\n\t samples[i].nr(): " << samples[i].nr() - << "\n\t nr: " << nr - << "\n\t samples[i].nc(): " << samples[i].nc() - << "\n\t nc: " << nc - << "\n\t i: " << i - ); - } - } -#endif - - typedef typename sample_type::type scalar_type; - - sample_type zero(centers[0]); - set_all_elements(zero, 0); - - unsigned long seed = 0; - - // tells which center a sample belongs to - std::vector<unsigned long> assignments(samples.size(), samples.size()); - std::vector<double> lengths; - for (unsigned long i = 0; i < samples.size(); ++i) - { - lengths.push_back(length(samples[i])); - // If there are zero vectors in samples then just say their length is 1 so we - // can avoid a division by zero check later on. Also, this doesn't matter - // since zero vectors can be assigned to any cluster randomly as there is no - // basis for picking one based on angle. - if (lengths.back() == 0) - lengths.back() = 1; - } - - // We will keep the centers as unit vectors at all times throughout the processing. - for (unsigned long i = 0; i < centers.size(); ++i) - { - double len = length(centers[i]); - // Avoid having length 0 centers. 
If that is the case then pick another center - // at random. - while(len == 0) - { - centers[i] = matrix_cast<scalar_type>(gaussian_randm(centers[i].nr(), centers[i].nc(), seed++)); - len = length(centers[i]); - } - centers[i] /= len; - } - - - unsigned long iter = 0; - bool centers_changed = true; - while (centers_changed && iter < max_iter) - { - ++iter; - centers_changed = false; - - // loop over each sample and see which center it is closest to - for (unsigned long i = 0; i < samples.size(); ++i) - { - // find the best center for sample[i] - scalar_type best_angle = std::numeric_limits<scalar_type>::max(); - unsigned long best_center = 0; - for (unsigned long j = 0; j < centers.size(); ++j) - { - scalar_type angle = -dot(centers[j],samples[i])/lengths[i]; - - if (angle < best_angle) - { - best_angle = angle; - best_center = j; - } - } - - if (assignments[i] != best_center) - { - centers_changed = true; - assignments[i] = best_center; - } - } - - // now update all the centers - centers.assign(centers.size(), zero); - for (unsigned long i = 0; i < samples.size(); ++i) - { - centers[assignments[i]] += samples[i]; - } - // Now length normalize all the centers. - for (unsigned long i = 0; i < centers.size(); ++i) - { - double len = length(centers[i]); - // Avoid having length 0 centers. If that is the case then pick another center - // at random. - while(len == 0) - { - centers[i] = matrix_cast<scalar_type>(gaussian_randm(centers[i].nr(), centers[i].nc(), seed++)); - len = length(centers[i]); - centers_changed = true; - } - centers[i] /= len; - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename array_type, - typename EXP - > - unsigned long nearest_center ( - const array_type& centers, - const matrix_exp<EXP>& sample - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(centers.size() > 0 && sample.size() > 0 && is_vector(sample), - "\t unsigned long nearest_center()" - << "\n\t You have given invalid inputs to this function." - << "\n\t centers.size(): " << centers.size() - << "\n\t sample.size(): " << sample.size() - << "\n\t is_vector(sample): " << is_vector(sample) - ); - - double best_dist = length_squared(centers[0] - sample); - unsigned long best_idx = 0; - for (unsigned long i = 1; i < centers.size(); ++i) - { - const double dist = length_squared(centers[i] - sample); - if (dist < best_dist) - { - best_dist = dist; - best_idx = i; - } - } - return best_idx; - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_KKMEANs_ - - diff --git a/ml/dlib/dlib/svm/kkmeans_abstract.h b/ml/dlib/dlib/svm/kkmeans_abstract.h deleted file mode 100644 index 9f9d7ccce..000000000 --- a/ml/dlib/dlib/svm/kkmeans_abstract.h +++ /dev/null @@ -1,365 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_KKMEANs_ABSTRACT_ -#ifdef DLIB_KKMEANs_ABSTRACT_ - -#include <cmath> -#include "../matrix/matrix_abstract.h" -#include "../algs.h" -#include "../serialize.h" -#include "kernel_abstract.h" -#include "kcentroid_abstract.h" -#include "../noncopyable.h" - -namespace dlib -{ - - template < - typename kernel_type - > - class kkmeans : public noncopyable - { - /*! 
- REQUIREMENTS ON kernel_type - is a kernel function object as defined in dlib/svm/kernel_abstract.h - - INITIAL VALUE - - number_of_centers() == 1 - - get_min_change() == 0.01 - - WHAT THIS OBJECT REPRESENTS - This is an implementation of a kernelized k-means clustering algorithm. - It performs k-means clustering by using the kcentroid object. - !*/ - - public: - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - - kkmeans ( - const kcentroid<kernel_type>& kc_ - ); - /*! - ensures - - #number_of_centers() == 1 - - #get_min_change() == 0.01 - - #get_kcentroid(0) == a copy of kc_ - !*/ - - ~kkmeans( - ); - /*! - ensures - - all resources associated with *this have been released - !*/ - - void set_kcentroid ( - const kcentroid<kernel_type>& kc_ - ); - /*! - ensures - - for all idx: - - #get_kcentroid(idx) == a copy of kc_ - !*/ - - const kcentroid<kernel_type>& get_kcentroid ( - unsigned long i - ) const; - /*! - requires - - i < number_of_centers() - ensures - - returns a const reference to the ith kcentroid object contained in - this object. Each kcentroid represents one of the centers found - by the k-means clustering algorithm. - !*/ - - const kernel_type& get_kernel ( - ) const; - /*! - ensures - - returns a const reference to the kernel used by this object - !*/ - - void set_number_of_centers ( - unsigned long num - ); - /*! - requires - - num > 0 - ensures - - #number_of_centers() == num - !*/ - - unsigned long number_of_centers ( - ) const; - /*! - ensures - - returns the number of centers used in this instance of the k-means clustering - algorithm. - !*/ - - template < - typename matrix_type, - typename matrix_type2 - > - void train ( - const matrix_type& samples, - const matrix_type2& initial_centers, - long max_iter = 1000 - ); - /*! - requires - - matrix_type and matrix_type2 must either be dlib::matrix objects or convertible to dlib::matrix - via mat() - - matrix_type::type == sample_type (i.e. matrix_type should contain sample_type objects) - - matrix_type2::type == sample_type (i.e. matrix_type2 should contain sample_type objects) - - initial_centers.nc() == 1 (i.e. must be a column vector) - - samples.nc() == 1 (i.e. must be a column vector) - - initial_centers.nr() == number_of_centers() - ensures - - performs k-means clustering of the given set of samples. The initial center points - are taken from the initial_centers argument. - - loops over the data and continues to refine the clustering until either less than - get_min_change() fraction of the data points change clusters or we have done max_iter - iterations over the data. - - After this function finishes you can call the operator() function below - to determine which centroid a given sample is closest to. - !*/ - - unsigned long operator() ( - const sample_type& sample - ) const; - /*! - ensures - - returns a number idx such that: - - idx < number_of_centers() - - get_kcentroid(idx) == the centroid that is closest to the given - sample. - !*/ - - void set_min_change ( - scalar_type min_change - ); - /*! - requires - - 0 <= min_change < 1 - ensures - - #get_min_change() == min_change - !*/ - - const scalar_type get_min_change ( - ) const; - /*! - ensures - - returns the minimum fraction of data points that need to change - centers in an iteration of kmeans for the algorithm to keep going. - !*/ - - void swap ( - kkmeans& item - ); - /*! 
- ensures - - swaps *this and item - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type - > - void swap( - kkmeans<kernel_type>& a, - kkmeans<kernel_type>& b - ) { a.swap(b); } - /*! - provides a global swap function - !*/ - - template < - typename kernel_type - > - void serialize ( - const kkmeans<kernel_type>& item, - std::ostream& out - ); - /*! - provides serialization support for kkmeans objects - !*/ - - template < - typename kernel_type - > - void deserialize ( - kkmeans<kernel_type>& item, - std::istream& in - ); - /*! - provides deserialization support for kkmeans objects - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type1, - typename vector_type2, - typename kernel_type - > - void pick_initial_centers( - long num_centers, - vector_type1& centers, - const vector_type2& samples, - const kernel_type& k, - double percentile = 0.01 - ); - /*! - requires - - num_centers > 1 - - 0 <= percentile < 1 - - samples.size() > 1 - - vector_type1 == something with an interface compatible with std::vector - - vector_type2 == something with an interface compatible with std::vector - - k(samples[0],samples[0]) must be a valid expression that returns a double - - both centers and samples must be able to contain kernel_type::sample_type - objects - ensures - - finds num_centers candidate cluster centers in the data in the samples - vector. Assumes that k is the kernel that will be used during clustering - to define the space in which clustering occurs. - - The centers are found by looking for points that are far away from other - candidate centers. However, if the data is noisy you probably want to - ignore the farthest away points since they will be outliers. To do this - set percentile to the fraction of outliers you expect the data to contain. - - #centers.size() == num_centers - - #centers == a vector containing the candidate centers found - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type1, - typename vector_type2 - > - void pick_initial_centers( - long num_centers, - vector_type1& centers, - const vector_type2& samples, - double percentile = 0.01 - ); - /*! - requires - - num_centers > 1 - - 0 <= percentile < 1 - - samples.size() > 1 - - vector_type1 == something with an interface compatible with std::vector - - vector_type2 == something with an interface compatible with std::vector - - Both centers and samples must be able to contain dlib::matrix based row or - column vectors. - ensures - - performs: pick_initial_centers(num_centers, centers, samples, linear_kernel<sample_type>(), percentile) - (i.e. this function is simply an overload that uses the linear kernel.) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename array_type, - typename sample_type, - typename alloc - > - void find_clusters_using_kmeans ( - const array_type& samples, - std::vector<sample_type, alloc>& centers, - unsigned long max_iter = 1000 - ); - /*! - requires - - samples.size() > 0 - - samples == a bunch of row or column vectors and they all must be of the - same length. - - centers.size() > 0 - - array_type == something with an interface compatible with std::vector - and it must contain row or column vectors capable of being stored in - sample_type objects.
- - sample_type == a dlib::matrix capable of representing vectors - ensures - - performs regular old linear kmeans clustering on the samples. The clustering - begins with the initial set of centers given as an argument to this function. - When it finishes #centers will contain the resulting centers. - - no more than max_iter iterations will be performed before this function - terminates. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename array_type, - typename sample_type, - typename alloc - > - void find_clusters_using_angular_kmeans ( - const array_type& samples, - std::vector<sample_type, alloc>& centers, - unsigned long max_iter = 1000 - ); - /*! - requires - - samples.size() > 0 - - samples == a bunch of row or column vectors and they all must be of the - same length. - - centers.size() > 0 - - array_type == something with an interface compatible with std::vector - and it must contain row or column vectors capable of being stored in - sample_type objects. - - sample_type == a dlib::matrix capable of representing vectors - ensures - - performs linear kmeans clustering on the samples, except instead of using - Euclidean distance to compare samples to the centers it uses the angle - between a sample and a center (with respect to the origin). So we try to - cluster samples together if they have small angles with respect to each - other. The clustering begins with the initial set of centers given as an - argument to this function. When it finishes #centers will contain the - resulting centers. - - for all valid i: - - length(#centers[i]) == 1 - (i.e. the output centers are scaled to be unit vectors since their - magnitude is irrelevant. Moreover, this makes it so you can use - functions like nearest_center() with #centers to find the cluster - assignments.) - - No more than max_iter iterations will be performed before this function - terminates. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename array_type, - typename EXP - > - unsigned long nearest_center ( - const array_type& centers, - const matrix_exp<EXP>& sample - ); - /*! - requires - - centers.size() > 0 - - sample.size() > 0 - - is_vector(sample) == true - - centers must be an array of vectors such that the following expression is - valid: length_squared(sample - centers[0]). (e.g. centers could be a - std::vector of matrix objects holding column vectors). - ensures - - returns the index that identifies the element of centers that is nearest to - sample. That is, returns a number IDX such that centers[IDX] is the element - of centers that minimizes length(centers[IDX]-sample). - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_KKMEANs_ABSTRACT_ - diff --git a/ml/dlib/dlib/svm/krls.h b/ml/dlib/dlib/svm/krls.h deleted file mode 100644 index 6c72e45e8..000000000 --- a/ml/dlib/dlib/svm/krls.h +++ /dev/null @@ -1,358 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_KRLs_ -#define DLIB_KRLs_ - -#include <vector> - -#include "krls_abstract.h" -#include "../matrix.h" -#include "function.h" -#include "../std_allocator.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template <typename kernel_type> - class krls - { - /*! 
- This is an implementation of the kernel recursive least squares algorithm described in the paper: - The Kernel Recursive Least Squares Algorithm by Yaakov Engel. - !*/ - - public: - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - - - explicit krls ( - const kernel_type& kernel_, - scalar_type tolerance_ = 0.001, - unsigned long max_dictionary_size_ = 1000000 - ) : - kernel(kernel_), - my_tolerance(tolerance_), - my_max_dictionary_size(max_dictionary_size_) - { - // make sure requires clause is not broken - DLIB_ASSERT(tolerance_ >= 0, - "\tkrls::krls()" - << "\n\t You have to give a positive tolerance" - << "\n\t this: " << this - << "\n\t tolerance: " << tolerance_ - ); - - clear_dictionary(); - } - - scalar_type tolerance() const - { - return my_tolerance; - } - - unsigned long max_dictionary_size() const - { - return my_max_dictionary_size; - } - - const kernel_type& get_kernel ( - ) const - { - return kernel; - } - - void clear_dictionary () - { - dictionary.clear(); - alpha.clear(); - - K_inv.set_size(0,0); - K.set_size(0,0); - P.set_size(0,0); - } - - scalar_type operator() ( - const sample_type& x - ) const - { - scalar_type temp = 0; - for (unsigned long i = 0; i < alpha.size(); ++i) - temp += alpha[i]*kern(dictionary[i], x); - - return temp; - } - - void train ( - const sample_type& x, - scalar_type y - ) - { - const scalar_type kx = kern(x,x); - if (alpha.size() == 0) - { - // just ignore this sample if it is the zero vector (or really close to being zero) - if (std::abs(kx) > std::numeric_limits<scalar_type>::epsilon()) - { - // set initial state since this is the first training example we have seen - - K_inv.set_size(1,1); - K_inv(0,0) = 1/kx; - K.set_size(1,1); - K(0,0) = kx; - - alpha.push_back(y/kx); - dictionary.push_back(x); - P.set_size(1,1); - P(0,0) = 1; - } - } - else - { - // fill in k - k.set_size(alpha.size()); - for (long r = 0; r < k.nr(); ++r) - k(r) = kern(x,dictionary[r]); - - // compute the error we would have if we approximated the new x sample - // with the dictionary. That is, do the ALD test from the KRLS paper. - a = K_inv*k; - scalar_type delta = kx - trans(k)*a; - - // if this new vector isn't approximately linearly dependent on the vectors - // in our dictionary. - if (delta > my_tolerance) - { - if (dictionary.size() >= my_max_dictionary_size) - { - // We need to remove one of the old members of the dictionary before - // we proceed with adding a new one. So remove the oldest one. 
- remove_dictionary_vector(0); - - // recompute these guys since they were computed with the old - // kernel matrix - k = remove_row(k,0); - a = K_inv*k; - delta = kx - trans(k)*a; - } - - // add x to the dictionary - dictionary.push_back(x); - - // update K_inv by computing the new one in the temp matrix (equation 3.14) - matrix<scalar_type,0,0,mem_manager_type> temp(K_inv.nr()+1, K_inv.nc()+1); - // update the middle part of the matrix - set_subm(temp, get_rect(K_inv)) = K_inv + a*trans(a)/delta; - // update the right column of the matrix - set_subm(temp, 0, K_inv.nr(),K_inv.nr(),1) = -a/delta; - // update the bottom row of the matrix - set_subm(temp, K_inv.nr(), 0, 1, K_inv.nr()) = trans(-a/delta); - // update the bottom right corner of the matrix - temp(K_inv.nr(), K_inv.nc()) = 1/delta; - // put temp into K_inv - temp.swap(K_inv); - - - - - // update K (the kernel matrix) - temp.set_size(K.nr()+1, K.nc()+1); - set_subm(temp, get_rect(K)) = K; - // update the right column of the matrix - set_subm(temp, 0, K.nr(),K.nr(),1) = k; - // update the bottom row of the matrix - set_subm(temp, K.nr(), 0, 1, K.nr()) = trans(k); - temp(K.nr(), K.nc()) = kx; - // put temp into K - temp.swap(K); - - - - - // Now update the P matrix (equation 3.15) - temp.set_size(P.nr()+1, P.nc()+1); - set_subm(temp, get_rect(P)) = P; - // initialize the new sides of P - set_rowm(temp,P.nr()) = 0; - set_colm(temp,P.nr()) = 0; - temp(P.nr(), P.nc()) = 1; - temp.swap(P); - - // now update the alpha vector (equation 3.16) - const scalar_type k_a = (y-trans(k)*mat(alpha))/delta; - for (unsigned long i = 0; i < alpha.size(); ++i) - { - alpha[i] -= a(i)*k_a; - } - alpha.push_back(k_a); - } - else - { - q = P*a/(1+trans(a)*P*a); - - // update P (equation 3.12) - temp_matrix = trans(a)*P; - P -= q*temp_matrix; - - // update the alpha vector (equation 3.13) - const scalar_type k_a = y-trans(k)*mat(alpha); - for (unsigned long i = 0; i < alpha.size(); ++i) - { - alpha[i] += (K_inv*q*k_a)(i); - } - } - } - } - - void swap ( - krls& item - ) - { - exchange(kernel, item.kernel); - dictionary.swap(item.dictionary); - alpha.swap(item.alpha); - K_inv.swap(item.K_inv); - K.swap(item.K); - P.swap(item.P); - exchange(my_tolerance, item.my_tolerance); - q.swap(item.q); - a.swap(item.a); - k.swap(item.k); - temp_matrix.swap(item.temp_matrix); - exchange(my_max_dictionary_size, item.my_max_dictionary_size); - } - - unsigned long dictionary_size ( - ) const { return dictionary.size(); } - - decision_function<kernel_type> get_decision_function ( - ) const - { - return decision_function<kernel_type>( - mat(alpha), - -sum(mat(alpha))*tau, - kernel, - mat(dictionary) - ); - } - - friend void serialize(const krls& item, std::ostream& out) - { - serialize(item.kernel, out); - serialize(item.dictionary, out); - serialize(item.alpha, out); - serialize(item.K_inv, out); - serialize(item.K, out); - serialize(item.P, out); - serialize(item.my_tolerance, out); - serialize(item.my_max_dictionary_size, out); - } - - friend void deserialize(krls& item, std::istream& in) - { - deserialize(item.kernel, in); - deserialize(item.dictionary, in); - deserialize(item.alpha, in); - deserialize(item.K_inv, in); - deserialize(item.K, in); - deserialize(item.P, in); - deserialize(item.my_tolerance, in); - deserialize(item.my_max_dictionary_size, in); - } - - private: - - inline scalar_type kern (const sample_type& m1, const sample_type& m2) const - { - return kernel(m1,m2) + tau; - } - - void remove_dictionary_vector ( - long i - ) - /*! 
- requires - - 0 <= i < dictionary.size() - ensures - - #dictionary.size() == dictionary.size() - 1 - - #alpha.size() == alpha.size() - 1 - - updates the K_inv matrix so that it is still a proper inverse of the - kernel matrix - - also removes the necessary row and column from the K matrix - - uses the this->a variable so after this function runs that variable - will contain a different value. - !*/ - { - // remove the dictionary vector - dictionary.erase(dictionary.begin()+i); - - // remove the i'th vector from the inverse kernel matrix. This formula is basically - // just the reverse of the way K_inv is updated by equation 3.14 during normal training. - K_inv = removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i); - - // now compute the updated alpha values to take account that we just removed one of - // our dictionary vectors - a = (K_inv*remove_row(K,i)*mat(alpha)); - - // now copy over the new alpha values - alpha.resize(alpha.size()-1); - for (unsigned long k = 0; k < alpha.size(); ++k) - { - alpha[k] = a(k); - } - - // update the P matrix as well - P = removerc(P,i,i); - - // update the K matrix as well - K = removerc(K,i,i); - } - - - kernel_type kernel; - - typedef std_allocator<sample_type, mem_manager_type> alloc_sample_type; - typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type; - typedef std::vector<sample_type,alloc_sample_type> dictionary_vector_type; - typedef std::vector<scalar_type,alloc_scalar_type> alpha_vector_type; - - dictionary_vector_type dictionary; - alpha_vector_type alpha; - - matrix<scalar_type,0,0,mem_manager_type> K_inv; - matrix<scalar_type,0,0,mem_manager_type> K; - matrix<scalar_type,0,0,mem_manager_type> P; - - scalar_type my_tolerance; - unsigned long my_max_dictionary_size; - - - // temp variables here just so we don't have to reconstruct them over and over. Thus, - // they aren't really part of the state of this object. - matrix<scalar_type,0,1,mem_manager_type> q; - matrix<scalar_type,0,1,mem_manager_type> a; - matrix<scalar_type,0,1,mem_manager_type> k; - matrix<scalar_type,1,0,mem_manager_type> temp_matrix; - - const static scalar_type tau; - - }; - - template <typename kernel_type> - const typename kernel_type::scalar_type krls<kernel_type>::tau = static_cast<typename kernel_type::scalar_type>(0.01); - -// ---------------------------------------------------------------------------------------- - - template <typename kernel_type> - void swap(krls<kernel_type>& a, krls<kernel_type>& b) - { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_KRLs_ - diff --git a/ml/dlib/dlib/svm/krls_abstract.h b/ml/dlib/dlib/svm/krls_abstract.h deleted file mode 100644 index 7ea2d9872..000000000 --- a/ml/dlib/dlib/svm/krls_abstract.h +++ /dev/null @@ -1,202 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_KRLs_ABSTRACT_ -#ifdef DLIB_KRLs_ABSTRACT_ - -#include <cmath> -#include "../matrix/matrix_abstract.h" -#include "../algs.h" -#include "../serialize.h" -#include "kernel_abstract.h" - -namespace dlib -{ - - template < - typename kernel_type - > - class krls - { - /*! 
- REQUIREMENTS ON kernel_type - is a kernel function object as defined in dlib/svm/kernel_abstract.h - - INITIAL VALUE - - dictionary_size() == 0 - - WHAT THIS OBJECT REPRESENTS - This is an implementation of the kernel recursive least squares algorithm - described in the paper: - The Kernel Recursive Least Squares Algorithm by Yaakov Engel. - - The long and short of this algorithm is that it is an online kernel based - regression algorithm. You give it samples (x,y) and it learns the function - f(x) == y. For a detailed description of the algorithm read the above paper. - - Also note that the algorithm internally keeps a set of "dictionary vectors" - that are used to represent the regression function. You can force the - algorithm to use no more than a set number of vectors by setting - the 3rd constructor argument to whatever you want. However, note that - doing this causes the algorithm to bias its results towards more - recent training examples. - !*/ - - public: - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - - - explicit krls ( - const kernel_type& kernel_, - scalar_type tolerance_ = 0.001, - unsigned long max_dictionary_size_ = 1000000 - ); - /*! - requires - - tolerance >= 0 - ensures - - this object is properly initialized - - #tolerance() == tolerance_ - - #get_decision_function().kernel_function == kernel_ - (i.e. this object will use the given kernel function) - - #get_kernel() == kernel_ - - #max_dictionary_size() == max_dictionary_size_ - !*/ - - scalar_type tolerance( - ) const; - /*! - ensures - - returns the tolerance to use for the approximately linearly dependent - test in the KRLS algorithm. This is a number which governs how - accurately this object will approximate the decision function it is - learning. Smaller values generally result in a more accurate - estimate while also resulting in a bigger set of dictionary vectors in - the learned decision function. Bigger tolerance values result in a - less accurate decision function but also in fewer dictionary vectors. - - The exact meaning of the tolerance parameter is the following: - Imagine that we have an empirical_kernel_map that contains all - the current dictionary vectors. Then the tolerance is the minimum - projection error (as given by empirical_kernel_map::project()) required - to cause us to include a new vector in the dictionary. So each time - you call train() the krls object basically just computes the projection - error for that new sample and if it is larger than the tolerance - then that new sample becomes part of the dictionary. - !*/ - - const kernel_type& get_kernel ( - ) const; - /*! - ensures - - returns a const reference to the kernel used by this object - !*/ - - unsigned long max_dictionary_size( - ) const; - /*! - ensures - - returns the maximum number of dictionary vectors this object - will use at a time. That is, dictionary_size() will never be - greater than max_dictionary_size(). - !*/ - - void clear_dictionary ( - ); - /*! - ensures - - clears out all learned data - (e.g. #get_decision_function().basis_vectors.size() == 0) - !*/ - - scalar_type operator() ( - const sample_type& x - ) const; - /*! - ensures - - returns the current y estimate for the given x - !*/ - - void train ( - const sample_type& x, - scalar_type y - ); - /*!
- ensures - - trains this object that the given x should be mapped to the given y - - if (dictionary_size() == max_dictionary_size() and training - would add another dictionary vector to this object) then - - discards the oldest dictionary vector so that we can still - add a new one and remain below the max number of dictionary - vectors. - !*/ - - void swap ( - krls& item - ); - /*! - ensures - - swaps *this with item - !*/ - - unsigned long dictionary_size ( - ) const; - /*! - ensures - - returns the number of vectors in the dictionary. That is, - returns a number equal to get_decision_function().basis_vectors.size() - !*/ - - decision_function<kernel_type> get_decision_function ( - ) const; - /*! - ensures - - returns a decision function F that represents the function learned - by this object so far. I.e. it is the case that: - - for all x: F(x) == (*this)(x) - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type - > - void swap( - krls<kernel_type>& a, - krls<kernel_type>& b - ) - { a.swap(b); } - /*! - provides a global swap function - !*/ - - template < - typename kernel_type - > - void serialize ( - const krls<kernel_type>& item, - std::ostream& out - ); - /*! - provides serialization support for krls objects - !*/ - - template < - typename kernel_type - > - void deserialize ( - krls<kernel_type>& item, - std::istream& in - ); - /*! - provides serialization support for krls objects - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_KRLs_ABSTRACT_ - diff --git a/ml/dlib/dlib/svm/krr_trainer.h b/ml/dlib/dlib/svm/krr_trainer.h deleted file mode 100644 index a43431169..000000000 --- a/ml/dlib/dlib/svm/krr_trainer.h +++ /dev/null @@ -1,368 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
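A short sketch of how the krls object documented above is typically driven, in the spirit of dlib's example programs. It is not part of the deleted files; the kernel width, tolerance, dictionary cap, and target function are illustrative.

    #include <dlib/svm.h>
    #include <cmath>
    #include <iostream>

    int main()
    {
        typedef dlib::matrix<double,1,1> sample_type;
        typedef dlib::radial_basis_kernel<sample_type> kernel_type;

        // tolerance 0.001 drives the ALD test described above; capping the
        // dictionary at 20 vectors biases the fit toward recent samples
        dlib::krls<kernel_type> learner(kernel_type(0.1), 0.001, 20);

        // feed in (x, sin(x)) pairs one at a time
        sample_type m;
        for (double x = -10; x <= 10; x += 0.1)
        {
            m(0) = x;
            learner.train(m, std::sin(x));
        }

        m(0) = 2.5;
        std::cout << "predicted: " << learner(m)
                  << "   true: " << std::sin(2.5) << std::endl;
        std::cout << "dictionary size: " << learner.dictionary_size() << std::endl;
    }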
-#ifndef DLIB_KRR_TRAInER_Hh_ -#define DLIB_KRR_TRAInER_Hh_ - -#include "../algs.h" -#include "function.h" -#include "kernel.h" -#include "empirical_kernel_map.h" -#include "linearly_independent_subset_finder.h" -#include "../statistics.h" -#include "rr_trainer.h" -#include "krr_trainer_abstract.h" -#include <vector> -#include <iostream> - -namespace dlib -{ - template < - typename K - > - class krr_trainer - { - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - krr_trainer ( - ) : - verbose(false), - max_basis_size(400), - ekm_stale(true) - { - } - - void be_verbose ( - ) - { - verbose = true; - trainer.be_verbose(); - } - - void be_quiet ( - ) - { - verbose = false; - trainer.be_quiet(); - } - - void use_regression_loss_for_loo_cv ( - ) - { - trainer.use_regression_loss_for_loo_cv(); - } - - void use_classification_loss_for_loo_cv ( - ) - { - trainer.use_classification_loss_for_loo_cv(); - } - - bool will_use_regression_loss_for_loo_cv ( - ) const - { - return trainer.will_use_regression_loss_for_loo_cv(); - } - - const kernel_type get_kernel ( - ) const - { - return kern; - } - - void set_kernel ( - const kernel_type& k - ) - { - kern = k; - } - - template <typename T> - void set_basis ( - const T& basis_samples - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(basis_samples.size() > 0 && is_vector(mat(basis_samples)), - "\tvoid krr_trainer::set_basis(basis_samples)" - << "\n\t You have to give a non-empty set of basis_samples and it must be a vector" - << "\n\t basis_samples.size(): " << basis_samples.size() - << "\n\t is_vector(mat(basis_samples)): " << is_vector(mat(basis_samples)) - << "\n\t this: " << this - ); - - basis = mat(basis_samples); - ekm_stale = true; - } - - bool basis_loaded ( - ) const - { - return (basis.size() != 0); - } - - void clear_basis ( - ) - { - basis.set_size(0); - ekm.clear(); - ekm_stale = true; - } - - unsigned long get_max_basis_size ( - ) const - { - return max_basis_size; - } - - void set_max_basis_size ( - unsigned long max_basis_size_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(max_basis_size_ > 0, - "\t void krr_trainer::set_max_basis_size()" - << "\n\t max_basis_size_ must be greater than 0" - << "\n\t max_basis_size_: " << max_basis_size_ - << "\n\t this: " << this - ); - - max_basis_size = max_basis_size_; - } - - void set_lambda ( - scalar_type lambda_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(lambda_ >= 0, - "\t void krr_trainer::set_lambda()" - << "\n\t lambda must be greater than or equal to 0" - << "\n\t lambda_: " << lambda_ - << "\n\t this: " << this - ); - - trainer.set_lambda(lambda_); - } - - const scalar_type get_lambda ( - ) const - { - return trainer.get_lambda(); - } - - template <typename EXP> - void set_search_lambdas ( - const matrix_exp<EXP>& lambdas - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(lambdas) && lambdas.size() > 0 && min(lambdas) > 0, - "\t void krr_trainer::set_search_lambdas()" - << "\n\t lambdas must be a non-empty vector of values" - << "\n\t is_vector(lambdas): " << is_vector(lambdas) - << "\n\t lambdas.size(): " << lambdas.size() - << "\n\t min(lambdas): " << min(lambdas) - << "\n\t this: " << this - ); - - trainer.set_search_lambdas(lambdas); - } - - const 
matrix<scalar_type,0,0,mem_manager_type>& get_search_lambdas ( - ) const - { - return trainer.get_search_lambdas(); - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - std::vector<scalar_type> temp; - scalar_type temp2; - return do_train(mat(x), mat(y), false, temp, temp2); - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - std::vector<scalar_type>& loo_values - ) const - { - scalar_type temp; - return do_train(mat(x), mat(y), true, loo_values, temp); - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - std::vector<scalar_type>& loo_values, - scalar_type& lambda_used - ) const - { - return do_train(mat(x), mat(y), true, loo_values, lambda_used); - } - - - private: - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - const bool output_loo_values, - std::vector<scalar_type>& loo_values, - scalar_type& the_lambda - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(is_learning_problem(x,y), - "\t decision_function krr_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t is_vector(x): " << is_vector(x) - << "\n\t is_vector(y): " << is_vector(y) - << "\n\t x.size(): " << x.size() - << "\n\t y.size(): " << y.size() - ); - -#ifdef ENABLE_ASSERTS - if (get_lambda() == 0 && will_use_regression_loss_for_loo_cv() == false) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_binary_classification_problem(x,y), - "\t decision_function krr_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - ); - } -#endif - - // The first thing we do is make sure we have an appropriate ekm ready for use below. - if (basis_loaded()) - { - if (ekm_stale) - { - ekm.load(kern, basis); - ekm_stale = false; - } - } - else - { - linearly_independent_subset_finder<kernel_type> lisf(kern, max_basis_size); - fill_lisf(lisf, x); - ekm.load(lisf); - } - - if (verbose) - { - std::cout << "\nNumber of basis vectors used: " << ekm.out_vector_size() << std::endl; - } - - typedef matrix<scalar_type,0,1,mem_manager_type> column_matrix_type; - - running_stats<scalar_type> rs; - - // Now we project all the x samples into kernel space using our EKM - matrix<column_matrix_type,0,1,mem_manager_type > proj_x; - proj_x.set_size(x.size()); - for (long i = 0; i < proj_x.size(); ++i) - { - scalar_type err; - // Note that we also append a 1 to the end of the vectors because this is - // a convenient way of dealing with the bias term later on. 
- if (verbose == false) - { - proj_x(i) = ekm.project(x(i)); - } - else - { - proj_x(i) = ekm.project(x(i),err); - rs.add(err); - } - } - - if (verbose) - { - std::cout << "Mean EKM projection error: " << rs.mean() << std::endl; - std::cout << "Standard deviation of EKM projection error: " << rs.stddev() << std::endl; - } - - - decision_function<linear_kernel<matrix<scalar_type,0,0,mem_manager_type> > > lin_df; - - if (output_loo_values) - lin_df = trainer.train(proj_x,y, loo_values, the_lambda); - else - lin_df = trainer.train(proj_x,y); - - // convert the linear decision function into a kernelized one. - decision_function<kernel_type> df; - df = ekm.convert_to_decision_function(lin_df.basis_vectors(0)); - df.b = lin_df.b; - - // If we used an automatically derived basis then there isn't any point in - // keeping the ekm around. So free its memory. - if (basis_loaded() == false) - { - ekm.clear(); - } - - return df; - } - - - /*! - CONVENTION - - if (ekm_stale) then - - kern or basis have changed since the last time - they were loaded into the ekm - - - get_lambda() == trainer.get_lambda() - - get_kernel() == kern - - get_max_basis_size() == max_basis_size - - will_use_regression_loss_for_loo_cv() == trainer.will_use_regression_loss_for_loo_cv() - - get_search_lambdas() == trainer.get_search_lambdas() - - - basis_loaded() == (basis.size() != 0) - !*/ - - rr_trainer<linear_kernel<matrix<scalar_type,0,0,mem_manager_type> > > trainer; - - bool verbose; - - - kernel_type kern; - unsigned long max_basis_size; - - matrix<sample_type,0,1,mem_manager_type> basis; - mutable empirical_kernel_map<kernel_type> ekm; - mutable bool ekm_stale; - - }; - -} - -#endif // DLIB_KRR_TRAInER_Hh_ - - diff --git a/ml/dlib/dlib/svm/krr_trainer_abstract.h b/ml/dlib/dlib/svm/krr_trainer_abstract.h deleted file mode 100644 index 399802f6b..000000000 --- a/ml/dlib/dlib/svm/krr_trainer_abstract.h +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_KRR_TRAInER_ABSTRACT_Hh_ -#ifdef DLIB_KRR_TRAInER_ABSTRACT_Hh_ - -#include "../algs.h" -#include "function_abstract.h" -#include "kernel_abstract.h" -#include "empirical_kernel_map_abstract.h" - -namespace dlib -{ - template < - typename K - > - class krr_trainer - { - /*! - REQUIREMENTS ON K - is a kernel function object as defined in dlib/svm/kernel_abstract.h - - INITIAL VALUE - - get_lambda() == 0 - - basis_loaded() == false - - get_max_basis_size() == 400 - - will_use_regression_loss_for_loo_cv() == true - - get_search_lambdas() == logspace(-9, 2, 50) - - this object will not be verbose unless be_verbose() is called - - WHAT THIS OBJECT REPRESENTS - This object represents a tool for performing kernel ridge regression - (This basic algorithm is also known by many other names, e.g. regularized - least squares or least squares SVM). - - The exact definition of what this algorithm does is this: - Find w and b that minimize the following (x_i are input samples and y_i are target values): - lambda*dot(w,w) + sum_over_i( (f(x_i) - y_i)^2 ) - where f(x) == dot(x,w) - b - - Except the dot products are replaced by kernel functions. So this - algorithm is just regular old least squares regression but with the - addition of a regularization term which encourages small w and the - application of the kernel trick. - - - It is implemented using the empirical_kernel_map and thus allows you - to run the algorithm on large datasets and obtain sparse outputs.
It is also - capable of estimating the lambda parameter using leave-one-out cross-validation. - - - The leave-one-out cross-validation implementation is based on the techniques - discussed in this paper: - Notes on Regularized Least Squares by Ryan M. Rifkin and Ross A. Lippert. - !*/ - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - krr_trainer ( - ); - /*! - ensures - - This object is properly initialized and ready to be used. - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out. - !*/ - - void be_quiet ( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - const kernel_type get_kernel ( - ) const; - /*! - ensures - - returns a copy of the kernel function in use by this object - !*/ - - void set_kernel ( - const kernel_type& k - ); - /*! - ensures - - #get_kernel() == k - !*/ - - template <typename T> - void set_basis ( - const T& basis_samples - ); - /*! - requires - - T must be a dlib::matrix type or something convertible to a matrix via mat() - (e.g. a std::vector) - - is_vector(basis_samples) == true - - basis_samples.size() > 0 - - get_kernel() must be capable of operating on the elements of basis_samples. That is, - expressions such as get_kernel()(basis_samples(0), basis_samples(0)) should make sense. - ensures - - #basis_loaded() == true - - training will be carried out in the span of the given basis_samples - !*/ - - bool basis_loaded ( - ) const; - /*! - ensures - - returns true if this object has been loaded with user supplied basis vectors and false otherwise. - !*/ - - void clear_basis ( - ); - /*! - ensures - - #basis_loaded() == false - !*/ - - unsigned long get_max_basis_size ( - ) const; - /*! - ensures - - returns the maximum number of basis vectors this object is allowed - to use. This parameter only matters when the user has not supplied - a basis via set_basis(). - !*/ - - void set_max_basis_size ( - unsigned long max_basis_size - ); - /*! - requires - - max_basis_size > 0 - ensures - - #get_max_basis_size() == max_basis_size - !*/ - - void set_lambda ( - scalar_type lambda - ); - /*! - requires - - lambda >= 0 - ensures - - #get_lambda() == lambda - !*/ - - const scalar_type get_lambda ( - ) const; - /*! - ensures - - returns the regularization parameter. It is the parameter that - determines the trade off between trying to fit the training data - exactly or allowing more errors but hopefully improving the - generalization ability of the resulting function. Smaller values - encourage exact fitting while larger values of lambda may encourage - better generalization. - - Note that a lambda of 0 has a special meaning. It indicates to this - object that it should automatically determine an appropriate lambda - value. This is done using leave-one-out cross-validation. - !*/ - - void use_regression_loss_for_loo_cv ( - ); - /*! - ensures - - #will_use_regression_loss_for_loo_cv() == true - !*/ - - void use_classification_loss_for_loo_cv ( - ); - /*! - ensures - - #will_use_regression_loss_for_loo_cv() == false - !*/ - - bool will_use_regression_loss_for_loo_cv ( - ) const; - /*! - ensures - - returns true if the automatic lambda estimation will attempt to estimate a lambda - appropriate for a regression task. 
Otherwise it will try to find one which - minimizes the number of classification errors. - !*/ - - template <typename EXP> - void set_search_lambdas ( - const matrix_exp<EXP>& lambdas - ); - /*! - requires - - is_vector(lambdas) == true - - lambdas.size() > 0 - - min(lambdas) > 0 - - lambdas must contain floating point numbers - ensures - - #get_search_lambdas() == lambdas - !*/ - - const matrix<scalar_type,0,0,mem_manager_type>& get_search_lambdas ( - ) const; - /*! - ensures - - returns a matrix M such that: - - is_vector(M) == true - - M == a list of all the lambda values which will be tried when performing - LOO cross-validation for determining the best lambda. - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const; - /*! - requires - - x == a matrix or something convertible to a matrix via mat(). - Also, x should contain sample_type objects. - - y == a matrix or something convertible to a matrix via mat(). - Also, y should contain scalar_type objects. - - is_learning_problem(x,y) == true - - if (get_lambda() == 0 && will_use_regression_loss_for_loo_cv() == false) then - - is_binary_classification_problem(x,y) == true - (i.e. if you want this algorithm to estimate a lambda appropriate for - classification functions then you had better give a valid classification - problem) - ensures - - performs kernel ridge regression given the training samples in x and target values in y. - - returns a decision_function F with the following properties: - - F(new_x) == predicted y value - - - if (basis_loaded()) then - - training will be carried out in the span of the user supplied basis vectors - - else - - this object will attempt to automatically select an appropriate basis - - - if (get_lambda() == 0) then - - This object will perform internal leave-one-out cross-validation to determine an - appropriate lambda automatically. It will compute the LOO error for each lambda - in get_search_lambdas() and select the best one. - - if (will_use_regression_loss_for_loo_cv()) then - - the lambda selected will be the one that minimizes the mean squared error. - - else - - the lambda selected will be the one that minimizes the number of classification - mistakes. We say a point is classified correctly if the output of the - decision_function has the same sign as its label. - - #get_lambda() == 0 - (i.e. we don't change the get_lambda() value. If you want to know what the - automatically selected lambda value was then call the version of train() - defined below) - - else - - The user supplied value of get_lambda() will be used to perform the kernel - ridge regression. - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - std::vector<scalar_type>& loo_values - ) const; - /*! - requires - - all the requirements for train(x,y) must be satisfied - ensures - - returns train(x,y) - (i.e. executes train(x,y) and returns its result) - - #loo_values.size() == y.size() - - for all valid i: - - #loo_values[i] == leave-one-out prediction for the value of y(i) based - on all the training samples other than (x(i),y(i)).
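An illustrative sketch of this overload in use; a hedged example rather than anything from the diff (the quadratic targets, the kernel width, and the <dlib/svm.h> include are assumptions):

#include <dlib/svm.h>
#include <iostream>
#include <vector>

int main()
{
    typedef dlib::matrix<double,1,1> sample_type;
    typedef dlib::radial_basis_kernel<sample_type> kernel_type;

    std::vector<sample_type> samples;
    std::vector<double> targets;
    for (double v = -5; v <= 5; v += 0.1)
    {
        sample_type s;
        s(0) = v;
        samples.push_back(s);
        targets.push_back(v*v);   // invented target function y == x^2
    }

    dlib::krr_trainer<kernel_type> trainer;
    trainer.set_kernel(kernel_type(0.1));
    // get_lambda() == 0 here, so lambda is chosen by LOO cross-validation

    std::vector<double> loo_values;
    dlib::decision_function<kernel_type> df = trainer.train(samples, targets, loo_values);

    // loo_values[i] is the leave-one-out prediction for targets[i]
    double mse = 0;
    for (std::size_t i = 0; i < loo_values.size(); ++i)
        mse += (loo_values[i] - targets[i])*(loo_values[i] - targets[i]);
    std::cout << "LOO mean squared error: " << mse/loo_values.size() << std::endl;

    sample_type s;
    s(0) = 2.0;
    std::cout << "f(2) ~ " << df(s) << std::endl;
}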
- !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - std::vector<scalar_type>& loo_values, - scalar_type& lambda_used - ) const; - /*! - requires - - all the requirements for train(x,y) must be satisfied - ensures - - returns train(x,y) - (i.e. executes train(x,y) and returns its result) - - #loo_values.size() == y.size() - - for all valid i: - - #loo_values[i] == leave-one-out prediction for the value of y(i) based - on all the training samples other than (x(i),y(i)). - - #lambda_used == the value of lambda used to generate the - decision_function. Note that this lambda value is always - equal to get_lambda() if get_lambda() isn't 0. - !*/ - - }; - -} - -#endif // DLIB_KRR_TRAInER_ABSTRACT_Hh_ - diff --git a/ml/dlib/dlib/svm/linearly_independent_subset_finder.h b/ml/dlib/dlib/svm/linearly_independent_subset_finder.h deleted file mode 100644 index 3bac0df2c..000000000 --- a/ml/dlib/dlib/svm/linearly_independent_subset_finder.h +++ /dev/null @@ -1,540 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_LISfh_ -#define DLIB_LISfh_ - -#include <vector> - -#include "linearly_independent_subset_finder_abstract.h" -#include "../matrix.h" -#include "function.h" -#include "../std_allocator.h" -#include "../algs.h" -#include "../serialize.h" -#include "../is_kind.h" -#include "../string.h" -#include "../rand.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template <typename kernel_type> - class linearly_independent_subset_finder - { - /*! - INITIAL VALUE - - min_strength == 0 - - min_vect_idx == 0 - - K_inv.size() == 0 - - K.size() == 0 - - dictionary.size() == 0 - - CONVENTION - - max_dictionary_size() == my_max_dictionary_size - - get_kernel() == kernel - - minimum_tolerance() == min_tolerance - - size() == dictionary.size() - - get_dictionary() == mat(dictionary) - - K.nr() == dictionary.size() - - K.nc() == dictionary.size() - - for all valid r,c: - - K(r,c) == kernel(dictionary[r], dictionary[c]) - - K_inv == inv(K) - - - if (dictionary.size() == my_max_dictionary_size) then - - for all valid 0 < i < dictionary.size(): - - Let STRENGTHS[i] == the delta you would get for dictionary[i] (i.e. Approximately - Linearly Dependent value) if you removed dictionary[i] from this object and then - tried to add it back in. 
- - min_strength == the minimum value from STRENGTHS - - min_vect_idx == the index of the element in STRENGTHS with the smallest value - !*/ - - public: - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::sample_type type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - - linearly_independent_subset_finder ( - ) : - my_max_dictionary_size(100), - min_tolerance(0.001) - { - clear_dictionary(); - } - - linearly_independent_subset_finder ( - const kernel_type& kernel_, - unsigned long max_dictionary_size_, - scalar_type min_tolerance_ = 0.001 - ) : - kernel(kernel_), - my_max_dictionary_size(max_dictionary_size_), - min_tolerance(min_tolerance_) - { - // make sure requires clause is not broken - DLIB_ASSERT(min_tolerance_ > 0 && max_dictionary_size_ > 1, - "\tlinearly_independent_subset_finder()" - << "\n\tinvalid argument to constructor" - << "\n\tmin_tolerance_: " << min_tolerance_ - << "\n\tmax_dictionary_size_: " << max_dictionary_size_ - << "\n\tthis: " << this - ); - clear_dictionary(); - } - - unsigned long max_dictionary_size() const - { - return my_max_dictionary_size; - } - - const kernel_type& get_kernel ( - ) const - { - return kernel; - } - - scalar_type minimum_tolerance( - ) const - { - return min_tolerance; - } - - void set_minimum_tolerance ( - scalar_type min_tol - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(min_tol > 0, - "\tlinearly_independent_subset_finder::set_minimum_tolerance()" - << "\n\tinvalid argument to this function" - << "\n\tmin_tol: " << min_tol - << "\n\tthis: " << this - ); - min_tolerance = min_tol; - } - - void clear_dictionary () - { - dictionary.clear(); - min_strength = 0; - min_vect_idx = 0; - - K_inv.set_size(0,0); - K.set_size(0,0); - } - - scalar_type projection_error ( - const sample_type& x - ) const - { - const scalar_type kx = kernel(x,x); - if (dictionary.size() == 0) - { - return kx; - } - else - { - // fill in k - k.set_size(dictionary.size()); - for (long r = 0; r < k.nr(); ++r) - k(r) = kernel(x,dictionary[r]); - - // compute the error we would have if we approximated the new x sample - // with the dictionary. That is, do the ALD test from the KRLS paper. - a = K_inv*k; - scalar_type delta = kx - trans(k)*a; - - return delta; - } - } - - bool add ( - const sample_type& x - ) - { - const scalar_type kx = kernel(x,x); - if (dictionary.size() == 0) - { - // just ignore this sample if it is the zero vector (or really close to being zero) - if (std::abs(kx) > std::numeric_limits<scalar_type>::epsilon()) - { - // set initial state since this is the first sample we have seen - K_inv.set_size(1,1); - K_inv(0,0) = 1/kx; - - K.set_size(1,1); - K(0,0) = kx; - - dictionary.push_back(x); - return true; - } - return false; - } - else - { - // fill in k - k.set_size(dictionary.size()); - for (long r = 0; r < k.nr(); ++r) - k(r) = kernel(x,dictionary[r]); - - // compute the error we would have if we approximated the new x sample - // with the dictionary. That is, do the ALD test from the KRLS paper. - a = K_inv*k; - scalar_type delta = kx - trans(k)*a; - - // if this new vector is approximately linearly independent of the vectors - // in our dictionary. 
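// (Editorial gloss, not in the original source: the delta computed above is
// the ALD projection error  delta = k(x,x) - trans(k)*inv(K)*k,  i.e. the
// squared distance between x and the span of the current dictionary in
// feature space.  The test below admits x only when delta beats both the
// user supplied tolerance and, once the dictionary is full, min_strength,
// the smallest such error among the vectors already in the dictionary.)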
- if (delta > min_strength && delta > min_tolerance) - { - if (dictionary.size() == my_max_dictionary_size) - { - // if we have never computed the min_strength then we should compute it - if (min_strength == 0) - recompute_min_strength(); - - const long i = min_vect_idx; - - // replace the min strength vector with x. Put the new vector onto the end of - // dictionary and remove the vector at position i. - dictionary.erase(dictionary.begin()+i); - dictionary.push_back(x); - - // compute reduced K_inv. - // Remove the i'th vector from the inverse kernel matrix. This formula is basically - // just the reverse of the way K_inv is updated by equation 3.14 below. - temp = removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i); - - // recompute these guys since they were computed with the old - // kernel matrix - k2 = remove_row(k,i); - a2 = temp*k2; - delta = kx - trans(k2)*a2; - - // now update temp with the new dictionary vector - // update the middle part of the matrix - set_subm(K_inv, get_rect(temp)) = temp + a2*trans(a2)/delta; - // update the right column of the matrix - set_subm(K_inv, 0, temp.nr(),temp.nr(),1) = -a2/delta; - // update the bottom row of the matrix - set_subm(K_inv, temp.nr(), 0, 1, temp.nr()) = trans(-a2/delta); - // update the bottom right corner of the matrix - K_inv(temp.nr(), temp.nc()) = 1/delta; - - // now update the kernel matrix K - set_subm(K,get_rect(temp)) = removerc(K, i,i); - set_subm(K, 0, K.nr()-1,K.nr()-1,1) = k2; - // update the bottom row of the matrix - set_subm(K, K.nr()-1, 0, 1, K.nr()-1) = trans(k2); - K(K.nr()-1, K.nc()-1) = kx; - - // now we have to recompute the min_strength in this case - recompute_min_strength(); - } - else - { - // update K_inv by computing the new one in the temp matrix (equation 3.14 from Engel) - temp.set_size(K_inv.nr()+1, K_inv.nc()+1); - // update the middle part of the matrix - set_subm(temp, get_rect(K_inv)) = K_inv + a*trans(a)/delta; - // update the right column of the matrix - set_subm(temp, 0, K_inv.nr(),K_inv.nr(),1) = -a/delta; - // update the bottom row of the matrix - set_subm(temp, K_inv.nr(), 0, 1, K_inv.nr()) = trans(-a/delta); - // update the bottom right corner of the matrix - temp(K_inv.nr(), K_inv.nc()) = 1/delta; - // put temp into K_inv - temp.swap(K_inv); - - - // update K (the kernel matrix) - temp.set_size(K.nr()+1, K.nc()+1); - set_subm(temp, get_rect(K)) = K; - // update the right column of the matrix - set_subm(temp, 0, K.nr(),K.nr(),1) = k; - // update the bottom row of the matrix - set_subm(temp, K.nr(), 0, 1, K.nr()) = trans(k); - temp(K.nr(), K.nc()) = kx; - // put temp into K - temp.swap(K); - - - // add x to the dictionary - dictionary.push_back(x); - - } - return true; - } - else - { - return false; - } - } - } - - void swap ( - linearly_independent_subset_finder& item - ) - { - exchange(kernel, item.kernel); - dictionary.swap(item.dictionary); - exchange(min_strength, item.min_strength); - exchange(min_vect_idx, item.min_vect_idx); - K_inv.swap(item.K_inv); - K.swap(item.K); - exchange(my_max_dictionary_size, item.my_max_dictionary_size); - exchange(min_tolerance, item.min_tolerance); - - // non-state temp members - a.swap(item.a); - k.swap(item.k); - a2.swap(item.a2); - k2.swap(item.k2); - temp.swap(item.temp); - } - - size_t size ( - ) const { return dictionary.size(); } - - const matrix<sample_type,0,1,mem_manager_type> get_dictionary ( - ) const - { - return mat(dictionary); - } - - friend void serialize(const linearly_independent_subset_finder& item, 
std::ostream& out) - { - serialize(item.kernel, out); - serialize(item.dictionary, out); - serialize(item.min_strength, out); - serialize(item.min_vect_idx, out); - serialize(item.K_inv, out); - serialize(item.K, out); - serialize(item.my_max_dictionary_size, out); - serialize(item.min_tolerance, out); - } - - friend void deserialize(linearly_independent_subset_finder& item, std::istream& in) - { - deserialize(item.kernel, in); - deserialize(item.dictionary, in); - deserialize(item.min_strength, in); - deserialize(item.min_vect_idx, in); - deserialize(item.K_inv, in); - deserialize(item.K, in); - deserialize(item.my_max_dictionary_size, in); - deserialize(item.min_tolerance, in); - } - - const sample_type& operator[] ( - unsigned long index - ) const - { - return dictionary[index]; - } - - const matrix<scalar_type,0,0,mem_manager_type>& get_kernel_matrix ( - ) const - { - return K; - } - - const matrix<scalar_type,0,0,mem_manager_type>& get_inv_kernel_marix ( - ) const - { - return K_inv; - } - - private: - - typedef std_allocator<sample_type, mem_manager_type> alloc_sample_type; - typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type; - typedef std::vector<sample_type,alloc_sample_type> dictionary_vector_type; - typedef std::vector<scalar_type,alloc_scalar_type> scalar_vector_type; - - void recompute_min_strength ( - ) - /*! - ensures - - recomputes the min_strength and min_vect_idx values - so that they are correct with respect to the CONVENTION - !*/ - { - min_strength = std::numeric_limits<scalar_type>::max(); - - // here we loop over each dictionary vector and compute what its delta would be if - // we were to remove it from the dictionary and then try to add it back in. - for (unsigned long i = 0; i < dictionary.size(); ++i) - { - // compute a2 = K_inv*k but where dictionary vector i has been removed - a2 = (removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i)) * - (remove_row(colm(K,i),i)); - scalar_type delta = K(i,i) - trans(remove_row(colm(K,i),i))*a2; - - if (delta < min_strength) - { - min_strength = delta; - min_vect_idx = i; - } - } - } - - - kernel_type kernel; - dictionary_vector_type dictionary; - scalar_type min_strength; - unsigned long min_vect_idx; - - matrix<scalar_type,0,0,mem_manager_type> K_inv; - matrix<scalar_type,0,0,mem_manager_type> K; - - unsigned long my_max_dictionary_size; - scalar_type min_tolerance; - - // temp variables here just so we don't have to reconstruct them over and over. Thus, - // they aren't really part of the state of this object. 
- mutable matrix<scalar_type,0,1,mem_manager_type> a, a2; - mutable matrix<scalar_type,0,1,mem_manager_type> k, k2; - mutable matrix<scalar_type,0,0,mem_manager_type> temp; - - }; - -// ---------------------------------------------------------------------------------------- - - template <typename kernel_type> - void swap(linearly_independent_subset_finder<kernel_type>& a, linearly_independent_subset_finder<kernel_type>& b) - { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - const matrix_op<op_array_to_mat<linearly_independent_subset_finder<T> > > mat ( - const linearly_independent_subset_finder<T>& m - ) - { - typedef op_array_to_mat<linearly_independent_subset_finder<T> > op; - return matrix_op<op>(op(m)); - } - -// ---------------------------------------------------------------------------------------- - namespace impl - { - template < - typename kernel_type, - typename vector_type, - typename rand_type - > - void fill_lisf ( - linearly_independent_subset_finder<kernel_type>& lisf, - const vector_type& samples, - rand_type& rnd, - int sampling_size - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(samples) && sampling_size > 0, - "\t void fill_lisf()" - << "\n\t invalid arguments to this function" - << "\n\t is_vector(samples): " << is_vector(samples) - << "\n\t sampling_size: " << sampling_size - ); - - // no need to do anything if there aren't any samples - if (samples.size() == 0) - return; - - typedef typename kernel_type::scalar_type scalar_type; - - // Start out by guessing what a reasonable projection error tolerance is. We will use - // the biggest projection error we see in a small sample. - scalar_type tol = 0; - for (int i = 0; i < sampling_size; ++i) - { - const unsigned long idx = rnd.get_random_32bit_number()%samples.size(); - const scalar_type temp = lisf.projection_error(samples(idx)); - if (temp > tol) - tol = temp; - } - - const scalar_type min_tol = lisf.minimum_tolerance(); - - // run many rounds of random sampling. In each round we drop the tolerance lower. - while (tol >= min_tol && lisf.size() < lisf.max_dictionary_size()) - { - tol *= 0.5; - lisf.set_minimum_tolerance(std::max(tol, min_tol)); - int add_failures = 0; - - // Keep picking random samples and adding them into the lisf. Stop when we either - // fill it up or can't find any more samples with projection error larger than the - // current tolerance. 
- while (lisf.size() < lisf.max_dictionary_size() && add_failures < sampling_size) - { - if (lisf.add(samples(rnd.get_random_32bit_number()%samples.size())) == false) - { - ++add_failures; - } - } - } - - // set this back to its original value - lisf.set_minimum_tolerance(min_tol); - } - } - - template < - typename kernel_type, - typename vector_type - > - void fill_lisf ( - linearly_independent_subset_finder<kernel_type>& lisf, - const vector_type& samples - ) - { - dlib::rand rnd; - impl::fill_lisf(lisf, mat(samples),rnd, 2000); - } - - template < - typename kernel_type, - typename vector_type, - typename rand_type - > - typename enable_if<is_rand<rand_type> >::type fill_lisf ( - linearly_independent_subset_finder<kernel_type>& lisf, - const vector_type& samples, - rand_type& rnd, - const int sampling_size = 2000 - ) - { - impl::fill_lisf(lisf, mat(samples),rnd, sampling_size); - } - - template < - typename kernel_type, - typename vector_type, - typename rand_type - > - typename disable_if<is_rand<rand_type> >::type fill_lisf ( - linearly_independent_subset_finder<kernel_type>& lisf, - const vector_type& samples, - rand_type random_seed, - const int sampling_size = 2000 - ) - { - dlib::rand rnd; - rnd.set_seed(cast_to_string(random_seed)); - impl::fill_lisf(lisf, mat(samples), rnd, sampling_size); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_LISfh_ - diff --git a/ml/dlib/dlib/svm/linearly_independent_subset_finder_abstract.h b/ml/dlib/dlib/svm/linearly_independent_subset_finder_abstract.h deleted file mode 100644 index 3224f9a0a..000000000 --- a/ml/dlib/dlib/svm/linearly_independent_subset_finder_abstract.h +++ /dev/null @@ -1,327 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_LISf_ABSTRACT_ -#ifdef DLIB_LISf_ABSTRACT_ - -#include "../algs.h" -#include "../serialize.h" -#include "kernel_abstract.h" - -namespace dlib -{ - - template < - typename kernel_type - > - class linearly_independent_subset_finder - { - /*! - REQUIREMENTS ON kernel_type - is a kernel function object as defined in dlib/svm/kernel_abstract.h - - INITIAL VALUE - - size() == 0 - - WHAT THIS OBJECT REPRESENTS - This is an implementation of an online algorithm for recursively finding a - set (aka dictionary) of linearly independent vectors in a kernel induced - feature space. To use it you decide how large you would like the dictionary - to be and then you feed it sample points. - - The implementation uses the Approximately Linearly Dependent metric described - in the paper The Kernel Recursive Least Squares Algorithm by Yaakov Engel to - decide which points are more linearly independent than others. The metric is - simply the squared distance between a test point and the subspace spanned by - the set of dictionary vectors. - - Each time you present this object with a new sample point (via this->add()) - it calculates the projection distance and if it is sufficiently large then this - new point is included into the dictionary. Note that this object can be configured - to have a maximum size. Once the max dictionary size is reached each new point - kicks out a previous point. This is done by removing the dictionary vector that - has the smallest projection distance onto the others. That is, the "least linearly - independent" vector is removed to make room for the new one. 
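A hedged usage sketch of the replacement behaviour just described, pairing the finder with the fill_lisf() helper declared later in this header; the Gaussian toy data, kernel width, and dictionary cap are invented for illustration:

#include <dlib/svm.h>
#include <dlib/rand.h>
#include <iostream>
#include <vector>

int main()
{
    typedef dlib::matrix<double,2,1> sample_type;
    typedef dlib::radial_basis_kernel<sample_type> kernel_type;

    // keep at most the 50 "most linearly independent" samples
    dlib::linearly_independent_subset_finder<kernel_type> lisf(kernel_type(0.5), 50);

    dlib::rand rnd;
    std::vector<sample_type> samples;
    for (int i = 0; i < 1000; ++i)
    {
        sample_type s;
        s(0) = rnd.get_random_gaussian();
        s(1) = rnd.get_random_gaussian();
        samples.push_back(s);
    }

    // randomized helper that repeatedly calls lisf.add() for us
    dlib::fill_lisf(lisf, samples);

    std::cout << "dictionary size: " << lisf.size() << std::endl;
}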
- !*/ - - public: - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::sample_type type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - - linearly_independent_subset_finder ( - ); - /*! - ensures - - #minimum_tolerance() == 0.001 - - this object is properly initialized - - #get_kernel() == kernel_type() (i.e. whatever the default is for the supplied kernel) - - #max_dictionary_size() == 100 - !*/ - - linearly_independent_subset_finder ( - const kernel_type& kernel_, - unsigned long max_dictionary_size_, - scalar_type min_tolerance = 0.001 - ); - /*! - requires - - min_tolerance > 0 - - max_dictionary_size_ > 1 - ensures - - #minimum_tolerance() == min_tolerance - - this object is properly initialized - - #get_kernel() == kernel_ - - #max_dictionary_size() == max_dictionary_size_ - !*/ - - const kernel_type& get_kernel ( - ) const; - /*! - ensures - - returns a const reference to the kernel used by this object - !*/ - - unsigned long max_dictionary_size( - ) const; - /*! - ensures - - returns the maximum number of dictionary vectors this object - will accumulate. That is, size() will never be - greater than max_dictionary_size(). - !*/ - - scalar_type minimum_tolerance( - ) const; - /*! - ensures - - returns the minimum projection error necessary to include a sample point - into the dictionary. - !*/ - - void set_minimum_tolerance ( - scalar_type min_tolerance - ); - /*! - requires - - min_tolerance > 0 - ensures - - #minimum_tolerance() == min_tolerance - !*/ - - void clear_dictionary ( - ); - /*! - ensures - - clears out all the data (e.g. #size() == 0) - !*/ - - bool add ( - const sample_type& x - ); - /*! - ensures - - if (size() < max_dictionary_size()) then - - if (projection_error(x) > minimum_tolerance()) then - - adds x into the dictionary - - (*this)[#size()-1] == x - - #size() == size() + 1 - - returns true - - else - - the dictionary is not changed - - returns false - - else - - #size() == size() - (i.e. the number of vectors in this object doesn't change) - - since the dictionary is full, adding a new element means we have to - remove one of the current ones. So let proj_error[i] be equal to the - projection error obtained when projecting dictionary vector (*this)[i] - onto the other elements of the dictionary. Then let min_proj_error - be equal to the minimum value in proj_error. The dictionary element - with the minimum projection error is the "least linearly independent" - vector in the dictionary and is the one which will be removed to make - room for a new element. - - if (projection_error(x) > minimum_tolerance() && projection_error(x) > min_proj_error) - - the least linearly independent vector in this object is removed - - adds x into the dictionary - - (*this)[#size()-1] == x - - returns true - - else - - the dictionary is not changed - - returns false - !*/ - - scalar_type projection_error ( - const sample_type& x - ) const; - /*! - ensures - - returns the squared distance between x and the subspace spanned by - the set of dictionary vectors. (e.g. this is the same number that - gets returned by the empirical_kernel_map::project() function's - projection_error argument when the ekm is loaded with the dictionary - vectors.) - - Note that if the dictionary is empty then the return value is - equal to get_kernel()(x,x). - !*/ - - void swap ( - linearly_independent_subset_finder& item - ); - /*!
- ensures - - swaps *this with item - !*/ - - size_t size ( - ) const; - /*! - ensures - - returns the number of vectors in the dictionary. - !*/ - - const sample_type& operator[] ( - unsigned long index - ) const; - /*! - requires - - index < size() - ensures - - returns the index'th element in the set of linearly independent - vectors contained in this object. - !*/ - - const matrix<sample_type,0,1,mem_manager_type> get_dictionary ( - ) const; - /*! - ensures - - returns a column vector that contains all the dictionary - vectors in this object. - !*/ - - const matrix<scalar_type,0,0,mem_manager_type>& get_kernel_matrix ( - ) const; - /*! - ensures - - returns a matrix K such that: - - K.nr() == K.nc() == size() - - K == kernel_matrix(get_kernel(), get_dictionary()) - i.e. K == the kernel matrix for the dictionary vectors - !*/ - - const matrix<scalar_type,0,0,mem_manager_type>& get_inv_kernel_marix ( - ) const; - /*! - ensures - - if (size() != 0) - - returns inv(get_kernel_matrix()) - - else - - returns an empty matrix - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type - > - void swap( - linearly_independent_subset_finder<kernel_type>& a, - linearly_independent_subset_finder<kernel_type>& b - ) { a.swap(b); } - /*! - provides a global swap function - !*/ - - template < - typename kernel_type - > - void serialize ( - const linearly_independent_subset_finder<kernel_type>& item, - std::ostream& out - ); - /*! - provides serialization support for linearly_independent_subset_finder objects - !*/ - - template < - typename kernel_type - > - void deserialize ( - linearly_independent_subset_finder<kernel_type>& item, - std::istream& in - ); - /*! - provides serialization support for linearly_independent_subset_finder objects - !*/ - - template < - typename T - > - const matrix_exp mat ( - const linearly_independent_subset_finder<T>& m - ); - /*! - ensures - - converts m into a matrix - - returns a matrix R such that: - - is_col_vector(R) == true - - R.size() == m.size() - - for all valid r: - R(r) == m[r] - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type, - typename vector_type, - typename rand_type - > - void fill_lisf ( - linearly_independent_subset_finder<kernel_type>& lisf, - const vector_type& samples, - rand_type& rnd, - int sampling_size = 2000 - ); - /*! - requires - - vector_type == a dlib::matrix or something convertible to one via - mat() - - is_vector(mat(samples)) == true - - rand_type == an implementation of rand/rand_kernel_abstract.h or a type - convertible to a string via cast_to_string() - - sampling_size > 0 - ensures - - The purpose of this function is to fill lisf with points from samples. It does - this by randomly sampling elements of samples until no more can be added. The - precise stopping condition is when sampling_size additions to lisf have failed - or the max dictionary size has been reached. - - This function employs a random number generator. If rand_type is a random - number generator then it uses the instance given. Otherwise it uses cast_to_string(rnd) - to seed a new random number generator. - !*/ - - template < - typename kernel_type, - typename vector_type - > - void fill_lisf ( - linearly_independent_subset_finder<kernel_type>& lisf, - const vector_type& samples - ); - /*! 
- requires - - vector_type == a dlib::matrix or something convertible to one via - mat() - - is_vector(mat(samples)) == true - ensures - - performs fill_lisf(lisf, samples, default_rand_generator, 2000) - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_LISf_ABSTRACT_ - diff --git a/ml/dlib/dlib/svm/multiclass_tools.h b/ml/dlib/dlib/svm/multiclass_tools.h deleted file mode 100644 index d97e8aa04..000000000 --- a/ml/dlib/dlib/svm/multiclass_tools.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_MULTICLASS_TOoLS_Hh_ -#define DLIB_MULTICLASS_TOoLS_Hh_ - -#include "multiclass_tools_abstract.h" - -#include <vector> -#include <set> -#include "../unordered_pair.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template <typename label_type> - std::vector<label_type> select_all_distinct_labels ( - const std::vector<label_type>& labels - ) - { - std::set<label_type> temp; - temp.insert(labels.begin(), labels.end()); - return std::vector<label_type>(temp.begin(), temp.end()); - } - -// ---------------------------------------------------------------------------------------- - - template <typename label_type, typename U> - std::vector<unordered_pair<label_type> > find_missing_pairs ( - const std::map<unordered_pair<label_type>,U>& bdfs - ) - { - typedef std::map<unordered_pair<label_type>,U> map_type; - - // find all the labels - std::set<label_type> temp; - for (typename map_type::const_iterator i = bdfs.begin(); i != bdfs.end(); ++i) - { - temp.insert(i->first.first); - temp.insert(i->first.second); - } - - std::vector<unordered_pair<label_type> > missing_pairs; - - // now make sure all label pairs are present - typename std::set<label_type>::const_iterator i, j; - for (i = temp.begin(); i != temp.end(); ++i) - { - for (j = i, ++j; j != temp.end(); ++j) - { - const unordered_pair<label_type> p(*i, *j); - - if (bdfs.count(p) == 0) - missing_pairs.push_back(p); - } - } - - return missing_pairs; - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_MULTICLASS_TOoLS_Hh_ - - diff --git a/ml/dlib/dlib/svm/multiclass_tools_abstract.h b/ml/dlib/dlib/svm/multiclass_tools_abstract.h deleted file mode 100644 index 9e7774d3f..000000000 --- a/ml/dlib/dlib/svm/multiclass_tools_abstract.h +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_MULTICLASS_TOoLS_ABSTRACT_Hh_ -#ifdef DLIB_MULTICLASS_TOoLS_ABSTRACT_Hh_ - -#include <vector> -#include <map> -#include "../unordered_pair.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template <typename label_type> - std::vector<label_type> select_all_distinct_labels ( - const std::vector<label_type>& labels - ); - /*! - ensures - - Determines all distinct values present in labels and stores them - into a sorted vector and returns it. They are sorted in ascending - order. 
- !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename label_type, typename U> - std::vector<unordered_pair<label_type> > find_missing_pairs ( - const std::map<unordered_pair<label_type>,U>& binary_decision_functions - ); - /*! - ensures - - Let L denote the set of all label_type values present in binary_decision_functions. - - This function finds all the label pairs with both elements distinct and in L but - not also in binary_decision_functions. All these missing pairs are stored - in a sorted vector and returned. They are sorted in ascending order. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_MULTICLASS_TOoLS_ABSTRACT_Hh_ - diff --git a/ml/dlib/dlib/svm/null_df.h b/ml/dlib/dlib/svm/null_df.h deleted file mode 100644 index 2cbbf04a7..000000000 --- a/ml/dlib/dlib/svm/null_df.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_NULL_DECISION_FUnCTION_Hh_ -#define DLIB_NULL_DECISION_FUnCTION_Hh_ - -#include <iostream> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - struct null_df - { - /*! - WHAT THIS OBJECT REPRESENTS - This is a type used to represent an unused field in the list of template - arguments of the one_vs_one_decision_function and one_vs_all_decision_function - templates. As such, null_df doesn't actually do anything. - !*/ - template <typename T> - double operator() ( const T&) const { return 0; } - }; - - inline void serialize(const null_df&, std::ostream&) {} - inline void deserialize(null_df&, std::istream&) {} - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_NULL_DECISION_FUnCTION_Hh_ - diff --git a/ml/dlib/dlib/svm/null_trainer.h b/ml/dlib/dlib/svm/null_trainer.h deleted file mode 100644 index 015b00c15..000000000 --- a/ml/dlib/dlib/svm/null_trainer.h +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
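Before the null_trainer sources begin, a small sketch of the two multiclass helpers documented above. It is illustrative only: the integer labels are invented, the int-valued map merely stands in for a real table of binary decision functions (find_missing_pairs() only inspects the keys), and <dlib/svm.h> is assumed to pull in multiclass_tools and unordered_pair:

#include <dlib/svm.h>
#include <iostream>
#include <map>
#include <vector>

int main()
{
    std::vector<int> labels = {3, 1, 2, 1, 3};

    // distinct labels, sorted ascending -> 1 2 3
    for (int l : dlib::select_all_distinct_labels(labels))
        std::cout << l << ' ';
    std::cout << '\n';

    // pretend only the (1,2) and (2,3) one-vs-one classifiers were trained
    std::map<dlib::unordered_pair<int>, int> dfs;
    dfs[dlib::make_unordered_pair(1, 2)] = 0;
    dfs[dlib::make_unordered_pair(2, 3)] = 0;

    // labels seen in the map are {1,2,3}, so the absent (1,3) pair is reported
    for (const auto& p : dlib::find_missing_pairs(dfs))
        std::cout << "missing: (" << p.first << "," << p.second << ")\n";
}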
-#ifndef DLIB_NULL_TRAINERs_H_ -#define DLIB_NULL_TRAINERs_H_ - -#include "null_trainer_abstract.h" -#include "../algs.h" -#include "function_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename dec_funct_type - > - class null_trainer_type - { - public: - typedef typename dec_funct_type::kernel_type kernel_type; - typedef typename dec_funct_type::scalar_type scalar_type; - typedef typename dec_funct_type::sample_type sample_type; - typedef typename dec_funct_type::mem_manager_type mem_manager_type; - typedef dec_funct_type trained_function_type; - - null_trainer_type ( - ){} - - null_trainer_type ( - const dec_funct_type& dec_funct_ - ) : dec_funct(dec_funct_) {} - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const dec_funct_type& train ( - const in_sample_vector_type& , - const in_scalar_vector_type& - ) const { return dec_funct; } - - private: - dec_funct_type dec_funct; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename dec_funct_type - > - const null_trainer_type<dec_funct_type> null_trainer ( - const dec_funct_type& dec_funct - ) { return null_trainer_type<dec_funct_type>(dec_funct); } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_NULL_TRAINERs_H_ - diff --git a/ml/dlib/dlib/svm/null_trainer_abstract.h b/ml/dlib/dlib/svm/null_trainer_abstract.h deleted file mode 100644 index 25f6a5443..000000000 --- a/ml/dlib/dlib/svm/null_trainer_abstract.h +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_NULL_TRAINERs_ABSTRACT_ -#ifdef DLIB_NULL_TRAINERs_ABSTRACT_ - -#include "../algs.h" -#include "function_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename dec_funct_type - > - class null_trainer_type - { - /*! - REQUIREMENTS ON dec_funct_type - dec_funct_type can be any copyable type that provides the needed - typedefs used below (e.g. kernel_type, scalar_type, etc.). - - WHAT THIS OBJECT REPRESENTS - This object is a simple tool for turning a decision function - into a trainer object that always returns the original decision - function when you try to train with it. - - dlib contains a few "training post processing" algorithms (e.g. - reduced() and reduced2()). These tools take in a trainer object, - tell it to perform training, and then they take the output decision - function and do some kind of post processing to it. The null_trainer_type - object is useful because you can use it to run an already - learned decision function through the training post processing - algorithms by turning a decision function into a null_trainer_type - and then giving it to a post processor. - !*/ - - public: - typedef typename dec_funct_type::kernel_type kernel_type; - typedef typename dec_funct_type::scalar_type scalar_type; - typedef typename dec_funct_type::sample_type sample_type; - typedef typename dec_funct_type::mem_manager_type mem_manager_type; - typedef dec_funct_type trained_function_type; - - null_trainer_type ( - ); - /*! - ensures - - any call to this->train(x,y) will return a default initialized - dec_funct_type object. - !*/ - - null_trainer_type ( - const dec_funct_type& dec_funct - ); - /*! 
- ensures - - any call to this->train(x,y) will always return a copy of - the given dec_funct object. - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const dec_funct_type& train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const; - /*! - ensures - - returns a copy of the decision function object given to - this object's constructor. - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename dec_funct_type - > - const null_trainer_type<dec_funct_type> null_trainer ( - const dec_funct_type& dec_funct - ) { return null_trainer_type<dec_funct_type>(dec_funct); } - /*! - ensures - - returns a null_trainer_type object that has been instantiated with - the given arguments. That is, this function returns a null_trainer_type - trainer that will return a copy of the given dec_funct object every time - someone calls its train() function. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_NULL_TRAINERs_ABSTRACT_ - - diff --git a/ml/dlib/dlib/svm/num_nonnegative_weights.h b/ml/dlib/dlib/svm/num_nonnegative_weights.h deleted file mode 100644 index 4f21f9b69..000000000 --- a/ml/dlib/dlib/svm/num_nonnegative_weights.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (C) 2012 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_NUM_NONNEGATIVE_WEIGHtS_Hh_ -#define DLIB_NUM_NONNEGATIVE_WEIGHtS_Hh_ - -#include "../enable_if.h" - -namespace dlib -{ - - namespace impl2 - { - template < - typename T, - unsigned long (T::*funct)()const - > - struct hnnf_helper - { - typedef char type; - }; - - template <typename T> - char has_num_nonnegative_weights_helper( typename hnnf_helper<T,&T::num_nonnegative_weights>::type = 0 ) { return 0;} - - struct two_bytes - { - char a[2]; - }; - - template <typename T> - two_bytes has_num_nonnegative_weights_helper(int) { return two_bytes();} - - template <typename T> - struct work_around_visual_studio_bug - { - const static unsigned long U = sizeof(has_num_nonnegative_weights_helper<T>('a')); - }; - - - // This is a template to tell you if a feature_extractor has a num_nonnegative_weights function or not. - template <typename T, unsigned long U = work_around_visual_studio_bug<T>::U > - struct has_num_nonnegative_weights - { - static const bool value = false; - }; - - template <typename T> - struct has_num_nonnegative_weights <T,1> - { - static const bool value = true; - }; - - - } - - // call fe.num_nonnegative_weights() if it exists, otherwise return 0. - template <typename feature_extractor> - typename enable_if<impl2::has_num_nonnegative_weights<feature_extractor>,unsigned long>::type num_nonnegative_weights ( - const feature_extractor& fe - ) - { - return fe.num_nonnegative_weights(); - } - - template <typename feature_extractor> - typename disable_if<impl2::has_num_nonnegative_weights<feature_extractor>,unsigned long>::type num_nonnegative_weights ( - const feature_extractor& /*fe*/ - ) - { - return 0; - } - -} - -#endif // DLIB_NUM_NONNEGATIVE_WEIGHtS_Hh_ - diff --git a/ml/dlib/dlib/svm/one_vs_all_decision_function.h b/ml/dlib/dlib/svm/one_vs_all_decision_function.h deleted file mode 100644 index 8afa52344..000000000 --- a/ml/dlib/dlib/svm/one_vs_all_decision_function.h +++ /dev/null @@ -1,265 +0,0 @@ -// Copyright (C) 2010 Davis E. 
King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_ONE_VS_ALL_DECISION_FUnCTION_Hh_ -#define DLIB_ONE_VS_ALL_DECISION_FUnCTION_Hh_ - -#include "one_vs_all_decision_function_abstract.h" - -#include "../serialize.h" -#include "../type_safe_union.h" -#include <sstream> -#include <map> -#include "../any.h" -#include "null_df.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename one_vs_all_trainer, - typename DF1 = null_df, typename DF2 = null_df, typename DF3 = null_df, - typename DF4 = null_df, typename DF5 = null_df, typename DF6 = null_df, - typename DF7 = null_df, typename DF8 = null_df, typename DF9 = null_df, - typename DF10 = null_df - > - class one_vs_all_decision_function - { - public: - - typedef typename one_vs_all_trainer::label_type result_type; - typedef typename one_vs_all_trainer::sample_type sample_type; - typedef typename one_vs_all_trainer::scalar_type scalar_type; - typedef typename one_vs_all_trainer::mem_manager_type mem_manager_type; - - typedef std::map<result_type, any_decision_function<sample_type, scalar_type> > binary_function_table; - - one_vs_all_decision_function() :num_classes(0) {} - - explicit one_vs_all_decision_function( - const binary_function_table& dfs_ - ) : dfs(dfs_) - { - num_classes = dfs.size(); - } - - const binary_function_table& get_binary_decision_functions ( - ) const - { - return dfs; - } - - const std::vector<result_type> get_labels ( - ) const - { - std::vector<result_type> temp; - temp.reserve(dfs.size()); - for (typename binary_function_table::const_iterator i = dfs.begin(); i != dfs.end(); ++i) - { - temp.push_back(i->first); - } - return temp; - } - - - template < - typename df1, typename df2, typename df3, typename df4, typename df5, - typename df6, typename df7, typename df8, typename df9, typename df10 - > - one_vs_all_decision_function ( - const one_vs_all_decision_function<one_vs_all_trainer, - df1, df2, df3, df4, df5, - df6, df7, df8, df9, df10>& item - ) : dfs(item.get_binary_decision_functions()), num_classes(item.number_of_classes()) {} - - unsigned long number_of_classes ( - ) const - { - return num_classes; - } - - std::pair<result_type, scalar_type> predict ( - const sample_type& sample - ) const - { - DLIB_ASSERT(number_of_classes() != 0, - "\t pair<result_type,scalar_type> one_vs_all_decision_function::predict()" - << "\n\t You can't make predictions with an empty decision function." - << "\n\t this: " << this - ); - - result_type best_label = result_type(); - scalar_type best_score = -std::numeric_limits<scalar_type>::infinity(); - - // run all the classifiers over the sample and find the best one - for(typename binary_function_table::const_iterator i = dfs.begin(); i != dfs.end(); ++i) - { - const scalar_type score = i->second(sample); - - if (score > best_score) - { - best_score = score; - best_label = i->first; - } - } - - return std::make_pair(best_label, best_score); - } - - result_type operator() ( - const sample_type& sample - ) const - { - DLIB_ASSERT(number_of_classes() != 0, - "\t result_type one_vs_all_decision_function::operator()" - << "\n\t You can't make predictions with an empty decision function." 
- << "\n\t this: " << this - ); - - return predict(sample).first; - } - - - - private: - binary_function_table dfs; - unsigned long num_classes; - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename DF1, typename DF2, typename DF3, - typename DF4, typename DF5, typename DF6, - typename DF7, typename DF8, typename DF9, - typename DF10 - > - void serialize( - const one_vs_all_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item, - std::ostream& out - ) - { - try - { - type_safe_union<DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10> temp; - typedef typename T::label_type result_type; - typedef typename T::sample_type sample_type; - typedef typename T::scalar_type scalar_type; - typedef std::map<result_type, any_decision_function<sample_type, scalar_type> > binary_function_table; - - const unsigned long version = 1; - serialize(version, out); - - const unsigned long size = item.get_binary_decision_functions().size(); - serialize(size, out); - - for(typename binary_function_table::const_iterator i = item.get_binary_decision_functions().begin(); - i != item.get_binary_decision_functions().end(); ++i) - { - serialize(i->first, out); - - if (i->second.template contains<DF1>()) temp.template get<DF1>() = any_cast<DF1>(i->second); - else if (i->second.template contains<DF2>()) temp.template get<DF2>() = any_cast<DF2>(i->second); - else if (i->second.template contains<DF3>()) temp.template get<DF3>() = any_cast<DF3>(i->second); - else if (i->second.template contains<DF4>()) temp.template get<DF4>() = any_cast<DF4>(i->second); - else if (i->second.template contains<DF5>()) temp.template get<DF5>() = any_cast<DF5>(i->second); - else if (i->second.template contains<DF6>()) temp.template get<DF6>() = any_cast<DF6>(i->second); - else if (i->second.template contains<DF7>()) temp.template get<DF7>() = any_cast<DF7>(i->second); - else if (i->second.template contains<DF8>()) temp.template get<DF8>() = any_cast<DF8>(i->second); - else if (i->second.template contains<DF9>()) temp.template get<DF9>() = any_cast<DF9>(i->second); - else if (i->second.template contains<DF10>()) temp.template get<DF10>() = any_cast<DF10>(i->second); - else throw serialization_error("Can't serialize one_vs_all_decision_function. 
Not all decision functions defined."); - - serialize(temp,out); - } - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing an object of type one_vs_all_decision_function"); - } - - } - -// ---------------------------------------------------------------------------------------- - - namespace impl_ova - { - template <typename sample_type, typename scalar_type> - struct copy_to_df_helper - { - copy_to_df_helper(any_decision_function<sample_type, scalar_type>& target_) : target(target_) {} - - any_decision_function<sample_type, scalar_type>& target; - - template <typename T> - void operator() ( - const T& item - ) const - { - target = item; - } - }; - } - - template < - typename T, - typename DF1, typename DF2, typename DF3, - typename DF4, typename DF5, typename DF6, - typename DF7, typename DF8, typename DF9, - typename DF10 - > - void deserialize( - one_vs_all_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item, - std::istream& in - ) - { - try - { - type_safe_union<DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10> temp; - typedef typename T::label_type result_type; - typedef typename T::sample_type sample_type; - typedef typename T::scalar_type scalar_type; - typedef impl_ova::copy_to_df_helper<sample_type, scalar_type> copy_to; - - unsigned long version; - deserialize(version, in); - - if (version != 1) - throw serialization_error("Can't deserialize one_vs_all_decision_function. Wrong version."); - - unsigned long size; - deserialize(size, in); - - typedef std::map<result_type, any_decision_function<sample_type, scalar_type> > binary_function_table; - binary_function_table dfs; - - result_type l; - for (unsigned long i = 0; i < size; ++i) - { - deserialize(l, in); - deserialize(temp, in); - if (temp.template contains<null_df>()) - throw serialization_error("A sub decision function of unknown type was encountered."); - - temp.apply_to_contents(copy_to(dfs[l])); - } - - item = one_vs_all_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>(dfs); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing an object of type one_vs_all_decision_function"); - } - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_ONE_VS_ALL_DECISION_FUnCTION_Hh_ - - - diff --git a/ml/dlib/dlib/svm/one_vs_all_decision_function_abstract.h b/ml/dlib/dlib/svm/one_vs_all_decision_function_abstract.h deleted file mode 100644 index 8daacb8d6..000000000 --- a/ml/dlib/dlib/svm/one_vs_all_decision_function_abstract.h +++ /dev/null @@ -1,214 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_ONE_VS_ALL_DECISION_FUnCTION_ABSTRACT_Hh_ -#ifdef DLIB_ONE_VS_ALL_DECISION_FUnCTION_ABSTRACT_Hh_ - - -#include "../serialize.h" -#include <map> -#include "../any/any_decision_function_abstract.h" -#include "one_vs_all_trainer_abstract.h" -#include "null_df.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename one_vs_all_trainer, - typename DF1 = null_df, typename DF2 = null_df, typename DF3 = null_df, - typename DF4 = null_df, typename DF5 = null_df, typename DF6 = null_df, - typename DF7 = null_df, typename DF8 = null_df, typename DF9 = null_df, - typename DF10 = null_df - > - class one_vs_all_decision_function - { - /*! 
- REQUIREMENTS ON one_vs_all_trainer - This should be an instantiation of the one_vs_all_trainer template. - It is used to infer which types are used for various things, such as - representing labels. - - REQUIREMENTS ON DF* - These types can either be left at their default values or set - to any kind of decision function object capable of being - stored in an any_decision_function<sample_type,scalar_type> - object. These types should also be serializable. - - WHAT THIS OBJECT REPRESENTS - This object represents a multiclass classifier built out of a set of - binary classifiers. Each binary classifier is used to vote for the - correct multiclass label using a one vs. all strategy. Therefore, - if you have N classes then there will be N binary classifiers inside - this object. - - Note that the DF* template arguments are only used if you want - to serialize and deserialize one_vs_all_decision_function objects. - Specifically, all the types of binary decision function contained - within a one_vs_all_decision_function must be listed in the - template arguments if serialization and deserialization is to - be used. - - THREAD SAFETY - It is always safe to use distinct instances of this object in different - threads. However, when a single instance is shared between threads then - the following rules apply: - It is safe to call the const members of this object from multiple - threads so long as all the decision functions contained in this object - are also threadsafe. This is because the const members are purely - read-only operations. However, any operation that modifies a - one_vs_all_decision_function is not threadsafe. - !*/ - public: - - typedef typename one_vs_all_trainer::label_type result_type; - typedef typename one_vs_all_trainer::sample_type sample_type; - typedef typename one_vs_all_trainer::scalar_type scalar_type; - typedef typename one_vs_all_trainer::mem_manager_type mem_manager_type; - - typedef std::map<result_type, any_decision_function<sample_type, scalar_type> > binary_function_table; - - one_vs_all_decision_function( - ); - /*! - ensures - - #number_of_classes() == 0 - - #get_binary_decision_functions().size() == 0 - - #get_labels().size() == 0 - !*/ - - explicit one_vs_all_decision_function( - const binary_function_table& decision_functions - ); - /*! - ensures - - #get_binary_decision_functions() == decision_functions - - #get_labels() == a list of all the labels which appear in the - given set of decision functions - - #number_of_classes() == #get_labels().size() - !*/ - - template < - typename df1, typename df2, typename df3, typename df4, typename df5, - typename df6, typename df7, typename df8, typename df9, typename df10 - > - one_vs_all_decision_function ( - const one_vs_all_decision_function<one_vs_all_trainer, - df1, df2, df3, df4, df5, - df6, df7, df8, df9, df10>& item - ); - /*! - ensures - - #*this will be a copy of item - - #number_of_classes() == item.number_of_classes() - - #get_labels() == item.get_labels() - - #get_binary_decision_functions() == item.get_binary_decision_functions() - !*/ - - const binary_function_table& get_binary_decision_functions ( - ) const; - /*! - ensures - - returns the table of binary decision functions used by this - object. The label given to a test sample is computed by - determining which binary decision function has the largest - (i.e. most positive) output and returning the label associated - with that decision function. - !*/ - - const std::vector<result_type> get_labels ( - ) const; - /*! 
- ensures - - returns a vector containing all the labels which can be - predicted by this object. - !*/ - - unsigned long number_of_classes ( - ) const; - /*! - ensures - - returns get_labels().size() - (i.e. returns the number of different labels/classes predicted by - this object) - !*/ - - std::pair<result_type, scalar_type> predict ( - const sample_type& sample - ) const; - /*! - requires - - number_of_classes() != 0 - ensures - - Evaluates all the decision functions in get_binary_decision_functions() - and returns the predicted label and score for the input sample. That is, - returns std::make_pair(label,score) - - The label is determined by whichever classifier outputs the largest - score. - !*/ - - result_type operator() ( - const sample_type& sample - ) const - /*! - requires - - number_of_classes() != 0 - ensures - - Evaluates all the decision functions in get_binary_decision_functions() - and returns the predicted label. That is, returns predict(sample).first. - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename DF1, typename DF2, typename DF3, - typename DF4, typename DF5, typename DF6, - typename DF7, typename DF8, typename DF9, - typename DF10 - > - void serialize( - const one_vs_all_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item, - std::ostream& out - ); - /*! - ensures - - writes the given item to the output stream out. - throws - - serialization_error. - This is thrown if there is a problem writing to the ostream or if item - contains a type of decision function not listed among the DF* template - arguments. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename DF1, typename DF2, typename DF3, - typename DF4, typename DF5, typename DF6, - typename DF7, typename DF8, typename DF9, - typename DF10 - > - void deserialize( - one_vs_all_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item, - std::istream& in - ); - /*! - ensures - - deserializes a one_vs_all_decision_function from in and stores it in item. - throws - - serialization_error. - This is thrown if there is a problem reading from the istream or if the - serialized data contains decision functions not listed among the DF* - template arguments. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_ONE_VS_ALL_DECISION_FUnCTION_ABSTRACT_Hh_ - diff --git a/ml/dlib/dlib/svm/one_vs_all_trainer.h b/ml/dlib/dlib/svm/one_vs_all_trainer.h deleted file mode 100644 index bcb006a41..000000000 --- a/ml/dlib/dlib/svm/one_vs_all_trainer.h +++ /dev/null @@ -1,234 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
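A hedged round-trip sketch tying together the serialize()/deserialize() specs above (the kernel choice and file name are arbitrary): every concrete binary decision function type stored in the object must be listed among the DF template arguments, otherwise serialization throws.

    #include <dlib/svm.h>
    #include <fstream>

    using namespace dlib;

    typedef matrix<double,0,1> sample_type;
    typedef one_vs_all_trainer<any_trainer<sample_type> > ova_trainer;
    typedef one_vs_all_decision_function<ova_trainer,
            decision_function<linear_kernel<sample_type> > > df_type;

    void round_trip(const df_type& df)
    {
        // write, then read back; both calls throw serialization_error on failure
        { std::ofstream fout("ova_df.dat", std::ios::binary); serialize(df, fout); }
        df_type df2;
        std::ifstream fin("ova_df.dat", std::ios::binary);
        deserialize(df2, fin);
    }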
-#ifndef DLIB_ONE_VS_ALL_TRAiNER_Hh_ -#define DLIB_ONE_VS_ALL_TRAiNER_Hh_ - -#include "one_vs_all_trainer_abstract.h" - -#include "one_vs_all_decision_function.h" -#include <vector> - -#include "multiclass_tools.h" - -#include <sstream> -#include <iostream> - -#include "../any.h" -#include <map> -#include <set> -#include "../threads.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename any_trainer, - typename label_type_ = double - > - class one_vs_all_trainer - { - public: - typedef label_type_ label_type; - - typedef typename any_trainer::sample_type sample_type; - typedef typename any_trainer::scalar_type scalar_type; - typedef typename any_trainer::mem_manager_type mem_manager_type; - - typedef one_vs_all_decision_function<one_vs_all_trainer> trained_function_type; - - one_vs_all_trainer ( - ) : - verbose(false), - num_threads(4) - {} - - void set_trainer ( - const any_trainer& trainer - ) - { - default_trainer = trainer; - trainers.clear(); - } - - void set_trainer ( - const any_trainer& trainer, - const label_type& l - ) - { - trainers[l] = trainer; - } - - void be_verbose ( - ) - { - verbose = true; - } - - void be_quiet ( - ) - { - verbose = false; - } - - void set_num_threads ( - unsigned long num - ) - { - num_threads = num; - } - - unsigned long get_num_threads ( - ) const - { - return num_threads; - } - - struct invalid_label : public dlib::error - { - invalid_label(const std::string& msg, const label_type& l_ - ) : dlib::error(msg), l(l_) {}; - - virtual ~invalid_label( - ) throw() {} - - label_type l; - }; - - trained_function_type train ( - const std::vector<sample_type>& all_samples, - const std::vector<label_type>& all_labels - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(is_learning_problem(all_samples,all_labels), - "\t trained_function_type one_vs_all_trainer::train(all_samples,all_labels)" - << "\n\t invalid inputs were given to this function" - << "\n\t all_samples.size(): " << all_samples.size() - << "\n\t all_labels.size(): " << all_labels.size() - ); - - const std::vector<label_type> distinct_labels = select_all_distinct_labels(all_labels); - - // make sure we have a trainer object for each of the label types. - for (unsigned long i = 0; i < distinct_labels.size(); ++i) - { - const label_type l = distinct_labels[i]; - const typename binary_function_table::const_iterator itr = trainers.find(l); - - if (itr == trainers.end() && default_trainer.is_empty()) - { - std::ostringstream sout; - sout << "In one_vs_all_trainer, no trainer registered for the " << l << " label."; - throw invalid_label(sout.str(), l); - } - } - - - // now do the training - parallel_for_helper helper(all_samples,all_labels,default_trainer,trainers,verbose,distinct_labels); - parallel_for(num_threads, 0, distinct_labels.size(), helper, 500); - - if (helper.error_message.size() != 0) - { - throw dlib::error("binary trainer threw while training one vs. all classifier. 
Error was: " + helper.error_message); - } - return trained_function_type(helper.dfs); - } - - private: - - typedef std::map<label_type, any_trainer> binary_function_table; - struct parallel_for_helper - { - parallel_for_helper( - const std::vector<sample_type>& all_samples_, - const std::vector<label_type>& all_labels_, - const any_trainer& default_trainer_, - const binary_function_table& trainers_, - const bool verbose_, - const std::vector<label_type>& distinct_labels_ - ) : - all_samples(all_samples_), - all_labels(all_labels_), - default_trainer(default_trainer_), - trainers(trainers_), - verbose(verbose_), - distinct_labels(distinct_labels_) - {} - - void operator()(long i) const - { - try - { - std::vector<scalar_type> labels; - - const label_type l = distinct_labels[i]; - - // setup one of the one vs all training sets - for (unsigned long k = 0; k < all_samples.size(); ++k) - { - if (all_labels[k] == l) - labels.push_back(+1); - else - labels.push_back(-1); - } - - - if (verbose) - { - auto_mutex lock(class_mutex); - std::cout << "Training classifier for " << l << " vs. all" << std::endl; - } - - any_trainer trainer; - // now train a binary classifier using the samples we selected - { auto_mutex lock(class_mutex); - const typename binary_function_table::const_iterator itr = trainers.find(l); - if (itr != trainers.end()) - trainer = itr->second; - else - trainer = default_trainer; - } - - any_decision_function<sample_type,scalar_type> binary_df = trainer.train(all_samples, labels); - - auto_mutex lock(class_mutex); - dfs[l] = binary_df; - } - catch (std::exception& e) - { - auto_mutex lock(class_mutex); - error_message = e.what(); - } - } - - mutable typename trained_function_type::binary_function_table dfs; - mutex class_mutex; - mutable std::string error_message; - - const std::vector<sample_type>& all_samples; - const std::vector<label_type>& all_labels; - const any_trainer& default_trainer; - const binary_function_table& trainers; - const bool verbose; - const std::vector<label_type>& distinct_labels; - }; - - any_trainer default_trainer; - - binary_function_table trainers; - - bool verbose; - unsigned long num_threads; - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_ONE_VS_ALL_TRAiNER_Hh_ - - diff --git a/ml/dlib/dlib/svm/one_vs_all_trainer_abstract.h b/ml/dlib/dlib/svm/one_vs_all_trainer_abstract.h deleted file mode 100644 index fb719a7e4..000000000 --- a/ml/dlib/dlib/svm/one_vs_all_trainer_abstract.h +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_ONE_VS_ALL_TRAiNER_ABSTRACT_Hh_ -#ifdef DLIB_ONE_VS_ALL_TRAiNER_ABSTRACT_Hh_ - - -#include "one_vs_all_decision_function_abstract.h" -#include <vector> - -#include "../any/any_trainer_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename any_trainer, - typename label_type_ = double - > - class one_vs_all_trainer - { - /*! - REQUIREMENTS ON any_trainer - must be an instantiation of the dlib::any_trainer template. - - REQUIREMENTS ON label_type_ - label_type_ must be default constructable, copyable, and comparable using - operator < and ==. It must also be possible to write it to an std::ostream - using operator<<. 
- - WHAT THIS OBJECT REPRESENTS - This object is a tool for turning a bunch of binary classifiers into a - multiclass classifier. It does this by training the binary classifiers - in a one vs. all fashion. That is, if you have N possible classes then - it trains N binary classifiers which are then used to vote on the identity - of a test sample. - - This object works with any kind of binary classification trainer object - capable of being assigned to an any_trainer object. (e.g. the svm_nu_trainer) - !*/ - - public: - - - typedef label_type_ label_type; - - typedef typename any_trainer::sample_type sample_type; - typedef typename any_trainer::scalar_type scalar_type; - typedef typename any_trainer::mem_manager_type mem_manager_type; - - typedef one_vs_all_decision_function<one_vs_all_trainer> trained_function_type; - - one_vs_all_trainer ( - ); - /*! - ensures - - This object is properly initialized. - - This object will not be verbose unless be_verbose() is called. - - No binary trainers are associated with *this. I.e. you have to - call set_trainer() before calling train(). - - #get_num_threads() == 4 - !*/ - - void set_trainer ( - const any_trainer& trainer - ); - /*! - ensures - - sets the trainer used for all binary subproblems. Any previous - calls to set_trainer() are overridden by this function. Even the - more specific set_trainer(trainer, l) form. - !*/ - - void set_trainer ( - const any_trainer& trainer, - const label_type& l - ); - /*! - ensures - - Sets the trainer object used to create a binary classifier to - distinguish l labeled samples from all other samples. - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out so that a - user can observe the progress of the algorithm. - !*/ - - void be_quiet ( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - void set_num_threads ( - unsigned long num - ); - /*! - ensures - - #get_num_threads() == num - !*/ - - unsigned long get_num_threads ( - ) const; - /*! - ensures - - returns the number of threads used during training. You should - usually set this equal to the number of processing cores on your - machine. - !*/ - - struct invalid_label : public dlib::error - { - /*! - This is the exception thrown by the train() function below. - !*/ - label_type l; - }; - - trained_function_type train ( - const std::vector<sample_type>& all_samples, - const std::vector<label_type>& all_labels - ) const; - /*! - requires - - is_learning_problem(all_samples, all_labels) - ensures - - trains a bunch of binary classifiers in a one vs all fashion to solve the given - multiclass classification problem. - - returns a one_vs_all_decision_function F with the following properties: - - F contains all the learned binary classifiers and can be used to predict - the labels of new samples. - - if (new_x is a sample predicted to have a label of L) then - - F(new_x) == L - - F.get_labels() == select_all_distinct_labels(all_labels) - - F.number_of_classes() == select_all_distinct_labels(all_labels).size() - throws - - invalid_label - This exception is thrown if there are labels in all_labels which don't have - any corresponding trainer object. This will never happen if set_trainer(trainer) - has been called. However, if only the set_trainer(trainer,l) form has been - used then this exception is thrown if not all labels have been given a trainer. - - invalid_label::l will contain the label which is missing a trainer object. 
- Additionally, the exception will contain an informative error message available - via invalid_label::what(). - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_ONE_VS_ALL_TRAiNER_ABSTRACT_Hh_ - - - diff --git a/ml/dlib/dlib/svm/one_vs_one_decision_function.h b/ml/dlib/dlib/svm/one_vs_one_decision_function.h deleted file mode 100644 index 02a5fa51e..000000000 --- a/ml/dlib/dlib/svm/one_vs_one_decision_function.h +++ /dev/null @@ -1,291 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_ONE_VS_ONE_DECISION_FUnCTION_Hh_ -#define DLIB_ONE_VS_ONE_DECISION_FUnCTION_Hh_ - -#include "one_vs_one_decision_function_abstract.h" - -#include "../serialize.h" -#include "../type_safe_union.h" -#include <iostream> -#include <sstream> -#include <set> -#include <map> -#include "../any.h" -#include "../unordered_pair.h" -#include "null_df.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename one_vs_one_trainer, - typename DF1 = null_df, typename DF2 = null_df, typename DF3 = null_df, - typename DF4 = null_df, typename DF5 = null_df, typename DF6 = null_df, - typename DF7 = null_df, typename DF8 = null_df, typename DF9 = null_df, - typename DF10 = null_df - > - class one_vs_one_decision_function - { - public: - - typedef typename one_vs_one_trainer::label_type result_type; - typedef typename one_vs_one_trainer::sample_type sample_type; - typedef typename one_vs_one_trainer::scalar_type scalar_type; - typedef typename one_vs_one_trainer::mem_manager_type mem_manager_type; - - typedef std::map<unordered_pair<result_type>, any_decision_function<sample_type, scalar_type> > binary_function_table; - - one_vs_one_decision_function() :num_classes(0) {} - - explicit one_vs_one_decision_function( - const binary_function_table& dfs_ - ) : dfs(dfs_) - { -#ifdef ENABLE_ASSERTS - { - const std::vector<unordered_pair<result_type> > missing_pairs = find_missing_pairs(dfs_); - if (missing_pairs.size() != 0) - { - std::ostringstream sout; - for (unsigned long i = 0; i < missing_pairs.size(); ++i) - { - sout << "\t (" << missing_pairs[i].first << ", " << missing_pairs[i].second << ")\n"; - } - DLIB_ASSERT(missing_pairs.size() == 0, - "\t void one_vs_one_decision_function::one_vs_one_decision_function()" - << "\n\t The supplied set of binary decision functions is incomplete." 
- << "\n\t this: " << this - << "\n\t Classifiers are missing for the following label pairs: \n" << sout.str() - ); - } - } -#endif - - // figure out how many labels are covered by this set of binary decision functions - std::set<result_type> labels; - for (typename binary_function_table::const_iterator i = dfs.begin(); i != dfs.end(); ++i) - { - labels.insert(i->first.first); - labels.insert(i->first.second); - } - num_classes = labels.size(); - } - - const binary_function_table& get_binary_decision_functions ( - ) const - { - return dfs; - } - - const std::vector<result_type> get_labels ( - ) const - { - std::set<result_type> labels; - for (typename binary_function_table::const_iterator i = dfs.begin(); i != dfs.end(); ++i) - { - labels.insert(i->first.first); - labels.insert(i->first.second); - } - return std::vector<result_type>(labels.begin(), labels.end()); - } - - - template < - typename df1, typename df2, typename df3, typename df4, typename df5, - typename df6, typename df7, typename df8, typename df9, typename df10 - > - one_vs_one_decision_function ( - const one_vs_one_decision_function<one_vs_one_trainer, - df1, df2, df3, df4, df5, - df6, df7, df8, df9, df10>& item - ) : dfs(item.get_binary_decision_functions()), num_classes(item.number_of_classes()) {} - - unsigned long number_of_classes ( - ) const - { - return num_classes; - } - - result_type operator() ( - const sample_type& sample - ) const - { - DLIB_ASSERT(number_of_classes() != 0, - "\t void one_vs_one_decision_function::operator()" - << "\n\t You can't make predictions with an empty decision function." - << "\n\t this: " << this - ); - - std::map<result_type,int> votes; - - // run all the classifiers over the sample - for(typename binary_function_table::const_iterator i = dfs.begin(); i != dfs.end(); ++i) - { - const scalar_type score = i->second(sample); - - if (score > 0) - votes[i->first.first] += 1; - else - votes[i->first.second] += 1; - } - - // now figure out who had the most votes - result_type best_label = result_type(); - int best_votes = 0; - for (typename std::map<result_type,int>::iterator i = votes.begin(); i != votes.end(); ++i) - { - if (i->second > best_votes) - { - best_votes = i->second; - best_label = i->first; - } - } - - return best_label; - } - - - - private: - binary_function_table dfs; - unsigned long num_classes; - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename DF1, typename DF2, typename DF3, - typename DF4, typename DF5, typename DF6, - typename DF7, typename DF8, typename DF9, - typename DF10 - > - void serialize( - const one_vs_one_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item, - std::ostream& out - ) - { - try - { - type_safe_union<DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10> temp; - typedef typename T::label_type result_type; - typedef typename T::sample_type sample_type; - typedef typename T::scalar_type scalar_type; - typedef std::map<unordered_pair<result_type>, any_decision_function<sample_type, scalar_type> > binary_function_table; - - const unsigned long version = 1; - serialize(version, out); - - const unsigned long size = item.get_binary_decision_functions().size(); - serialize(size, out); - - for(typename binary_function_table::const_iterator i = item.get_binary_decision_functions().begin(); - i != item.get_binary_decision_functions().end(); ++i) - { - serialize(i->first, out); - - if (i->second.template contains<DF1>()) temp.template get<DF1>() = 
any_cast<DF1>(i->second); - else if (i->second.template contains<DF2>()) temp.template get<DF2>() = any_cast<DF2>(i->second); - else if (i->second.template contains<DF3>()) temp.template get<DF3>() = any_cast<DF3>(i->second); - else if (i->second.template contains<DF4>()) temp.template get<DF4>() = any_cast<DF4>(i->second); - else if (i->second.template contains<DF5>()) temp.template get<DF5>() = any_cast<DF5>(i->second); - else if (i->second.template contains<DF6>()) temp.template get<DF6>() = any_cast<DF6>(i->second); - else if (i->second.template contains<DF7>()) temp.template get<DF7>() = any_cast<DF7>(i->second); - else if (i->second.template contains<DF8>()) temp.template get<DF8>() = any_cast<DF8>(i->second); - else if (i->second.template contains<DF9>()) temp.template get<DF9>() = any_cast<DF9>(i->second); - else if (i->second.template contains<DF10>()) temp.template get<DF10>() = any_cast<DF10>(i->second); - else throw serialization_error("Can't serialize one_vs_one_decision_function. Not all decision functions defined."); - - serialize(temp,out); - } - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing an object of type one_vs_one_decision_function"); - } - - } - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template <typename sample_type, typename scalar_type> - struct copy_to_df_helper - { - copy_to_df_helper(any_decision_function<sample_type, scalar_type>& target_) : target(target_) {} - - any_decision_function<sample_type, scalar_type>& target; - - template <typename T> - void operator() ( - const T& item - ) const - { - target = item; - } - }; - } - - template < - typename T, - typename DF1, typename DF2, typename DF3, - typename DF4, typename DF5, typename DF6, - typename DF7, typename DF8, typename DF9, - typename DF10 - > - void deserialize( - one_vs_one_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item, - std::istream& in - ) - { - try - { - type_safe_union<DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10> temp; - typedef typename T::label_type result_type; - typedef typename T::sample_type sample_type; - typedef typename T::scalar_type scalar_type; - typedef impl::copy_to_df_helper<sample_type, scalar_type> copy_to; - - unsigned long version; - deserialize(version, in); - - if (version != 1) - throw serialization_error("Can't deserialize one_vs_one_decision_function. 
Wrong version."); - - unsigned long size; - deserialize(size, in); - - typedef std::map<unordered_pair<result_type>, any_decision_function<sample_type, scalar_type> > binary_function_table; - binary_function_table dfs; - - unordered_pair<result_type> p; - for (unsigned long i = 0; i < size; ++i) - { - deserialize(p, in); - deserialize(temp, in); - if (temp.template contains<null_df>()) - throw serialization_error("A sub decision function of unknown type was encountered."); - - temp.apply_to_contents(copy_to(dfs[p])); - } - - item = one_vs_one_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>(dfs); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing an object of type one_vs_one_decision_function"); - } - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_ONE_VS_ONE_DECISION_FUnCTION_Hh_ - - diff --git a/ml/dlib/dlib/svm/one_vs_one_decision_function_abstract.h b/ml/dlib/dlib/svm/one_vs_one_decision_function_abstract.h deleted file mode 100644 index cf22e0ba7..000000000 --- a/ml/dlib/dlib/svm/one_vs_one_decision_function_abstract.h +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_ONE_VS_ONE_DECISION_FUnCTION_ABSTRACT_Hh_ -#ifdef DLIB_ONE_VS_ONE_DECISION_FUnCTION_ABSTRACT_Hh_ - - -#include "../serialize.h" -#include <map> -#include "../any/any_decision_function_abstract.h" -#include "../unordered_pair.h" -#include "one_vs_one_trainer_abstract.h" -#include "null_df.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename one_vs_one_trainer, - typename DF1 = null_df, typename DF2 = null_df, typename DF3 = null_df, - typename DF4 = null_df, typename DF5 = null_df, typename DF6 = null_df, - typename DF7 = null_df, typename DF8 = null_df, typename DF9 = null_df, - typename DF10 = null_df - > - class one_vs_one_decision_function - { - /*! - REQUIREMENTS ON one_vs_one_trainer - This should be an instantiation of the one_vs_one_trainer template. - It is used to infer which types are used for various things, such as - representing labels. - - REQUIREMENTS ON DF* - These types can either be left at their default values or set - to any kind of decision function object capable of being - stored in an any_decision_function<sample_type,scalar_type> - object. These types should also be serializable. - - WHAT THIS OBJECT REPRESENTS - This object represents a multiclass classifier built out - of a set of binary classifiers. Each binary classifier - is used to vote for the correct multiclass label using a - one vs. one strategy. Therefore, if you have N classes then - there will be N*(N-1)/2 binary classifiers inside this object. - - Note that the DF* template arguments are only used if you want - to serialize and deserialize one_vs_one_decision_function objects. - Specifically, all the types of binary decision function contained - within a one_vs_one_decision_function must be listed in the - template arguments if serialization and deserialization is to - be used. - - THREAD SAFETY - It is always safe to use distinct instances of this object in different - threads. 
However, when a single instance is shared between threads then - the following rules apply: - It is safe to call the const members of this object from multiple - threads so long as all the decision functions contained in this object - are also threadsafe. This is because the const members are purely - read-only operations. However, any operation that modifies a - one_vs_one_decision_function is not threadsafe. - !*/ - public: - - typedef typename one_vs_one_trainer::label_type result_type; - typedef typename one_vs_one_trainer::sample_type sample_type; - typedef typename one_vs_one_trainer::scalar_type scalar_type; - typedef typename one_vs_one_trainer::mem_manager_type mem_manager_type; - - typedef std::map<unordered_pair<result_type>, any_decision_function<sample_type, scalar_type> > binary_function_table; - - one_vs_one_decision_function( - ); - /*! - ensures - - #number_of_classes() == 0 - - #get_binary_decision_functions().size() == 0 - - #get_labels().size() == 0 - !*/ - - explicit one_vs_one_decision_function( - const binary_function_table& decision_functions - ); - /*! - requires - - find_missing_pairs(decision_functions).size() == 0 - (i.e. all pairs of labels have an associated decision function) - ensures - - #get_binary_decision_functions() == decision_functions - - #get_labels() == a list of all the labels which appear in the - given set of decision functions - - #number_of_classes() == #get_labels().size() - !*/ - - template < - typename df1, typename df2, typename df3, typename df4, typename df5, - typename df6, typename df7, typename df8, typename df9, typename df10 - > - one_vs_one_decision_function ( - const one_vs_one_decision_function<one_vs_one_trainer, - df1, df2, df3, df4, df5, - df6, df7, df8, df9, df10>& item - ); - /*! - ensures - - #*this will be a copy of item - - #number_of_classes() == item.number_of_classes() - - #get_labels() == item.get_labels() - - #get_binary_decision_functions() == item.get_binary_decision_functions() - !*/ - - const binary_function_table& get_binary_decision_functions ( - ) const; - /*! - ensures - - returns the table of binary decision functions used by this - object. The correspondence between binary decision functions - and multiclass labels is the following: - - for each element i of get_binary_decision_functions() - - i->first == the label pair associated with binary decision - function i->second. - - if (decision function i->second outputs a value > 0) then - - i->second is indicating that a test sample should - receive a label of i->first.first - - else - - i->second is indicating that a test sample should - receive a label of i->first.second - !*/ - - const std::vector<result_type> get_labels ( - ) const; - /*! - ensures - - returns a vector containing all the labels which can be - predicted by this object. - !*/ - - unsigned long number_of_classes ( - ) const; - /*! - ensures - - returns get_labels().size() - (i.e. returns the number of different labels/classes predicted by - this object) - !*/ - - result_type operator() ( - const sample_type& sample - ) const - /*! - requires - - number_of_classes() != 0 - ensures - - evaluates all the decision functions in get_binary_decision_functions() - and returns the label which received the most votes. 
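              For example, if the labels are {A,B,C} then there are three
              binary classifiers: (A,B), (A,C), and (B,C).  If the (A,B) and
              (A,C) classifiers both output values > 0 then A holds two of the
              three votes and is returned no matter what the (B,C) classifier
              says.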
- !*/ - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename DF1, typename DF2, typename DF3, - typename DF4, typename DF5, typename DF6, - typename DF7, typename DF8, typename DF9, - typename DF10 - > - void serialize( - const one_vs_one_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item, - std::ostream& out - ); - /*! - ensures - - writes the given item to the output stream out. - throws - - serialization_error. - This is thrown if there is a problem writing to the ostream or if item - contains a type of decision function not listed among the DF* template - arguments. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename DF1, typename DF2, typename DF3, - typename DF4, typename DF5, typename DF6, - typename DF7, typename DF8, typename DF9, - typename DF10 - > - void deserialize( - one_vs_one_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item, - std::istream& in - ); - /*! - ensures - - deserializes a one_vs_one_decision_function from in and stores it in item. - throws - - serialization_error. - This is thrown if there is a problem reading from the istream or if the - serialized data contains decision functions not listed among the DF* - template arguments. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_ONE_VS_ONE_DECISION_FUnCTION_ABSTRACT_Hh_ - - - diff --git a/ml/dlib/dlib/svm/one_vs_one_trainer.h b/ml/dlib/dlib/svm/one_vs_one_trainer.h deleted file mode 100644 index 2beec8f67..000000000 --- a/ml/dlib/dlib/svm/one_vs_one_trainer.h +++ /dev/null @@ -1,249 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
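The analogous hedged round-trip sketch for one_vs_one_decision_function, matching the serialize()/deserialize() specs above (kernel and file name are arbitrary); as with the one-vs-all version, every stored binary DF type must appear in the template argument list:

    #include <dlib/svm.h>
    #include <fstream>

    using namespace dlib;

    typedef matrix<double,0,1> sample_type;
    typedef one_vs_one_trainer<any_trainer<sample_type> > ovo_trainer;
    typedef one_vs_one_decision_function<ovo_trainer,
            decision_function<radial_basis_kernel<sample_type> > > df_type;

    void round_trip(const df_type& df)
    {
        { std::ofstream fout("ovo_df.dat", std::ios::binary); serialize(df, fout); }
        df_type df2;
        std::ifstream fin("ovo_df.dat", std::ios::binary);
        deserialize(df2, fin);   // throws if an unlisted DF type is encountered
    }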
-#ifndef DLIB_ONE_VS_ONE_TRAiNER_Hh_ -#define DLIB_ONE_VS_ONE_TRAiNER_Hh_ - -#include "one_vs_one_trainer_abstract.h" - -#include "one_vs_one_decision_function.h" -#include <vector> - -#include "../unordered_pair.h" -#include "multiclass_tools.h" - -#include <sstream> -#include <iostream> - -#include "../any.h" -#include <map> -#include <set> -#include "../threads.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename any_trainer, - typename label_type_ = double - > - class one_vs_one_trainer - { - public: - typedef label_type_ label_type; - - typedef typename any_trainer::sample_type sample_type; - typedef typename any_trainer::scalar_type scalar_type; - typedef typename any_trainer::mem_manager_type mem_manager_type; - - typedef one_vs_one_decision_function<one_vs_one_trainer> trained_function_type; - - one_vs_one_trainer ( - ) : - verbose(false), - num_threads(4) - {} - - void set_trainer ( - const any_trainer& trainer - ) - { - default_trainer = trainer; - trainers.clear(); - } - - void set_trainer ( - const any_trainer& trainer, - const label_type& l1, - const label_type& l2 - ) - { - trainers[make_unordered_pair(l1,l2)] = trainer; - } - - void be_verbose ( - ) - { - verbose = true; - } - - void be_quiet ( - ) - { - verbose = false; - } - - void set_num_threads ( - unsigned long num - ) - { - num_threads = num; - } - - unsigned long get_num_threads ( - ) const - { - return num_threads; - } - - struct invalid_label : public dlib::error - { - invalid_label(const std::string& msg, const label_type& l1_, const label_type& l2_ - ) : dlib::error(msg), l1(l1_), l2(l2_) {}; - - virtual ~invalid_label( - ) throw() {} - - label_type l1, l2; - }; - - trained_function_type train ( - const std::vector<sample_type>& all_samples, - const std::vector<label_type>& all_labels - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(is_learning_problem(all_samples,all_labels), - "\t trained_function_type one_vs_one_trainer::train(all_samples,all_labels)" - << "\n\t invalid inputs were given to this function" - << "\n\t all_samples.size(): " << all_samples.size() - << "\n\t all_labels.size(): " << all_labels.size() - ); - - const std::vector<label_type> distinct_labels = select_all_distinct_labels(all_labels); - - - // fill pairs with all the pairs of labels. - std::vector<unordered_pair<label_type> > pairs; - for (unsigned long i = 0; i < distinct_labels.size(); ++i) - { - for (unsigned long j = i+1; j < distinct_labels.size(); ++j) - { - pairs.push_back(unordered_pair<label_type>(distinct_labels[i], distinct_labels[j])); - - // make sure we have a trainer for this pair - const typename binary_function_table::const_iterator itr = trainers.find(pairs.back()); - if (itr == trainers.end() && default_trainer.is_empty()) - { - std::ostringstream sout; - sout << "In one_vs_one_trainer, no trainer registered for the (" - << pairs.back().first << ", " << pairs.back().second << ") label pair."; - throw invalid_label(sout.str(), pairs.back().first, pairs.back().second); - } - } - } - - - - // Now train on all the label pairs. - parallel_for_helper helper(all_samples,all_labels,default_trainer,trainers,verbose,pairs); - parallel_for(num_threads, 0, pairs.size(), helper, 500); - - if (helper.error_message.size() != 0) - { - throw dlib::error("binary trainer threw while training one vs. one classifier. 
Error was: " + helper.error_message); - } - return trained_function_type(helper.dfs); - } - - private: - - typedef std::map<unordered_pair<label_type>, any_trainer> binary_function_table; - - struct parallel_for_helper - { - parallel_for_helper( - const std::vector<sample_type>& all_samples_, - const std::vector<label_type>& all_labels_, - const any_trainer& default_trainer_, - const binary_function_table& trainers_, - const bool verbose_, - const std::vector<unordered_pair<label_type> >& pairs_ - ) : - all_samples(all_samples_), - all_labels(all_labels_), - default_trainer(default_trainer_), - trainers(trainers_), - verbose(verbose_), - pairs(pairs_) - {} - - void operator()(long i) const - { - try - { - std::vector<sample_type> samples; - std::vector<scalar_type> labels; - - const unordered_pair<label_type> p = pairs[i]; - - // pick out the samples corresponding to these two classes - for (unsigned long k = 0; k < all_samples.size(); ++k) - { - if (all_labels[k] == p.first) - { - samples.push_back(all_samples[k]); - labels.push_back(+1); - } - else if (all_labels[k] == p.second) - { - samples.push_back(all_samples[k]); - labels.push_back(-1); - } - } - - if (verbose) - { - auto_mutex lock(class_mutex); - std::cout << "Training classifier for " << p.first << " vs. " << p.second << std::endl; - } - - any_trainer trainer; - // now train a binary classifier using the samples we selected - { auto_mutex lock(class_mutex); - const typename binary_function_table::const_iterator itr = trainers.find(p); - if (itr != trainers.end()) - trainer = itr->second; - else - trainer = default_trainer; - } - - any_decision_function<sample_type,scalar_type> binary_df = trainer.train(samples, labels); - - auto_mutex lock(class_mutex); - dfs[p] = binary_df; - } - catch (std::exception& e) - { - auto_mutex lock(class_mutex); - error_message = e.what(); - } - } - - mutable typename trained_function_type::binary_function_table dfs; - mutex class_mutex; - mutable std::string error_message; - - const std::vector<sample_type>& all_samples; - const std::vector<label_type>& all_labels; - const any_trainer& default_trainer; - const binary_function_table& trainers; - const bool verbose; - const std::vector<unordered_pair<label_type> >& pairs; - }; - - - any_trainer default_trainer; - binary_function_table trainers; - bool verbose; - unsigned long num_threads; - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_ONE_VS_ONE_TRAiNER_Hh_ - diff --git a/ml/dlib/dlib/svm/one_vs_one_trainer_abstract.h b/ml/dlib/dlib/svm/one_vs_one_trainer_abstract.h deleted file mode 100644 index 42ba35815..000000000 --- a/ml/dlib/dlib/svm/one_vs_one_trainer_abstract.h +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_ONE_VS_ONE_TRAiNER_ABSTRACT_Hh_ -#ifdef DLIB_ONE_VS_ONE_TRAiNER_ABSTRACT_Hh_ - - -#include "one_vs_one_decision_function_abstract.h" -#include <vector> - -#include "../any/any_trainer_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename any_trainer, - typename label_type_ = double - > - class one_vs_one_trainer - { - /*! - REQUIREMENTS ON any_trainer - must be an instantiation of the dlib::any_trainer template. - - REQUIREMENTS ON label_type_ - label_type_ must be default constructable, copyable, and comparable using - operator < and ==. 
It must also be possible to write it to an std::ostream - using operator<<. - - WHAT THIS OBJECT REPRESENTS - This object is a tool for turning a bunch of binary classifiers - into a multiclass classifier. It does this by training the binary - classifiers in a one vs. one fashion. That is, if you have N possible - classes then it trains N*(N-1)/2 binary classifiers which are then used - to vote on the identity of a test sample. - - This object works with any kind of binary classification trainer object - capable of being assigned to an any_trainer object. (e.g. the svm_nu_trainer) - !*/ - - public: - - - typedef label_type_ label_type; - - typedef typename any_trainer::sample_type sample_type; - typedef typename any_trainer::scalar_type scalar_type; - typedef typename any_trainer::mem_manager_type mem_manager_type; - - typedef one_vs_one_decision_function<one_vs_one_trainer> trained_function_type; - - one_vs_one_trainer ( - ); - /*! - ensures - - This object is properly initialized - - This object will not be verbose unless be_verbose() is called. - - No binary trainers are associated with *this. I.e. you have to - call set_trainer() before calling train(). - - #get_num_threads() == 4 - !*/ - - void set_trainer ( - const any_trainer& trainer - ); - /*! - ensures - - sets the trainer used for all pairs of training. Any previous - calls to set_trainer() are overridden by this function. Even the - more specific set_trainer(trainer, l1, l2) form. - !*/ - - void set_trainer ( - const any_trainer& trainer, - const label_type& l1, - const label_type& l2 - ); - /*! - requires - - l1 != l2 - ensures - - Sets the trainer object used to create a binary classifier to - distinguish l1 labeled samples from l2 labeled samples. - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out so that a - user can observe the progress of the algorithm. - !*/ - - void be_quiet ( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - void set_num_threads ( - unsigned long num - ); - /*! - ensures - - #get_num_threads() == num - !*/ - - unsigned long get_num_threads ( - ) const; - /*! - ensures - - returns the number of threads used during training. You should - usually set this equal to the number of processing cores on your - machine. - !*/ - - struct invalid_label : public dlib::error - { - /*! - This is the exception thrown by the train() function below. - !*/ - label_type l1, l2; - }; - - trained_function_type train ( - const std::vector<sample_type>& all_samples, - const std::vector<label_type>& all_labels - ) const; - /*! - requires - - is_learning_problem(all_samples, all_labels) - ensures - - trains a bunch of binary classifiers in a one vs one fashion to solve the given - multiclass classification problem. - - returns a one_vs_one_decision_function F with the following properties: - - F contains all the learned binary classifiers and can be used to predict - the labels of new samples. - - if (new_x is a sample predicted to have a label of L) then - - F(new_x) == L - - F.get_labels() == select_all_distinct_labels(all_labels) - - F.number_of_classes() == select_all_distinct_labels(all_labels).size() - throws - - invalid_label - This exception is thrown if there are labels in all_labels which don't have - any corresponding trainer object. This will never happen if set_trainer(trainer) - has been called. 
However, if only the set_trainer(trainer,l1,l2) form has been - used then this exception is thrown if not all necessary label pairs have been - given a trainer. - - invalid_label::l1 and invalid_label::l2 will contain the label pair which is - missing a trainer object. Additionally, the exception will contain an - informative error message available via invalid_label::what(). - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_ONE_VS_ONE_TRAiNER_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/svm/pegasos.h b/ml/dlib/dlib/svm/pegasos.h deleted file mode 100644 index c28093fe0..000000000 --- a/ml/dlib/dlib/svm/pegasos.h +++ /dev/null @@ -1,710 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_PEGASoS_ -#define DLIB_PEGASoS_ - -#include "pegasos_abstract.h" -#include <cmath> -#include "../algs.h" -#include "function.h" -#include "kernel.h" -#include "kcentroid.h" -#include <iostream> -#include <memory> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class svm_pegasos - { - typedef kcentroid<offset_kernel<K> > kc_type; - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - template <typename K_> - struct rebind { - typedef svm_pegasos<K_> other; - }; - - svm_pegasos ( - ) : - max_sv(40), - lambda_c1(0.0001), - lambda_c2(0.0001), - tau(0.01), - tolerance(0.01), - train_count(0), - w(offset_kernel<kernel_type>(kernel,tau),tolerance, max_sv, false) - { - } - - svm_pegasos ( - const kernel_type& kernel_, - const scalar_type& lambda_, - const scalar_type& tolerance_, - unsigned long max_num_sv - ) : - max_sv(max_num_sv), - kernel(kernel_), - lambda_c1(lambda_), - lambda_c2(lambda_), - tau(0.01), - tolerance(tolerance_), - train_count(0), - w(offset_kernel<kernel_type>(kernel,tau),tolerance, max_sv, false) - { - // make sure requires clause is not broken - DLIB_ASSERT(lambda_ > 0 && tolerance > 0 && max_num_sv > 0, - "\tsvm_pegasos::svm_pegasos(kernel,lambda,tolerance)" - << "\n\t invalid inputs were given to this function" - << "\n\t lambda_: " << lambda_ - << "\n\t max_num_sv: " << max_num_sv - ); - } - - void clear ( - ) - { - // reset the w vector back to its initial state - w = kc_type(offset_kernel<kernel_type>(kernel,tau),tolerance, max_sv, false); - train_count = 0; - } - - void set_kernel ( - kernel_type k - ) - { - kernel = k; - clear(); - } - - void set_max_num_sv ( - unsigned long max_num_sv - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(max_num_sv > 0, - "\tvoid svm_pegasos::set_max_num_sv(max_num_sv)" - << "\n\t invalid inputs were given to this function" - << "\n\t max_num_sv: " << max_num_sv - ); - max_sv = max_num_sv; - clear(); - } - - unsigned long get_max_num_sv ( - ) const - { - return max_sv; - } - - void set_tolerance ( - double tol - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < tol, - "\tvoid svm_pegasos::set_tolerance(tol)" - << "\n\t invalid inputs were given to this function" - << "\n\t tol: " << tol - ); - tolerance = tol; - clear(); - } - - void set_lambda ( - scalar_type lambda_ - ) - { - // make sure requires clause is not 
broken - DLIB_ASSERT(0 < lambda_, - "\tvoid svm_pegasos::set_lambda(lambda_)" - << "\n\t invalid inputs were given to this function" - << "\n\t lambda_: " << lambda_ - ); - lambda_c1 = lambda_; - lambda_c2 = lambda_; - - max_wnorm = 1/std::sqrt(std::min(lambda_c1, lambda_c2)); - clear(); - } - - void set_lambda_class1 ( - scalar_type lambda_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < lambda_, - "\tvoid svm_pegasos::set_lambda_class1(lambda_)" - << "\n\t invalid inputs were given to this function" - << "\n\t lambda_: " << lambda_ - ); - lambda_c1 = lambda_; - max_wnorm = 1/std::sqrt(std::min(lambda_c1, lambda_c2)); - clear(); - } - - void set_lambda_class2 ( - scalar_type lambda_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < lambda_, - "\tvoid svm_pegasos::set_lambda_class2(lambda_)" - << "\n\t invalid inputs were given to this function" - << "\n\t lambda_: " << lambda_ - ); - lambda_c2 = lambda_; - max_wnorm = 1/std::sqrt(std::min(lambda_c1, lambda_c2)); - clear(); - } - - const scalar_type get_lambda_class1 ( - ) const - { - return lambda_c1; - } - - const scalar_type get_lambda_class2 ( - ) const - { - return lambda_c2; - } - - const scalar_type get_tolerance ( - ) const - { - return tolerance; - } - - const kernel_type get_kernel ( - ) const - { - return kernel; - } - - unsigned long get_train_count ( - ) const - { - return static_cast<unsigned long>(train_count); - } - - scalar_type train ( - const sample_type& x, - const scalar_type& y - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(y == -1 || y == 1, - "\tscalar_type svm_pegasos::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t y: " << y - ); - - const double lambda = (y==+1)? lambda_c1 : lambda_c2; - - ++train_count; - const scalar_type learning_rate = 1/(lambda*train_count); - - // if this sample point is within the margin of the current hyperplane - if (y*w.inner_product(x) < 1) - { - - // compute: w = (1-learning_rate*lambda)*w + y*learning_rate*x - w.train(x, 1 - learning_rate*lambda, y*learning_rate); - - scalar_type wnorm = std::sqrt(w.squared_norm()); - scalar_type temp = max_wnorm/wnorm; - if (temp < 1) - w.scale_by(temp); - } - else - { - w.scale_by(1 - learning_rate*lambda); - } - - // return the current learning rate - return 1/(std::min(lambda_c1,lambda_c2)*train_count); - } - - scalar_type operator() ( - const sample_type& x - ) const - { - return w.inner_product(x); - } - - const decision_function<kernel_type> get_decision_function ( - ) const - { - distance_function<offset_kernel<kernel_type> > df = w.get_distance_function(); - return decision_function<kernel_type>(df.get_alpha(), -tau*sum(df.get_alpha()), kernel, df.get_basis_vectors()); - } - - void swap ( - svm_pegasos& item - ) - { - exchange(max_sv, item.max_sv); - exchange(kernel, item.kernel); - exchange(lambda_c1, item.lambda_c1); - exchange(lambda_c2, item.lambda_c2); - exchange(max_wnorm, item.max_wnorm); - exchange(tau, item.tau); - exchange(tolerance, item.tolerance); - exchange(train_count, item.train_count); - exchange(w, item.w); - } - - friend void serialize(const svm_pegasos& item, std::ostream& out) - { - serialize(item.max_sv, out); - serialize(item.kernel, out); - serialize(item.lambda_c1, out); - serialize(item.lambda_c2, out); - serialize(item.max_wnorm, out); - serialize(item.tau, out); - serialize(item.tolerance, out); - serialize(item.train_count, out); - serialize(item.w, out); - } - - friend void deserialize(svm_pegasos& item, std::istream& 
in) - { - deserialize(item.max_sv, in); - deserialize(item.kernel, in); - deserialize(item.lambda_c1, in); - deserialize(item.lambda_c2, in); - deserialize(item.max_wnorm, in); - deserialize(item.tau, in); - deserialize(item.tolerance, in); - deserialize(item.train_count, in); - deserialize(item.w, in); - } - - private: - - unsigned long max_sv; - kernel_type kernel; - scalar_type lambda_c1; - scalar_type lambda_c2; - scalar_type max_wnorm; - scalar_type tau; - scalar_type tolerance; - scalar_type train_count; - kc_type w; - - }; // end of class svm_pegasos - - template < - typename K - > - void swap ( - svm_pegasos<K>& a, - svm_pegasos<K>& b - ) { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U - > - void replicate_settings ( - const svm_pegasos<T>& source, - svm_pegasos<U>& dest - ) - { - dest.set_tolerance(source.get_tolerance()); - dest.set_lambda_class1(source.get_lambda_class1()); - dest.set_lambda_class2(source.get_lambda_class2()); - dest.set_max_num_sv(source.get_max_num_sv()); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - class batch_trainer - { - - // ------------------------------------------------------------------------------------ - - template < - typename K, - typename sample_vector_type - > - class caching_kernel - { - public: - typedef typename K::scalar_type scalar_type; - typedef long sample_type; - //typedef typename K::sample_type sample_type; - typedef typename K::mem_manager_type mem_manager_type; - - caching_kernel () {} - - caching_kernel ( - const K& kern, - const sample_vector_type& samps, - long cache_size_ - ) : real_kernel(kern), samples(&samps), counter(0) - { - cache_size = std::min<long>(cache_size_, samps.size()); - - cache.reset(new cache_type); - cache->frequency_of_use.resize(samps.size()); - for (long i = 0; i < samps.size(); ++i) - cache->frequency_of_use[i] = std::make_pair(0, i); - - // Set the cache build/rebuild threshold so that we have to have - // as many cache misses as there are entries in the cache before - // we build/rebuild. 
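    // Concretely, the threshold below equals the number of kernel values the
    // cache can hold (cache_size rows of samps.size() entries each), so a full
    // rebuild happens only after that many complete cache misses.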
- counter_threshold = samps.size()*cache_size; - cache->sample_location.assign(samples->size(), -1); - } - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const - { - // rebuild the cache every so often - if (counter > counter_threshold ) - { - build_cache(); - } - - const long a_loc = cache->sample_location[a]; - const long b_loc = cache->sample_location[b]; - - cache->frequency_of_use[a].first += 1; - cache->frequency_of_use[b].first += 1; - - if (a_loc != -1) - { - return cache->kernel(a_loc, b); - } - else if (b_loc != -1) - { - return cache->kernel(b_loc, a); - } - else - { - ++counter; - return real_kernel((*samples)(a), (*samples)(b)); - } - } - - bool operator== ( - const caching_kernel& item - ) const - { - return item.real_kernel == real_kernel && - item.samples == samples; - } - - private: - K real_kernel; - - void build_cache ( - ) const - { - std::sort(cache->frequency_of_use.rbegin(), cache->frequency_of_use.rend()); - counter = 0; - - - cache->kernel.set_size(cache_size, samples->size()); - cache->sample_location.assign(samples->size(), -1); - - // loop over all the samples in the cache - for (long i = 0; i < cache_size; ++i) - { - const long cur = cache->frequency_of_use[i].second; - cache->sample_location[cur] = i; - - // now populate all possible kernel products with the current sample - for (long j = 0; j < samples->size(); ++j) - { - cache->kernel(i, j) = real_kernel((*samples)(cur), (*samples)(j)); - } - - } - - // reset the frequency of use metrics - for (long i = 0; i < samples->size(); ++i) - cache->frequency_of_use[i] = std::make_pair(0, i); - } - - - struct cache_type - { - matrix<scalar_type> kernel; - - std::vector<long> sample_location; // where in the cache a sample is. -1 means not in cache - std::vector<std::pair<long,long> > frequency_of_use; - }; - - const sample_vector_type* samples = 0; - - std::shared_ptr<cache_type> cache; - mutable unsigned long counter = 0; - unsigned long counter_threshold = 0; - long cache_size = 0; - }; - - // ------------------------------------------------------------------------------------ - - public: - typedef typename trainer_type::kernel_type kernel_type; - typedef typename trainer_type::scalar_type scalar_type; - typedef typename trainer_type::sample_type sample_type; - typedef typename trainer_type::mem_manager_type mem_manager_type; - typedef typename trainer_type::trained_function_type trained_function_type; - - - batch_trainer ( - ) : - min_learning_rate(0.1), - use_cache(false), - cache_size(100) - { - } - - batch_trainer ( - const trainer_type& trainer_, - const scalar_type min_learning_rate_, - bool verbose_, - bool use_cache_, - long cache_size_ = 100 - ) : - trainer(trainer_), - min_learning_rate(min_learning_rate_), - verbose(verbose_), - use_cache(use_cache_), - cache_size(cache_size_) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < min_learning_rate_ && - cache_size_ > 0, - "\tbatch_trainer::batch_trainer()" - << "\n\t invalid inputs were given to this function" - << "\n\t min_learning_rate_: " << min_learning_rate_ - << "\n\t cache_size_: " << cache_size_ - ); - - trainer.clear(); - } - - const scalar_type get_min_learning_rate ( - ) const - { - return min_learning_rate; - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - if (use_cache) - return do_train_cached(mat(x), mat(y)); - else - return 
do_train(mat(x), mat(y)); - } - - private: - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - - dlib::rand rnd; - - trainer_type my_trainer(trainer); - - scalar_type cur_learning_rate = min_learning_rate + 10; - unsigned long count = 0; - - while (cur_learning_rate > min_learning_rate) - { - const long i = rnd.get_random_32bit_number()%x.size(); - // keep feeding the trainer data until its learning rate goes below our threshold - cur_learning_rate = my_trainer.train(x(i), y(i)); - - if (verbose) - { - if ( (count&0x7FF) == 0) - { - std::cout << "\rbatch_trainer(): Percent complete: " - << 100*min_learning_rate/cur_learning_rate << " " << std::flush; - } - ++count; - } - } - - if (verbose) - { - decision_function<kernel_type> df = my_trainer.get_decision_function(); - std::cout << "\rbatch_trainer(): Percent complete: 100 " << std::endl; - std::cout << " Num sv: " << df.basis_vectors.size() << std::endl; - std::cout << " bias: " << df.b << std::endl; - return df; - } - else - { - return my_trainer.get_decision_function(); - } - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train_cached ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - - dlib::rand rnd; - - // make a caching kernel - typedef caching_kernel<kernel_type, in_sample_vector_type> ckernel_type; - ckernel_type ck(trainer.get_kernel(), x, cache_size); - - // now rebind the trainer to use the caching kernel - typedef typename trainer_type::template rebind<ckernel_type>::other rebound_trainer_type; - rebound_trainer_type my_trainer; - my_trainer.set_kernel(ck); - replicate_settings(trainer, my_trainer); - - scalar_type cur_learning_rate = min_learning_rate + 10; - unsigned long count = 0; - - while (cur_learning_rate > min_learning_rate) - { - const long i = rnd.get_random_32bit_number()%x.size(); - // keep feeding the trainer data until its learning rate goes below our threshold - cur_learning_rate = my_trainer.train(i, y(i)); - - if (verbose) - { - if ( (count&0x7FF) == 0) - { - std::cout << "\rbatch_trainer(): Percent complete: " - << 100*min_learning_rate/cur_learning_rate << " " << std::flush; - } - ++count; - } - } - - if (verbose) - { - decision_function<ckernel_type> cached_df; - cached_df = my_trainer.get_decision_function(); - - std::cout << "\rbatch_trainer(): Percent complete: 100 " << std::endl; - std::cout << " Num sv: " << cached_df.basis_vectors.size() << std::endl; - std::cout << " bias: " << cached_df.b << std::endl; - - return decision_function<kernel_type> ( - cached_df.alpha, - cached_df.b, - trainer.get_kernel(), - rowm(x, cached_df.basis_vectors) - ); - } - else - { - decision_function<ckernel_type> cached_df; - cached_df = my_trainer.get_decision_function(); - - return decision_function<kernel_type> ( - cached_df.alpha, - cached_df.b, - trainer.get_kernel(), - rowm(x, cached_df.basis_vectors) - ); - } - } - - trainer_type trainer; - scalar_type min_learning_rate; - bool verbose; - bool use_cache; - long cache_size; - - }; // end of class batch_trainer - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - const batch_trainer<trainer_type> batch ( - const trainer_type& trainer, - const typename trainer_type::scalar_type min_learning_rate = 0.1 - ) 
{ return batch_trainer<trainer_type>(trainer, min_learning_rate, false, false); } - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - const batch_trainer<trainer_type> verbose_batch ( - const trainer_type& trainer, - const typename trainer_type::scalar_type min_learning_rate = 0.1 - ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, true, false); } - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - const batch_trainer<trainer_type> batch_cached ( - const trainer_type& trainer, - const typename trainer_type::scalar_type min_learning_rate = 0.1, - long cache_size = 100 - ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, false, true, cache_size); } - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - const batch_trainer<trainer_type> verbose_batch_cached ( - const trainer_type& trainer, - const typename trainer_type::scalar_type min_learning_rate = 0.1, - long cache_size = 100 - ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, true, true, cache_size); } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_PEGASoS_ - diff --git a/ml/dlib/dlib/svm/pegasos_abstract.h b/ml/dlib/dlib/svm/pegasos_abstract.h deleted file mode 100644 index 008b1cb94..000000000 --- a/ml/dlib/dlib/svm/pegasos_abstract.h +++ /dev/null @@ -1,514 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_PEGASoS_ABSTRACT_ -#ifdef DLIB_PEGASoS_ABSTRACT_ - -#include <cmath> -#include "../algs.h" -#include "function_abstract.h" -#include "kernel_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename kern_type - > - class svm_pegasos - { - /*! - REQUIREMENTS ON kern_type - is a kernel function object as defined in dlib/svm/kernel_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object implements an online algorithm for training a support - vector machine for solving binary classification problems. - - The implementation of the Pegasos algorithm used by this object is based - on the following excellent paper: - Pegasos: Primal estimated sub-gradient solver for SVM (2007) - by Shai Shalev-Shwartz, Yoram Singer, Nathan Srebro - In ICML - - This SVM training algorithm has two interesting properties. First, the - pegasos algorithm itself converges to the solution in an amount of time - unrelated to the size of the training set (in addition to being quite fast - to begin with). This makes it an appropriate algorithm for learning from - very large datasets. Second, this object uses the dlib::kcentroid object - to maintain a sparse approximation of the learned decision function. - This means that the number of support vectors in the resulting decision - function is also unrelated to the size of the dataset (in normal SVM - training algorithms, the number of support vectors grows approximately - linearly with the size of the training set). 
- !*/ - - public: - typedef kern_type kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - template <typename K_> - struct rebind { - typedef svm_pegasos<K_> other; - }; - - svm_pegasos ( - ); - /*! - ensures - - this object is properly initialized - - #get_lambda_class1() == 0.0001 - - #get_lambda_class2() == 0.0001 - - #get_tolerance() == 0.01 - - #get_train_count() == 0 - - #get_max_num_sv() == 40 - !*/ - - svm_pegasos ( - const kernel_type& kernel_, - const scalar_type& lambda_, - const scalar_type& tolerance_, - unsigned long max_num_sv - ); - /*! - requires - - lambda_ > 0 - - tolerance_ > 0 - - max_num_sv > 0 - ensures - - this object is properly initialized - - #get_lambda_class1() == lambda_ - - #get_lambda_class2() == lambda_ - - #get_tolerance() == tolerance_ - - #get_kernel() == kernel_ - - #get_train_count() == 0 - - #get_max_num_sv() == max_num_sv - !*/ - - void clear ( - ); - /*! - ensures - - #get_train_count() == 0 - - clears out any memory of previous calls to train() - - doesn't change any of the algorithm parameters. I.e. - - #get_lambda_class1() == get_lambda_class1() - - #get_lambda_class2() == get_lambda_class2() - - #get_tolerance() == get_tolerance() - - #get_kernel() == get_kernel() - - #get_max_num_sv() == get_max_num_sv() - !*/ - - const scalar_type get_lambda_class1 ( - ) const; - /*! - ensures - - returns the SVM regularization term for the +1 class. It is the - parameter that determines the trade off between trying to fit the - +1 training data exactly or allowing more errors but hopefully - improving the generalization ability of the resulting classifier. - Smaller values encourage exact fitting while larger values may - encourage better generalization. It is also worth noting that the - number of iterations it takes for this algorithm to converge is - proportional to 1/lambda. So smaller values of this term cause - the running time of this algorithm to increase. For more - information you should consult the paper referenced above. - !*/ - - const scalar_type get_lambda_class2 ( - ) const; - /*! - ensures - - returns the SVM regularization term for the -1 class. It has - the same properties as the get_lambda_class1() parameter except that - it applies to the -1 class. - !*/ - - const scalar_type get_tolerance ( - ) const; - /*! - ensures - - returns the tolerance used by the internal kcentroid object to - represent the learned decision function. Smaller values of this - tolerance will result in a more accurate representation of the - decision function but will use more support vectors (up to - a max of get_max_num_sv()). - !*/ - - unsigned long get_max_num_sv ( - ) const; - /*! - ensures - - returns the maximum number of support vectors this object is - allowed to use. - !*/ - - const kernel_type get_kernel ( - ) const; - /*! - ensures - - returns the kernel used by this object - !*/ - - void set_kernel ( - kernel_type k - ); - /*! - ensures - - #get_kernel() == k - - #get_train_count() == 0 - (i.e. clears any memory of previous training) - !*/ - - void set_tolerance ( - double tol - ); - /*! - requires - - tol > 0 - ensures - - #get_tolerance() == tol - - #get_train_count() == 0 - (i.e. clears any memory of previous training) - !*/ - - void set_max_num_sv ( - unsigned long max_num_sv - ); - /*! 
- requires - - max_num_sv > 0 - ensures - - #get_max_num_sv() == max_num_sv - - #get_train_count() == 0 - (i.e. clears any memory of previous training) - !*/ - - void set_lambda ( - scalar_type lambda_ - ); - /*! - requires - - lambda_ > 0 - ensures - - #get_lambda_class1() == lambda_ - - #get_lambda_class2() == lambda_ - - #get_train_count() == 0 - (i.e. clears any memory of previous training) - !*/ - - void set_lambda_class1 ( - scalar_type lambda_ - ); - /*! - requires - - lambda_ > 0 - ensures - - #get_lambda_class1() == lambda_ - #get_train_count() == 0 - (i.e. clears any memory of previous training) - !*/ - - void set_lambda_class2 ( - scalar_type lambda_ - ); - /*! - requires - - lambda_ > 0 - ensures - - #get_lambda_class2() == lambda_ - #get_train_count() == 0 - (i.e. clears any memory of previous training) - !*/ - - unsigned long get_train_count ( - ) const; - /*! - ensures - - returns how many times this->train() has been called - since this object was constructed or last cleared. - !*/ - - scalar_type train ( - const sample_type& x, - const scalar_type& y - ); - /*! - requires - - y == 1 || y == -1 - ensures - - trains this svm using the given sample x and label y - - #get_train_count() == get_train_count() + 1 - - returns the current learning rate - (i.e. 1/(get_train_count()*min(get_lambda_class1(),get_lambda_class2())) ) - !*/ - - scalar_type operator() ( - const sample_type& x - ) const; - /*! - ensures - - classifies the given x sample using the decision function - this object has learned so far. - - if (x is a sample predicted have +1 label) then - - returns a number >= 0 - - else - - returns a number < 0 - !*/ - - const decision_function<kernel_type> get_decision_function ( - ) const; - /*! - ensures - - returns a decision function F that represents the function learned - by this object so far. I.e. it is the case that: - - for all x: F(x) == (*this)(x) - !*/ - - void swap ( - svm_pegasos& item - ); - /*! - ensures - - swaps *this and item - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename kern_type - > - void swap( - svm_pegasos<kern_type>& a, - svm_pegasos<kern_type>& b - ) { a.swap(b); } - /*! - provides a global swap function - !*/ - - template < - typename kern_type - > - void serialize ( - const svm_pegasos<kern_type>& item, - std::ostream& out - ); - /*! - provides serialization support for svm_pegasos objects - !*/ - - template < - typename kern_type - > - void deserialize ( - svm_pegasos<kern_type>& item, - std::istream& in - ); - /*! - provides serialization support for svm_pegasos objects - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U - > - void replicate_settings ( - const svm_pegasos<T>& source, - svm_pegasos<U>& dest - ); - /*! - ensures - - copies all the parameters from the source trainer to the dest trainer. 
- - #dest.get_tolerance() == source.get_tolerance() - - #dest.get_lambda_class1() == source.get_lambda_class1() - - #dest.get_lambda_class2() == source.get_lambda_class2() - - #dest.get_max_num_sv() == source.get_max_num_sv() - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - class batch_trainer - { - /*! - REQUIREMENTS ON trainer_type - - trainer_type == some kind of online trainer object (e.g. svm_pegasos) - replicate_settings() must also be defined for the type. - - WHAT THIS OBJECT REPRESENTS - This is a trainer object that is meant to wrap online trainer objects - that create decision_functions. It turns an online learning algorithm - such as svm_pegasos into a batch learning object. This allows you to - use objects like svm_pegasos with functions (e.g. cross_validate_trainer) - that expect batch mode training objects. - !*/ - - public: - typedef typename trainer_type::kernel_type kernel_type; - typedef typename trainer_type::scalar_type scalar_type; - typedef typename trainer_type::sample_type sample_type; - typedef typename trainer_type::mem_manager_type mem_manager_type; - typedef typename trainer_type::trained_function_type trained_function_type; - - - batch_trainer ( - ); - /*! - ensures - - This object is in an uninitialized state. You must - construct a real one with the other constructor and assign it - to this instance before you use this object. - !*/ - - batch_trainer ( - const trainer_type& online_trainer, - const scalar_type min_learning_rate_, - bool verbose_, - bool use_cache_, - long cache_size_ = 100 - ); - /*! - requires - - min_learning_rate_ > 0 - - cache_size_ > 0 - ensures - - returns a batch trainer object that uses the given online_trainer object - to train a decision function. - - #get_min_learning_rate() == min_learning_rate_ - - if (verbose_ == true) then - - this object will output status messages to standard out while - training is under way. - - if (use_cache_ == true) then - - this object will cache up to cache_size_ columns of the kernel - matrix during the training process. - !*/ - - const scalar_type get_min_learning_rate ( - ) const; - /*! - ensures - - returns the min learning rate that the online trainer must reach - before this object considers training to be complete. - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const; - /*! - ensures - - trains and returns a decision_function using the trainer that was - supplied to this object's constructor. - - training continues until the online training object indicates that - its learning rate has dropped below get_min_learning_rate(). - throws - - std::bad_alloc - - any exceptions thrown by the trainer_type object - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - const batch_trainer<trainer_type> batch ( - const trainer_type& trainer, - const typename trainer_type::scalar_type min_learning_rate = 0.1 - ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, false, false); } - /*! 
- requires - - min_learning_rate > 0 - - trainer_type == some kind of online trainer object that creates decision_function - objects (e.g. svm_pegasos). replicate_settings() must also be defined for the type. - ensures - - returns a batch_trainer object that has been instantiated with the - given arguments. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - const batch_trainer<trainer_type> verbose_batch ( - const trainer_type& trainer, - const typename trainer_type::scalar_type min_learning_rate = 0.1 - ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, true, false); } - /*! - requires - - min_learning_rate > 0 - - trainer_type == some kind of online trainer object that creates decision_function - objects (e.g. svm_pegasos). replicate_settings() must also be defined for the type. - ensures - - returns a batch_trainer object that has been instantiated with the - given arguments (and is verbose). - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - const batch_trainer<trainer_type> batch_cached ( - const trainer_type& trainer, - const typename trainer_type::scalar_type min_learning_rate = 0.1, - long cache_size = 100 - ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, false, true, cache_size); } - /*! - requires - - min_learning_rate > 0 - - cache_size > 0 - - trainer_type == some kind of online trainer object that creates decision_function - objects (e.g. svm_pegasos). replicate_settings() must also be defined for the type. - ensures - - returns a batch_trainer object that has been instantiated with the - given arguments (uses a kernel cache). - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - const batch_trainer<trainer_type> verbose_batch_cached ( - const trainer_type& trainer, - const typename trainer_type::scalar_type min_learning_rate = 0.1, - long cache_size = 100 - ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, true, true, cache_size); } - /*! - requires - - min_learning_rate > 0 - - cache_size > 0 - - trainer_type == some kind of online trainer object that creates decision_function - objects (e.g. svm_pegasos). replicate_settings() must also be defined for the type. - ensures - - returns a batch_trainer object that has been instantiated with the - given arguments (is verbose and uses a kernel cache). - !*/ - -// ---------------------------------------------------------------------------------------- - - -} - -#endif // DLIB_PEGASoS_ABSTRACT_ - - diff --git a/ml/dlib/dlib/svm/ranking_tools.h b/ml/dlib/dlib/svm/ranking_tools.h deleted file mode 100644 index 3c77b41ae..000000000 --- a/ml/dlib/dlib/svm/ranking_tools.h +++ /dev/null @@ -1,448 +0,0 @@ -// Copyright (C) 2012 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
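Before the ranking tools below, here is how the batch wrappers documented above are meant to be used: they let an online trainer stand in wherever dlib expects a batch trainer. A sketch in the spirit of dlib's svm_pegasos example (the kernel width, lambda, and data are illustrative):

    #include <dlib/svm.h>
    using namespace dlib;

    typedef matrix<double, 2, 1> sample_type;
    typedef radial_basis_kernel<sample_type> kernel_type;

    void example(const std::vector<sample_type>& samples,
                 const std::vector<double>& labels)  // labels are +1/-1
    {
        svm_pegasos<kernel_type> trainer;
        trainer.set_kernel(kernel_type(0.5));
        trainer.set_lambda(0.00001);

        // Feed randomly chosen samples to the online trainer until its learning
        // rate drops below 0.1, caching up to 100 rows of the kernel matrix.
        decision_function<kernel_type> df =
            batch_cached(trainer, 0.1, 100).train(samples, labels);
    }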
-#ifndef DLIB_RANKING_ToOLS_Hh_ -#define DLIB_RANKING_ToOLS_Hh_ - -#include "ranking_tools_abstract.h" - -#include "../algs.h" -#include "../matrix.h" -#include <vector> -#include <utility> -#include <algorithm> -#include "sparse_vector.h" -#include "../statistics.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct ranking_pair - { - ranking_pair() {} - - ranking_pair( - const std::vector<T>& r, - const std::vector<T>& nr - ) : - relevant(r), nonrelevant(nr) - {} - - std::vector<T> relevant; - std::vector<T> nonrelevant; - }; - - template < - typename T - > - void serialize ( - const ranking_pair<T>& item, - std::ostream& out - ) - { - int version = 1; - serialize(version, out); - serialize(item.relevant, out); - serialize(item.nonrelevant, out); - } - - - template < - typename T - > - void deserialize ( - ranking_pair<T>& item, - std::istream& in - ) - { - int version = 0; - deserialize(version, in); - if (version != 1) - throw dlib::serialization_error("Wrong version found while deserializing dlib::ranking_pair"); - - deserialize(item.relevant, in); - deserialize(item.nonrelevant, in); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - typename disable_if<is_matrix<T>,bool>::type is_ranking_problem ( - const std::vector<ranking_pair<T> >& samples - ) - { - if (samples.size() == 0) - return false; - - - for (unsigned long i = 0; i < samples.size(); ++i) - { - if (samples[i].relevant.size() == 0) - return false; - if (samples[i].nonrelevant.size() == 0) - return false; - } - - return true; - } - - template < - typename T - > - typename enable_if<is_matrix<T>,bool>::type is_ranking_problem ( - const std::vector<ranking_pair<T> >& samples - ) - { - if (samples.size() == 0) - return false; - - - for (unsigned long i = 0; i < samples.size(); ++i) - { - if (samples[i].relevant.size() == 0) - return false; - if (samples[i].nonrelevant.size() == 0) - return false; - } - - // If these are dense vectors then they must all have the same dimensionality. 
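The dense-vector checks continue below; first, a quick sketch of how ranking_pair objects are typically filled and consumed (svm_rank_trainer lives in a sibling header of this same directory; the data values are illustrative):

    #include <dlib/svm.h>
    using namespace dlib;

    typedef matrix<double, 2, 1> sample_type;
    typedef linear_kernel<sample_type> kernel_type;

    void example()
    {
        ranking_pair<sample_type> query;
        sample_type samp;
        samp = 1, 1;  query.relevant.push_back(samp);     // should score high
        samp = 0, 0;  query.nonrelevant.push_back(samp);  // should score low

        svm_rank_trainer<kernel_type> trainer;
        decision_function<kernel_type> rank = trainer.train(query);
        // rank(x) now orders relevant samples above nonrelevant ones.
    }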
- const long dims = max_index_plus_one(samples[0].relevant); - for (unsigned long i = 0; i < samples.size(); ++i) - { - for (unsigned long j = 0; j < samples[i].relevant.size(); ++j) - { - if (is_vector(samples[i].relevant[j]) == false) - return false; - - if (samples[i].relevant[j].size() != dims) - return false; - } - for (unsigned long j = 0; j < samples[i].nonrelevant.size(); ++j) - { - if (is_vector(samples[i].nonrelevant[j]) == false) - return false; - - if (samples[i].nonrelevant[j].size() != dims) - return false; - } - } - - return true; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - unsigned long max_index_plus_one ( - const ranking_pair<T>& item - ) - { - return std::max(max_index_plus_one(item.relevant), max_index_plus_one(item.nonrelevant)); - } - - template < - typename T - > - unsigned long max_index_plus_one ( - const std::vector<ranking_pair<T> >& samples - ) - { - unsigned long dims = 0; - for (unsigned long i = 0; i < samples.size(); ++i) - { - dims = std::max(dims, max_index_plus_one(samples[i])); - } - return dims; - } - -// ---------------------------------------------------------------------------------------- - - template <typename T> - void count_ranking_inversions ( - const std::vector<T>& x, - const std::vector<T>& y, - std::vector<unsigned long>& x_count, - std::vector<unsigned long>& y_count - ) - { - x_count.assign(x.size(),0); - y_count.assign(y.size(),0); - - if (x.size() == 0 || y.size() == 0) - return; - - std::vector<std::pair<T,unsigned long> > xsort(x.size()); - std::vector<std::pair<T,unsigned long> > ysort(y.size()); - for (unsigned long i = 0; i < x.size(); ++i) - xsort[i] = std::make_pair(x[i], i); - for (unsigned long j = 0; j < y.size(); ++j) - ysort[j] = std::make_pair(y[j], j); - - std::sort(xsort.begin(), xsort.end()); - std::sort(ysort.begin(), ysort.end()); - - - unsigned long i, j; - - // Do the counting for the x values. - for (i = 0, j = 0; i < x_count.size(); ++i) - { - // Skip past y values that are in the correct order with respect to xsort[i]. - while (j < ysort.size() && ysort[j].first < xsort[i].first) - ++j; - - x_count[xsort[i].second] = ysort.size() - j; - } - - - // Now do the counting for the y values. - for (i = 0, j = 0; j < y_count.size(); ++j) - { - // Skip past x values that are in the incorrect order with respect to ysort[j]. 
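The loop below finishes the y-side counts. To make the semantics concrete, a tiny worked example (checked against the definitions above):

    std::vector<double> x = {3, 1}, y = {2, 2};
    std::vector<unsigned long> x_count, y_count;
    count_ranking_inversions(x, y, x_count, y_count);
    // x_count == {0, 2}: no y value is >= 3, but both y values are >= 1.
    // y_count == {1, 1}: exactly one x value (the 1) is <= each y value.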
- while (i < xsort.size() && !(ysort[j].first < xsort[i].first)) - ++i; - - y_count[ysort[j].second] = i; - } - } - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - inline bool compare_first_reverse_second ( - const std::pair<double,bool>& a, - const std::pair<double,bool>& b - ) - { - if (a.first < b.first) - return true; - else if (a.first > b.first) - return false; - else if (a.second && !b.second) - return true; - else - return false; - } - } - - template < - typename ranking_function, - typename T - > - matrix<double,1,2> test_ranking_function ( - const ranking_function& funct, - const std::vector<ranking_pair<T> >& samples - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_ranking_problem(samples), - "\t double test_ranking_function()" - << "\n\t invalid inputs were given to this function" - << "\n\t samples.size(): " << samples.size() - << "\n\t is_ranking_problem(samples): " << is_ranking_problem(samples) - ); - - unsigned long total_pairs = 0; - unsigned long total_wrong = 0; - - std::vector<double> rel_scores; - std::vector<double> nonrel_scores; - std::vector<unsigned long> rel_counts; - std::vector<unsigned long> nonrel_counts; - - running_stats<double> rs; - std::vector<std::pair<double,bool> > total_scores; - std::vector<bool> total_ranking; - - for (unsigned long i = 0; i < samples.size(); ++i) - { - rel_scores.resize(samples[i].relevant.size()); - nonrel_scores.resize(samples[i].nonrelevant.size()); - total_scores.clear(); - - for (unsigned long k = 0; k < rel_scores.size(); ++k) - { - rel_scores[k] = funct(samples[i].relevant[k]); - total_scores.push_back(std::make_pair(rel_scores[k], true)); - } - - for (unsigned long k = 0; k < nonrel_scores.size(); ++k) - { - nonrel_scores[k] = funct(samples[i].nonrelevant[k]); - total_scores.push_back(std::make_pair(nonrel_scores[k], false)); - } - - // Now compute the average precision for this sample. We need to sort the - // results and then put them back into total_ranking. Note that we sort them so - // that, if you get a block of ranking values that are all equal, the elements - // marked as true will come last. This prevents a ranking from outputting a - // constant value for everything and still getting a good MAP score. - std::sort(total_scores.rbegin(), total_scores.rend(), impl::compare_first_reverse_second); - total_ranking.clear(); - for (unsigned long i = 0; i < total_scores.size(); ++i) - total_ranking.push_back(total_scores[i].second); - rs.add(average_precision(total_ranking)); - - - count_ranking_inversions(rel_scores, nonrel_scores, rel_counts, nonrel_counts); - - total_pairs += rel_scores.size()*nonrel_scores.size(); - - // Note that we don't need to look at nonrel_counts since it is redundant with - // the information in rel_counts in this case. 
- total_wrong += sum(mat(rel_counts)); - } - - const double rank_swaps = static_cast<double>(total_pairs - total_wrong) / total_pairs; - const double mean_average_precision = rs.mean(); - matrix<double,1,2> res; - res = rank_swaps, mean_average_precision; - return res; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename ranking_function, - typename T - > - matrix<double,1,2> test_ranking_function ( - const ranking_function& funct, - const ranking_pair<T>& sample - ) - { - return test_ranking_function(funct, std::vector<ranking_pair<T> >(1,sample)); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type, - typename T - > - matrix<double,1,2> cross_validate_ranking_trainer ( - const trainer_type& trainer, - const std::vector<ranking_pair<T> >& samples, - const long folds - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_ranking_problem(samples) && - 1 < folds && folds <= static_cast<long>(samples.size()), - "\t double cross_validate_ranking_trainer()" - << "\n\t invalid inputs were given to this function" - << "\n\t samples.size(): " << samples.size() - << "\n\t folds: " << folds - << "\n\t is_ranking_problem(samples): " << is_ranking_problem(samples) - ); - - - const long num_in_test = samples.size()/folds; - const long num_in_train = samples.size() - num_in_test; - - - std::vector<ranking_pair<T> > samples_test, samples_train; - - - long next_test_idx = 0; - - unsigned long total_pairs = 0; - unsigned long total_wrong = 0; - - std::vector<double> rel_scores; - std::vector<double> nonrel_scores; - std::vector<unsigned long> rel_counts; - std::vector<unsigned long> nonrel_counts; - - running_stats<double> rs; - std::vector<std::pair<double,bool> > total_scores; - std::vector<bool> total_ranking; - - for (long i = 0; i < folds; ++i) - { - samples_test.clear(); - samples_train.clear(); - - // load up the test samples - for (long cnt = 0; cnt < num_in_test; ++cnt) - { - samples_test.push_back(samples[next_test_idx]); - next_test_idx = (next_test_idx + 1)%samples.size(); - } - - // load up the training samples - long next = next_test_idx; - for (long cnt = 0; cnt < num_in_train; ++cnt) - { - samples_train.push_back(samples[next]); - next = (next + 1)%samples.size(); - } - - - const typename trainer_type::trained_function_type& df = trainer.train(samples_train); - - // check how good df is on the test data - for (unsigned long i = 0; i < samples_test.size(); ++i) - { - rel_scores.resize(samples_test[i].relevant.size()); - nonrel_scores.resize(samples_test[i].nonrelevant.size()); - - total_scores.clear(); - - for (unsigned long k = 0; k < rel_scores.size(); ++k) - { - rel_scores[k] = df(samples_test[i].relevant[k]); - total_scores.push_back(std::make_pair(rel_scores[k], true)); - } - - for (unsigned long k = 0; k < nonrel_scores.size(); ++k) - { - nonrel_scores[k] = df(samples_test[i].nonrelevant[k]); - total_scores.push_back(std::make_pair(nonrel_scores[k], false)); - } - - // Now compute the average precision for this sample. We need to sort the - // results and then put them back into total_ranking. Note that we sort them so - // that, if you get a block of ranking values that are all equal, the elements - // marked as true will come last. This prevents a ranking from outputting a - // constant value for everything and still getting a good MAP score. 
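The std::sort call that follows builds total_ranking, and average_precision() (from dlib's statistics headers) scores it. For intuition, a minimal equivalent of that scoring (a sketch, not the library's exact implementation; the no-hits return value is an assumption):

    #include <cstddef>
    #include <vector>

    // Mean of precision@k taken at each position k that holds a true entry.
    double average_precision_sketch(const std::vector<bool>& ranking)
    {
        double hits = 0, sum = 0;
        for (std::size_t k = 0; k < ranking.size(); ++k)
        {
            if (ranking[k])
            {
                ++hits;
                sum += hits / (k + 1);  // precision among the top k+1 items
            }
        }
        return hits > 0 ? sum / hits : 0;  // assumed edge-case behavior
    }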
- std::sort(total_scores.rbegin(), total_scores.rend(), impl::compare_first_reverse_second); - total_ranking.clear(); - for (unsigned long i = 0; i < total_scores.size(); ++i) - total_ranking.push_back(total_scores[i].second); - rs.add(average_precision(total_ranking)); - - - count_ranking_inversions(rel_scores, nonrel_scores, rel_counts, nonrel_counts); - - total_pairs += rel_scores.size()*nonrel_scores.size(); - - // Note that we don't need to look at nonrel_counts since it is redundant with - // the information in rel_counts in this case. - total_wrong += sum(mat(rel_counts)); - } - - } // for (long i = 0; i < folds; ++i) - - const double rank_swaps = static_cast<double>(total_pairs - total_wrong) / total_pairs; - const double mean_average_precision = rs.mean(); - matrix<double,1,2> res; - res = rank_swaps, mean_average_precision; - return res; - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RANKING_ToOLS_Hh_ - diff --git a/ml/dlib/dlib/svm/ranking_tools_abstract.h b/ml/dlib/dlib/svm/ranking_tools_abstract.h deleted file mode 100644 index af6c7a2e3..000000000 --- a/ml/dlib/dlib/svm/ranking_tools_abstract.h +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright (C) 2012 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_RANKING_ToOLS_ABSTRACT_Hh_ -#ifdef DLIB_RANKING_ToOLS_ABSTRACT_Hh_ - - -#include "../algs.h" -#include "../matrix.h" -#include <vector> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct ranking_pair - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is used to contain a ranking example. In particular, we say - that a good ranking of T objects is one in which all the elements in - this->relevant are ranked higher than the elements of this->nonrelevant. - Therefore, ranking_pair objects are used to represent training examples for - learning-to-rank tasks. - !*/ - - ranking_pair() {} - /*! - ensures - - #relevant.size() == 0 - - #nonrelevant.size() == 0 - !*/ - - ranking_pair( - const std::vector<T>& r, - const std::vector<T>& nr - ) : relevant(r), nonrelevant(nr) {} - /*! - ensures - - #relevant == r - - #nonrelevant == nr - !*/ - - std::vector<T> relevant; - std::vector<T> nonrelevant; - }; - - template < - typename T - > - void serialize ( - const ranking_pair<T>& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - - template < - typename T - > - void deserialize ( - ranking_pair<T>& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - bool is_ranking_problem ( - const std::vector<ranking_pair<T> >& samples - ); - /*! - ensures - - returns true if the data in samples represents a valid learning-to-rank - learning problem. That is, this function returns true if all of the - following are true and false otherwise: - - samples.size() > 0 - - for all valid i: - - samples[i].relevant.size() > 0 - - samples[i].nonrelevant.size() > 0 - - if (is_matrix<T>::value == true) then - - All the elements of samples::nonrelevant and samples::relevant must - represent row or column vectors and they must be the same dimension. 
- !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - unsigned long max_index_plus_one ( - const ranking_pair<T>& item - ); - /*! - requires - - T must be a dlib::matrix capable of storing column vectors or T must be a - sparse vector type as defined in dlib/svm/sparse_vector_abstract.h. - ensures - - returns std::max(max_index_plus_one(item.relevant), max_index_plus_one(item.nonrelevant)). - Therefore, this function can be used to find the dimensionality of the - vectors stored in item. - !*/ - - template < - typename T - > - unsigned long max_index_plus_one ( - const std::vector<ranking_pair<T> >& samples - ); - /*! - requires - - T must be a dlib::matrix capable of storing column vectors or T must be a - sparse vector type as defined in dlib/svm/sparse_vector_abstract.h. - ensures - - returns the maximum of max_index_plus_one(samples[i]) over all valid values - of i. Therefore, this function can be used to find the dimensionality of the - vectors stored in samples - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - void count_ranking_inversions ( - const std::vector<T>& x, - const std::vector<T>& y, - std::vector<unsigned long>& x_count, - std::vector<unsigned long>& y_count - ); - /*! - requires - - T objects must be copyable - - T objects must be comparable via operator< - ensures - - This function counts how many times we see a y value greater than or equal to - an x value. This is done efficiently in O(n*log(n)) time via the use of - quick sort. - - #x_count.size() == x.size() - - #y_count.size() == y.size() - - for all valid i: - - #x_count[i] == how many times a value in y was >= x[i]. - - for all valid j: - - #y_count[j] == how many times a value in x was <= y[j]. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename ranking_function, - typename T - > - matrix<double,1,2> test_ranking_function ( - const ranking_function& funct, - const std::vector<ranking_pair<T> >& samples - ); - /*! - requires - - is_ranking_problem(samples) == true - - ranking_function == some kind of decision function object (e.g. decision_function) - ensures - - Tests the given ranking function on the supplied example ranking data and - returns the fraction of ranking pair orderings predicted correctly. This is - a number in the range [0,1] where 0 means everything was incorrectly - predicted while 1 means everything was correctly predicted. This function - also returns the mean average precision. - - In particular, this function returns a matrix M summarizing the results. - Specifically, it returns an M such that: - - M(0) == the fraction of times that the following is true: - - funct(samples[k].relevant[i]) > funct(samples[k].nonrelevant[j]) - (for all valid i,j,k) - - M(1) == the mean average precision of the rankings induced by funct. - (Mean average precision is a number in the range 0 to 1. Moreover, a - mean average precision of 1 means everything was correctly predicted - while smaller values indicate worse rankings. See the documentation - for average_precision() for details of its computation.) 
- !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename ranking_function, - typename T - > - matrix<double,1,2> test_ranking_function ( - const ranking_function& funct, - const ranking_pair<T>& sample - ); - /*! - requires - - is_ranking_problem(std::vector<ranking_pair<T> >(1, sample)) == true - - ranking_function == some kind of decision function object (e.g. decision_function) - ensures - - This is just a convenience routine for calling the above - test_ranking_function() routine. That is, it just copies sample into a - std::vector object and invokes the above test_ranking_function() routine. - This means that calling this function is equivalent to invoking: - return test_ranking_function(funct, std::vector<ranking_pair<T> >(1, sample)); - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type, - typename T - > - matrix<double,1,2> cross_validate_ranking_trainer ( - const trainer_type& trainer, - const std::vector<ranking_pair<T> >& samples, - const long folds - ); - /*! - requires - - is_ranking_problem(samples) == true - - 1 < folds <= samples.size() - - trainer_type == some kind of ranking trainer object (e.g. svm_rank_trainer) - ensures - - Performs k-fold cross validation by using the given trainer to solve the - given ranking problem for the given number of folds. Each fold is tested - using the output of the trainer and the average ranking accuracy as well as - the mean average precision over the number of folds is returned. - - The accuracy is computed the same way test_ranking_function() computes its - accuracy. Therefore, it is a number in the range [0,1] that represents the - fraction of times a ranking pair's ordering was predicted correctly. Similarly, - the mean average precision is computed identically to test_ranking_function(). - In particular, this means that this function returns a matrix M such that: - - M(0) == the ranking accuracy - - M(1) == the mean average precision - - The number of folds used is given by the folds argument. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RANKING_ToOLS_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/svm/rbf_network.h b/ml/dlib/dlib/svm/rbf_network.h deleted file mode 100644 index 23a2c7424..000000000 --- a/ml/dlib/dlib/svm/rbf_network.h +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_RBf_NETWORK_ -#define DLIB_RBf_NETWORK_ - -#include "../matrix.h" -#include "rbf_network_abstract.h" -#include "kernel.h" -#include "linearly_independent_subset_finder.h" -#include "function.h" -#include "../algs.h" - -namespace dlib -{ - -// ------------------------------------------------------------------------------ - - template < - typename Kern - > - class rbf_network_trainer - { - /*! - This is an implementation of an RBF network trainer that follows - the directions right off Wikipedia basically. So nothing - particularly fancy. Although the way the centers are selected - is somewhat unique. 
- !*/ - - public: - typedef Kern kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - rbf_network_trainer ( - ) : - num_centers(10) - { - } - - void set_kernel ( - const kernel_type& k - ) - { - kernel = k; - } - - const kernel_type& get_kernel ( - ) const - { - return kernel; - } - - void set_num_centers ( - const unsigned long num - ) - { - num_centers = num; - } - - unsigned long get_num_centers ( - ) const - { - return num_centers; - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - return do_train(mat(x), mat(y)); - } - - void swap ( - rbf_network_trainer& item - ) - { - exchange(kernel, item.kernel); - exchange(num_centers, item.num_centers); - } - - private: - - // ------------------------------------------------------------------------------------ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - typedef typename decision_function<kernel_type>::scalar_vector_type scalar_vector_type; - - // make sure requires clause is not broken - DLIB_ASSERT(is_learning_problem(x,y), - "\tdecision_function rbf_network_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t x.nr(): " << x.nr() - << "\n\t y.nr(): " << y.nr() - << "\n\t x.nc(): " << x.nc() - << "\n\t y.nc(): " << y.nc() - ); - - // use the linearly_independent_subset_finder object to select the centers. So here - // we show it all the data samples so it can find the best centers. - linearly_independent_subset_finder<kernel_type> lisf(kernel, num_centers); - fill_lisf(lisf, x); - - const long num_centers = lisf.size(); - - // fill the K matrix with the output of the kernel for all the center and sample point pairs - matrix<scalar_type,0,0,mem_manager_type> K(x.nr(), num_centers+1); - for (long r = 0; r < x.nr(); ++r) - { - for (long c = 0; c < num_centers; ++c) - { - K(r,c) = kernel(x(r), lisf[c]); - } - // This last column of the K matrix takes care of the bias term - K(r,num_centers) = 1; - } - - // compute the best weights by using the pseudo-inverse - scalar_vector_type weights(pinv(K)*y); - - // now put everything into a decision_function object and return it - return decision_function<kernel_type> (remove_row(weights,num_centers), - -weights(num_centers), - kernel, - lisf.get_dictionary()); - - } - - kernel_type kernel; - unsigned long num_centers; - - }; // end of class rbf_network_trainer - -// ---------------------------------------------------------------------------------------- - - template <typename sample_type> - void swap ( - rbf_network_trainer<sample_type>& a, - rbf_network_trainer<sample_type>& b - ) { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RBf_NETWORK_ - diff --git a/ml/dlib/dlib/svm/rbf_network_abstract.h b/ml/dlib/dlib/svm/rbf_network_abstract.h deleted file mode 100644 index 782a4bdbd..000000000 --- a/ml/dlib/dlib/svm/rbf_network_abstract.h +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright (C) 2008 Davis E. 
King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_RBf_NETWORK_ABSTRACT_ -#ifdef DLIB_RBf_NETWORK_ABSTRACT_ - -#include "../algs.h" -#include "function_abstract.h" -#include "kernel_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class rbf_network_trainer - { - /*! - REQUIREMENTS ON K - is a kernel function object as defined in dlib/svm/kernel_abstract.h - (since this is supposed to be a RBF network it is probably reasonable - to use some sort of radial basis kernel) - - INITIAL VALUE - - get_num_centers() == 10 - - WHAT THIS OBJECT REPRESENTS - This object implements a trainer for a radial basis function network. - - The implementation of this algorithm follows the normal RBF training - process. For more details see the code or the Wikipedia article - about RBF networks. - !*/ - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - rbf_network_trainer ( - ); - /*! - ensures - - this object is properly initialized - !*/ - - void set_kernel ( - const kernel_type& k - ); - /*! - ensures - - #get_kernel() == k - !*/ - - const kernel_type& get_kernel ( - ) const; - /*! - ensures - - returns a copy of the kernel function in use by this object - !*/ - - void set_num_centers ( - const unsigned long num_centers - ); - /*! - ensures - - #get_num_centers() == num_centers - !*/ - - const unsigned long get_num_centers ( - ) const; - /*! - ensures - - returns the maximum number of centers (a.k.a. basis_vectors in the - trained decision_function) you will get when you train this object on data. - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - /*! - requires - - x == a matrix or something convertible to a matrix via mat(). - Also, x should contain sample_type objects. - - y == a matrix or something convertible to a matrix via mat(). - Also, y should contain scalar_type objects. - - is_learning_problem(x,y) == true - ensures - - trains a RBF network given the training samples in x and - labels in y and returns the resulting decision_function - throws - - std::bad_alloc - !*/ - - void swap ( - rbf_network_trainer& item - ); - /*! - ensures - - swaps *this and item - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template <typename K> - void swap ( - rbf_network_trainer<K>& a, - rbf_network_trainer<K>& b - ) { a.swap(b); } - /*! - provides a global swap - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RBf_NETWORK_ABSTRACT_ - - - diff --git a/ml/dlib/dlib/svm/reduced.h b/ml/dlib/dlib/svm/reduced.h deleted file mode 100644 index b4c5b63ca..000000000 --- a/ml/dlib/dlib/svm/reduced.h +++ /dev/null @@ -1,613 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
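The reduced.h file removed below post-processes another trainer so that the final decision_function keeps at most a fixed number of basis vectors. Its intended use is a one-line wrapper around any batch trainer (the trainer choice, kernel width, and the count of 10 are illustrative):

    #include <dlib/svm.h>
    using namespace dlib;

    typedef matrix<double, 2, 1> sample_type;
    typedef radial_basis_kernel<sample_type> kernel_type;

    decision_function<kernel_type> train_small(
        const std::vector<sample_type>& samples, const std::vector<double>& labels)
    {
        svm_c_trainer<kernel_type> trainer;  // any batch trainer works here
        trainer.set_kernel(kernel_type(0.1));
        // Approximate the trained function with at most 10 basis vectors.
        return reduced(trainer, 10).train(samples, labels);
    }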
-#ifndef DLIB_REDUCEd_TRAINERS_ -#define DLIB_REDUCEd_TRAINERS_ - -#include "reduced_abstract.h" -#include "../matrix.h" -#include "../algs.h" -#include "function.h" -#include "kernel.h" -#include "kcentroid.h" -#include "linearly_independent_subset_finder.h" -#include "../optimization.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - class reduced_decision_function_trainer - { - public: - typedef typename trainer_type::kernel_type kernel_type; - typedef typename trainer_type::scalar_type scalar_type; - typedef typename trainer_type::sample_type sample_type; - typedef typename trainer_type::mem_manager_type mem_manager_type; - typedef typename trainer_type::trained_function_type trained_function_type; - - reduced_decision_function_trainer ( - ) :num_bv(0) {} - - reduced_decision_function_trainer ( - const trainer_type& trainer_, - const unsigned long num_sb_ - ) : - trainer(trainer_), - num_bv(num_sb_) - { - // make sure requires clause is not broken - DLIB_ASSERT(num_bv > 0, - "\t reduced_decision_function_trainer()" - << "\n\t you have given invalid arguments to this function" - << "\n\t num_bv: " << num_bv - ); - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(num_bv > 0, - "\t reduced_decision_function_trainer::train(x,y)" - << "\n\t You have tried to use an uninitialized version of this object" - << "\n\t num_bv: " << num_bv ); - return do_train(mat(x), mat(y)); - } - - private: - - // ------------------------------------------------------------------------------------ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - // get the decision function object we are going to try and approximate - const decision_function<kernel_type>& dec_funct = trainer.train(x,y); - - // now find a linearly independent subset of the training points of num_bv points. - linearly_independent_subset_finder<kernel_type> lisf(dec_funct.kernel_function, num_bv); - fill_lisf(lisf, x); - - // The next few statements just find the best weights with which to approximate - // the dec_funct object with the smaller set of vectors in the lisf dictionary. This - // is really just a simple application of some linear algebra. For the details - // see page 554 of Learning with kernels by Scholkopf and Smola where they talk - // about "Optimal Expansion Coefficients." - - const kernel_type kern(dec_funct.kernel_function); - - matrix<scalar_type,0,1,mem_manager_type> alpha; - - alpha = lisf.get_inv_kernel_marix()*(kernel_matrix(kern,lisf,dec_funct.basis_vectors)*dec_funct.alpha); - - decision_function<kernel_type> new_df(alpha, - 0, - kern, - lisf.get_dictionary()); - - // now we have to figure out what the new bias should be. It might be a little - // different since we just messed with all the weights and vectors. 
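The alpha computation above is the least squares projection the comment cites: it chooses weights beta for the reduced basis {z_i} that best reproduce the original expansion over {x_j} in the kernel feature space. In LaTeX form,

    \min_{\beta} \left\| \sum_i \beta_i \phi(z_i) - \sum_j \alpha_j \phi(x_j) \right\|^2
    \quad\Longrightarrow\quad
    \beta = K_{zz}^{-1} K_{zx} \, \alpha

where K_zz is the kernel matrix of the reduced basis (the lisf object already maintains its inverse) and K_zx is the cross kernel matrix; that is exactly the lisf.get_inv_kernel_marix()*(kernel_matrix(...)*dec_funct.alpha) expression above. The bias fix-up that follows sets new_df.b to the mean of new_df(x) - dec_funct(x) over the training points, which recenters the approximation's outputs to agree with the original on average.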
- double bias = 0; - for (long i = 0; i < x.nr(); ++i) - { - bias += new_df(x(i)) - dec_funct(x(i)); - } - - new_df.b = bias/x.nr(); - - return new_df; - } - - // ------------------------------------------------------------------------------------ - - trainer_type trainer; - unsigned long num_bv; - - - }; // end of class reduced_decision_function_trainer - - template <typename trainer_type> - const reduced_decision_function_trainer<trainer_type> reduced ( - const trainer_type& trainer, - const unsigned long num_bv - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(num_bv > 0, - "\tconst reduced_decision_function_trainer reduced()" - << "\n\t you have given invalid arguments to this function" - << "\n\t num_bv: " << num_bv - ); - - return reduced_decision_function_trainer<trainer_type>(trainer, num_bv); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - namespace red_impl - { - - // ------------------------------------------------------------------------------------ - - template <typename kernel_type> - class objective - { - /* - This object represents the objective function we will try to - minimize in approximate_distance_function(). - - The objective is the distance, in kernel induced feature space, between - the original distance function and the approximated version. - - */ - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - public: - objective( - const distance_function<kernel_type>& dist_funct_, - matrix<scalar_type,0,1,mem_manager_type>& b_, - matrix<sample_type,0,1,mem_manager_type>& out_vectors_ - ) : - dist_funct(dist_funct_), - b(b_), - out_vectors(out_vectors_) - { - } - - const matrix<scalar_type, 0, 1, mem_manager_type> state_to_vector ( - ) const - /*! - ensures - - returns a vector that contains all the information necessary to - reproduce the current state of the approximated distance function - !*/ - { - matrix<scalar_type, 0, 1, mem_manager_type> z(b.nr() + out_vectors.size()*out_vectors(0).nr()); - long i = 0; - for (long j = 0; j < b.nr(); ++j) - { - z(i) = b(j); - ++i; - } - - for (long j = 0; j < out_vectors.size(); ++j) - { - for (long k = 0; k < out_vectors(j).size(); ++k) - { - z(i) = out_vectors(j)(k); - ++i; - } - } - return z; - } - - - void vector_to_state ( - const matrix<scalar_type, 0, 1, mem_manager_type>& z - ) const - /*! - requires - - z came from the state_to_vector() function or has a compatible format - ensures - - loads the vector z into the state variables of the approximate - distance function (i.e. b and out_vectors) - !*/ - { - long i = 0; - for (long j = 0; j < b.nr(); ++j) - { - b(j) = z(i); - ++i; - } - - for (long j = 0; j < out_vectors.size(); ++j) - { - for (long k = 0; k < out_vectors(j).size(); ++k) - { - out_vectors(j)(k) = z(i); - ++i; - } - } - } - - double operator() ( - const matrix<scalar_type, 0, 1, mem_manager_type>& z - ) const - /*! - ensures - - loads the current approximate distance function with z - - returns the distance between the original distance function - and the approximate one. 
- !*/ - { - vector_to_state(z); - const kernel_type k(dist_funct.get_kernel()); - - double temp = 0; - for (long i = 0; i < out_vectors.size(); ++i) - { - for (long j = 0; j < dist_funct.get_basis_vectors().nr(); ++j) - { - temp -= b(i)*dist_funct.get_alpha()(j)*k(out_vectors(i), dist_funct.get_basis_vectors()(j)); - } - } - - temp *= 2; - - for (long i = 0; i < out_vectors.size(); ++i) - { - for (long j = 0; j < out_vectors.size(); ++j) - { - temp += b(i)*b(j)*k(out_vectors(i), out_vectors(j)); - } - } - - return temp + dist_funct.get_squared_norm(); - } - - private: - - const distance_function<kernel_type>& dist_funct; - matrix<scalar_type,0,1,mem_manager_type>& b; - matrix<sample_type,0,1,mem_manager_type>& out_vectors; - - }; - - // ------------------------------------------------------------------------------------ - - template <typename kernel_type> - class objective_derivative - { - /*! - This object represents the derivative of the objective object - !*/ - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - public: - - - objective_derivative( - const distance_function<kernel_type>& dist_funct_, - matrix<scalar_type,0,1,mem_manager_type>& b_, - matrix<sample_type,0,1,mem_manager_type>& out_vectors_ - ) : - dist_funct(dist_funct_), - b(b_), - out_vectors(out_vectors_) - { - } - - void vector_to_state ( - const matrix<scalar_type, 0, 1, mem_manager_type>& z - ) const - /*! - requires - - z came from the state_to_vector() function or has a compatible format - ensures - - loads the vector z into the state variables of the approximate - distance function (i.e. b and out_vectors) - !*/ - { - long i = 0; - for (long j = 0; j < b.nr(); ++j) - { - b(j) = z(i); - ++i; - } - - for (long j = 0; j < out_vectors.size(); ++j) - { - for (long k = 0; k < out_vectors(j).size(); ++k) - { - out_vectors(j)(k) = z(i); - ++i; - } - } - } - - const matrix<scalar_type,0,1,mem_manager_type>& operator() ( - const matrix<scalar_type, 0, 1, mem_manager_type>& z - ) const - /*! - ensures - - loads the current approximate distance function with z - - returns the derivative of the distance between the original - distance function and the approximate one. 
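              In the notation used for the objective above, the gradient blocks are
                  d/db   = 2*(K_zz*b - K_zx*alpha)
                  d/dz_i = 2*b(i)*( sum_j b(j)*dk(z_j,z_i) - sum_j alpha(j)*dk(x_j,z_i) )
              where z_i == out_vectors(i) and dk denotes the derivative of the kernel
              with respect to its second argument, as supplied by
              kernel_derivative<kernel_type>.  The implementation below fills in these
              two blocks and then scales everything by 2.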
- !*/ - { - vector_to_state(z); - res.set_size(z.nr()); - set_all_elements(res,0); - const kernel_type k(dist_funct.get_kernel()); - const kernel_derivative<kernel_type> K_der(k); - - // first compute the gradient for the beta weights - for (long i = 0; i < out_vectors.size(); ++i) - { - for (long j = 0; j < out_vectors.size(); ++j) - { - res(i) += b(j)*k(out_vectors(i), out_vectors(j)); - } - } - for (long i = 0; i < out_vectors.size(); ++i) - { - for (long j = 0; j < dist_funct.get_basis_vectors().size(); ++j) - { - res(i) -= dist_funct.get_alpha()(j)*k(out_vectors(i), dist_funct.get_basis_vectors()(j)); - } - } - - - // now compute the gradient of the actual vectors that go into the kernel functions - long pos = out_vectors.size(); - const long num = out_vectors(0).nr(); - temp.set_size(num,1); - for (long i = 0; i < out_vectors.size(); ++i) - { - set_all_elements(temp,0); - for (long j = 0; j < out_vectors.size(); ++j) - { - temp += b(j)*K_der(out_vectors(j), out_vectors(i)); - } - for (long j = 0; j < dist_funct.get_basis_vectors().nr(); ++j) - { - temp -= dist_funct.get_alpha()(j)*K_der(dist_funct.get_basis_vectors()(j), out_vectors(i) ); - } - - // store the gradient for out_vectors(i) into result in the proper spot - set_subm(res,pos,0,num,1) = b(i)*temp; - pos += num; - } - - - res *= 2; - return res; - } - - private: - - mutable matrix<scalar_type, 0, 1, mem_manager_type> res; - mutable sample_type temp; - - const distance_function<kernel_type>& dist_funct; - matrix<scalar_type,0,1,mem_manager_type>& b; - matrix<sample_type,0,1,mem_manager_type>& out_vectors; - - }; - - // ------------------------------------------------------------------------------------ - - } - - template < - typename K, - typename stop_strategy_type, - typename T - > - distance_function<K> approximate_distance_function ( - stop_strategy_type stop_strategy, - const distance_function<K>& target, - const T& starting_basis - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(target.get_basis_vectors().size() > 0 && - starting_basis.size() > 0, - "\t distance_function approximate_distance_function()" - << "\n\t Invalid inputs were given to this function." - << "\n\t target.get_basis_vectors().size(): " << target.get_basis_vectors().size() - << "\n\t starting_basis.size(): " << starting_basis.size() - ); - - using namespace red_impl; - // The next few statements just find the best weights with which to approximate - // the target object with the set of basis vectors in starting_basis. This - // is really just a simple application of some linear algebra. For the details - // see page 554 of Learning with kernels by Scholkopf and Smola where they talk - // about "Optimal Expansion Coefficients." - - const K kern(target.get_kernel()); - typedef typename K::scalar_type scalar_type; - typedef typename K::sample_type sample_type; - typedef typename K::mem_manager_type mem_manager_type; - - matrix<scalar_type,0,1,mem_manager_type> beta; - - // Now we compute the first approximate distance function. - beta = pinv(kernel_matrix(kern,starting_basis)) * - (kernel_matrix(kern,starting_basis,target.get_basis_vectors())*target.get_alpha()); - matrix<sample_type,0,1,mem_manager_type> out_vectors(mat(starting_basis)); - - - // Now set up to do a global optimization of all the parameters in the approximate - // distance function.
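            // The search runs over one stacked variable vector built by
            // state_to_vector(): beta followed by every element of every basis
            // vector, i.e. m*(d+1) unknowns for m basis vectors of dimension d.
            // objective_derivative supplies the analytic gradient, so L-BFGS can
            // be applied directly.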
- const objective<K> obj(target, beta, out_vectors); - const objective_derivative<K> obj_der(target, beta, out_vectors); - matrix<scalar_type,0,1,mem_manager_type> opt_starting_point(obj.state_to_vector()); - - - // perform a full optimization of all the parameters (i.e. both beta and the basis vectors together) - find_min(lbfgs_search_strategy(20), - stop_strategy, - obj, obj_der, opt_starting_point, 0); - - // now make sure that the final optimized value is loaded into the beta and - // out_vectors matrices - obj.vector_to_state(opt_starting_point); - - // Do a final reoptimization of beta just to make sure it is optimal given the new - // set of basis vectors. - beta = pinv(kernel_matrix(kern,out_vectors))*(kernel_matrix(kern,out_vectors,target.get_basis_vectors())*target.get_alpha()); - - // It is possible that some of the beta weights will be very close to zero. Let's remove - // the basis vectors with these essentially zero weights. - const scalar_type eps = max(abs(beta))*std::numeric_limits<scalar_type>::epsilon(); - for (long i = 0; i < beta.size(); ++i) - { - // if beta(i) is zero (but leave at least one beta no matter what) - if (std::abs(beta(i)) < eps && beta.size() > 1) - { - beta = remove_row(beta, i); - out_vectors = remove_row(out_vectors, i); - --i; - } - } - - return distance_function<K>(beta, kern, out_vectors); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - class reduced_decision_function_trainer2 - { - public: - typedef typename trainer_type::kernel_type kernel_type; - typedef typename trainer_type::scalar_type scalar_type; - typedef typename trainer_type::sample_type sample_type; - typedef typename trainer_type::mem_manager_type mem_manager_type; - typedef typename trainer_type::trained_function_type trained_function_type; - - reduced_decision_function_trainer2 () : num_bv(0) {} - reduced_decision_function_trainer2 ( - const trainer_type& trainer_, - const long num_sb_, - const double eps_ = 1e-3 - ) : - trainer(trainer_), - num_bv(num_sb_), - eps(eps_) - { - COMPILE_TIME_ASSERT(is_matrix<sample_type>::value); - - // make sure requires clause is not broken - DLIB_ASSERT(num_bv > 0 && eps > 0, - "\t reduced_decision_function_trainer2()" - << "\n\t you have given invalid arguments to this function" - << "\n\t num_bv: " << num_bv - << "\n\t eps: " << eps - ); - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(num_bv > 0, - "\t reduced_decision_function_trainer2::train(x,y)" - << "\n\t You have tried to use an uninitialized version of this object" - << "\n\t num_bv: " << num_bv ); - return do_train(mat(x), mat(y)); - } - - private: - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - // get the decision function object we are going to try and approximate - const decision_function<kernel_type>& dec_funct = trainer.train(x,y); - const kernel_type kern(dec_funct.kernel_function); - - // now find a linearly
independent subset of the training points of num_bv points. - linearly_independent_subset_finder<kernel_type> lisf(kern, num_bv); - fill_lisf(lisf,x); - - distance_function<kernel_type> approx, target; - target = dec_funct; - approx = approximate_distance_function(objective_delta_stop_strategy(eps), target, lisf); - - decision_function<kernel_type> new_df(approx.get_alpha(), - 0, - kern, - approx.get_basis_vectors()); - - // now we have to figure out what the new bias should be. It might be a little - // different since we just messed with all the weights and vectors. - double bias = 0; - for (long i = 0; i < x.nr(); ++i) - { - bias += new_df(x(i)) - dec_funct(x(i)); - } - - new_df.b = bias/x.nr(); - - return new_df; - - } - - // ------------------------------------------------------------------------------------ - - trainer_type trainer; - long num_bv; - double eps; - - - }; // end of class reduced_decision_function_trainer2 - - template <typename trainer_type> - const reduced_decision_function_trainer2<trainer_type> reduced2 ( - const trainer_type& trainer, - const long num_bv, - double eps = 1e-3 - ) - { - COMPILE_TIME_ASSERT(is_matrix<typename trainer_type::sample_type>::value); - - // make sure requires clause is not broken - DLIB_ASSERT(num_bv > 0 && eps > 0, - "\tconst reduced_decision_function_trainer2 reduced2()" - << "\n\t you have given invalid arguments to this function" - << "\n\t num_bv: " << num_bv - << "\n\t eps: " << eps - ); - - return reduced_decision_function_trainer2<trainer_type>(trainer, num_bv, eps); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_REDUCEd_TRAINERS_ - diff --git a/ml/dlib/dlib/svm/reduced_abstract.h b/ml/dlib/dlib/svm/reduced_abstract.h deleted file mode 100644 index 8b186c033..000000000 --- a/ml/dlib/dlib/svm/reduced_abstract.h +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_REDUCED_TRAINERs_ABSTRACT_ -#ifdef DLIB_REDUCED_TRAINERs_ABSTRACT_ - -#include "../matrix.h" -#include "../algs.h" -#include "function_abstract.h" -#include "kernel_abstract.h" -#include "../optimization.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - class reduced_decision_function_trainer - { - /*! - REQUIREMENTS ON trainer_type - - trainer_type == some kind of batch trainer object (e.g. svm_nu_trainer) - - WHAT THIS OBJECT REPRESENTS - This object represents an implementation of a reduced set algorithm. - This object acts as a post processor for anything that creates - decision_function objects. It wraps another trainer object and - performs this reduced set post processing with the goal of - representing the original decision function in a form that - involves fewer basis vectors. 
- !*/ - - public: - typedef typename trainer_type::kernel_type kernel_type; - typedef typename trainer_type::scalar_type scalar_type; - typedef typename trainer_type::sample_type sample_type; - typedef typename trainer_type::mem_manager_type mem_manager_type; - typedef typename trainer_type::trained_function_type trained_function_type; - - reduced_decision_function_trainer ( - ); - /*! - ensures - - This object is in an uninitialized state. You must - construct a real one with the other constructor and assign it - to this instance before you use this object. - !*/ - - reduced_decision_function_trainer ( - const trainer_type& trainer, - const unsigned long num_bv - ); - /*! - requires - - num_bv > 0 - ensures - - returns a trainer object that applies post processing to the decision_function - objects created by the given trainer object with the goal of creating - decision_function objects with fewer basis vectors. - - The reduced decision functions that are output will have at most - num_bv basis vectors. - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const; - /*! - ensures - - trains a decision_function using the trainer that was supplied to - this object's constructor and then finds a reduced representation - for it and returns the reduced version. - throws - - std::bad_alloc - - any exceptions thrown by the trainer_type object - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - const reduced_decision_function_trainer<trainer_type> reduced ( - const trainer_type& trainer, - const unsigned long num_bv - ) { return reduced_decision_function_trainer<trainer_type>(trainer, num_bv); } - /*! - requires - - num_bv > 0 - - trainer_type == some kind of batch trainer object that creates decision_function - objects (e.g. svm_nu_trainer) - ensures - - returns a reduced_decision_function_trainer object that has been - instantiated with the given arguments. - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename K, - typename stop_strategy_type, - typename T - > - distance_function<K> approximate_distance_function ( - stop_strategy_type stop_strategy, - const distance_function<K>& target, - const T& starting_basis - ); - /*! - requires - - stop_strategy == an object that defines a stop strategy such as one of - the objects from dlib/optimization/optimization_stop_strategies_abstract.h - - requirements on starting_basis - - T must be a dlib::matrix type or something convertible to a matrix via mat() - (e.g. a std::vector). Additionally, starting_basis must contain K::sample_type - objects which can be supplied to the kernel function used by target. - - is_vector(starting_basis) == true - - starting_basis.size() > 0 - - target.get_basis_vectors().size() > 0 - - kernel_derivative<K> is defined - (i.e. The analytic derivative for the given kernel must be defined) - - K::sample_type must be a dlib::matrix object and the basis_vectors inside target - and starting_basis must be column vectors. 
- ensures - - This routine attempts to find a distance_function object which is close - to the given target. That is, it searches for an X such that target(X) is - minimized. The optimization begins with an X in the span of the elements - of starting_basis and searches for an X which locally minimizes target(X). - Since this problem can have many local minima, the quality of the starting - basis can significantly influence the results. - - The optimization is over all variables in a distance_function, however, - the size of the basis set is constrained to no more than starting_basis.size(). - That is, in the returned distance_function DF, we will have: - - DF.get_basis_vectors().size() <= starting_basis.size() - - The optimization is carried out until the stop_strategy indicates it - should stop. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - class reduced_decision_function_trainer2 - { - /*! - REQUIREMENTS ON trainer_type - - trainer_type == some kind of batch trainer object (e.g. svm_nu_trainer) - - trainer_type::sample_type must be a dlib::matrix object - - kernel_derivative<trainer_type::kernel_type> must be defined - - WHAT THIS OBJECT REPRESENTS - This object represents an implementation of a reduced set algorithm. - This object acts as a post processor for anything that creates - decision_function objects. It wraps another trainer object and - performs this reduced set post processing with the goal of - representing the original decision function in a form that - involves fewer basis vectors. - - This object's implementation is the same as that in the above - reduced_decision_function_trainer object except it also performs - a global gradient based optimization at the end to further - improve the approximation to the original decision function - object. - !*/ - - public: - typedef typename trainer_type::kernel_type kernel_type; - typedef typename trainer_type::scalar_type scalar_type; - typedef typename trainer_type::sample_type sample_type; - typedef typename trainer_type::mem_manager_type mem_manager_type; - typedef typename trainer_type::trained_function_type trained_function_type; - - reduced_decision_function_trainer2 ( - ); - /*! - ensures - - This object is in an uninitialized state. You must - construct a real one with the other constructor and assign it - to this instance before you use this object. - !*/ - - reduced_decision_function_trainer2 ( - const trainer_type& trainer, - const unsigned long num_bv, - double eps = 1e-3 - ); - /*! - requires - - num_bv > 0 - - eps > 0 - ensures - - returns a trainer object that applies post processing to the decision_function - objects created by the given trainer object with the goal of creating - decision_function objects with fewer basis vectors. - - The reduced decision functions that are output will have at most - num_bv basis vectors. - - the gradient based optimization will continue until the change in the - objective function is less than eps. So smaller values of eps will - give better results but take longer to compute. - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const; - /*! - requires - - x must be a list of objects which are each some kind of dlib::matrix - which represents column or row vectors. 
- ensures - - trains a decision_function using the trainer that was supplied to - this object's constructor and then finds a reduced representation - for it and returns the reduced version. - throws - - std::bad_alloc - - any exceptions thrown by the trainer_type object - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - const reduced_decision_function_trainer2<trainer_type> reduced2 ( - const trainer_type& trainer, - const unsigned long num_bv, - double eps = 1e-3 - ) { return reduced_decision_function_trainer2<trainer_type>(trainer, num_bv, eps); } - /*! - requires - - num_bv > 0 - - trainer_type == some kind of batch trainer object that creates decision_function - objects (e.g. svm_nu_trainer) - - kernel_derivative<trainer_type::kernel_type> is defined - - eps > 0 - ensures - - returns a reduced_decision_function_trainer2 object that has been - instantiated with the given arguments. - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_REDUCED_TRAINERs_ABSTRACT_ - diff --git a/ml/dlib/dlib/svm/rls.h b/ml/dlib/dlib/svm/rls.h deleted file mode 100644 index edee6b062..000000000 --- a/ml/dlib/dlib/svm/rls.h +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright (C) 2012 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_RLs_Hh_ -#define DLIB_RLs_Hh_ - -#include "rls_abstract.h" -#include "../matrix.h" -#include "function.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class rls - { - - public: - - - explicit rls( - double forget_factor_, - double C_ = 1000, - bool apply_forget_factor_to_C_ = false - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < forget_factor_ && forget_factor_ <= 1 && - 0 < C_, - "\t rls::rls()" - << "\n\t invalid arguments were given to this function" - << "\n\t forget_factor_: " << forget_factor_ - << "\n\t C_: " << C_ - << "\n\t this: " << this - ); - - - C = C_; - forget_factor = forget_factor_; - apply_forget_factor_to_C = apply_forget_factor_to_C_; - } - - rls( - ) - { - C = 1000; - forget_factor = 1; - apply_forget_factor_to_C = false; - } - - double get_c( - ) const - { - return C; - } - - double get_forget_factor( - ) const - { - return forget_factor; - } - - bool should_apply_forget_factor_to_C ( - ) const - { - return apply_forget_factor_to_C; - } - - template <typename EXP> - void train ( - const matrix_exp<EXP>& x, - double y - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_col_vector(x) && - (get_w().size() == 0 || get_w().size() == x.size()), - "\t void rls::train()" - << "\n\t invalid arguments were given to this function" - << "\n\t is_col_vector(x): " << is_col_vector(x) - << "\n\t x.size(): " << x.size() - << "\n\t get_w().size(): " << get_w().size() - << "\n\t this: " << this - ); - - if (R.size() == 0) - { - R = identity_matrix<double>(x.size())*C; - w.set_size(x.size()); - w = 0; - } - - // multiply by forget factor and incorporate x*trans(x) into R. 
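            // If R == inv(M) before this update then, by the Sherman-Morrison
            // identity,
            //     inv(forget_factor*M + x*trans(x))
            //         == l*R - l*l*(R*x)*trans(R*x)/(1 + l*trans(x)*R*x)
            // with l = 1/forget_factor.  The statements below compute exactly this,
            // so the inverse correlation matrix is maintained without ever forming
            // or explicitly inverting M.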
- const double l = 1.0/forget_factor; - const double temp = 1 + l*trans(x)*R*x; - tmp = R*x; - R = l*R - l*l*(tmp*trans(tmp))/temp; - - // Since we multiplied by the forget factor, we need to add (1-forget_factor) of the - // identity matrix back in to keep the regularization alive. - if (forget_factor != 1 && !apply_forget_factor_to_C) - add_eye_to_inv(R, (1-forget_factor)/C); - - // R should always be symmetric. This line improves numeric stability of this algorithm. - if (cnt%10 == 0) - R = 0.5*(R + trans(R)); - ++cnt; - - w = w + R*x*(y - trans(x)*w); - - } - - - - const matrix<double,0,1>& get_w( - ) const - { - return w; - } - - template <typename EXP> - double operator() ( - const matrix_exp<EXP>& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(is_col_vector(x) && get_w().size() == x.size(), - "\t double rls::operator()()" - << "\n\t invalid arguments were given to this function" - << "\n\t is_col_vector(x): " << is_col_vector(x) - << "\n\t x.size(): " << x.size() - << "\n\t get_w().size(): " << get_w().size() - << "\n\t this: " << this - ); - - return dot(x,w); - } - - decision_function<linear_kernel<matrix<double,0,1> > > get_decision_function ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(get_w().size() != 0, - "\t decision_function rls::get_decision_function()" - << "\n\t invalid arguments were given to this function" - << "\n\t get_w().size(): " << get_w().size() - << "\n\t this: " << this - ); - - decision_function<linear_kernel<matrix<double,0,1> > > df; - df.alpha.set_size(1); - df.basis_vectors.set_size(1); - df.b = 0; - df.alpha = 1; - df.basis_vectors(0) = w; - - return df; - } - - friend inline void serialize(const rls& item, std::ostream& out) - { - int version = 2; - serialize(version, out); - serialize(item.w, out); - serialize(item.R, out); - serialize(item.C, out); - serialize(item.forget_factor, out); - serialize(item.cnt, out); - serialize(item.apply_forget_factor_to_C, out); - } - - friend inline void deserialize(rls& item, std::istream& in) - { - int version = 0; - deserialize(version, in); - if (!(1 <= version && version <= 2)) - throw dlib::serialization_error("Unknown version number found while deserializing rls object."); - - if (version >= 1) - { - deserialize(item.w, in); - deserialize(item.R, in); - deserialize(item.C, in); - deserialize(item.forget_factor, in); - } - item.cnt = 0; - item.apply_forget_factor_to_C = false; - if (version >= 2) - { - deserialize(item.cnt, in); - deserialize(item.apply_forget_factor_to_C, in); - } - } - - private: - - void add_eye_to_inv( - matrix<double>& m, - double C - ) - /*! - ensures - - Let m == inv(M) - - #m == inv(M + C*identity_matrix<double>(m.nr())) - !*/ - { - for (long r = 0; r < m.nr(); ++r) - { - m = m - colm(m,r)*trans(colm(m,r))/(1/C + m(r,r)); - } - } - - - matrix<double,0,1> w; - matrix<double> R; - double C; - double forget_factor; - int cnt = 0; - bool apply_forget_factor_to_C; - - - // This object is here only to avoid reallocation during training. It doesn't - // logically contribute to the state of this object. - matrix<double,0,1> tmp; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RLs_Hh_ - diff --git a/ml/dlib/dlib/svm/rls_abstract.h b/ml/dlib/dlib/svm/rls_abstract.h deleted file mode 100644 index c593e4330..000000000 --- a/ml/dlib/dlib/svm/rls_abstract.h +++ /dev/null @@ -1,175 +0,0 @@ -// Copyright (C) 2012 Davis E.
King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_RLs_ABSTRACT_Hh_ -#ifdef DLIB_RLs_ABSTRACT_Hh_ - -#include "../matrix/matrix_abstract.h" -#include "function_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class rls - { - /*! - WHAT THIS OBJECT REPRESENTS - This is an implementation of the linear version of the recursive least - squares algorithm. It accepts training points incrementally and, at - each step, maintains the solution to the following optimization problem: - find w minimizing: 0.5*dot(w,w) + C*sum_i(y_i - trans(x_i)*w)^2 - Where (x_i,y_i) are training pairs. x_i is some vector and y_i is a target - scalar value. - - This object can also be configured to use exponential forgetting. This is - where each training example is weighted by pow(forget_factor, i), where i - indicates the sample's age. So older samples are weighted less in the - least squares solution and therefore become forgotten after some time. - Therefore, with forgetting, this object solves the following optimization - problem at each step: - find w minimizing: 0.5*dot(w,w) + C*sum_i pow(forget_factor, i)*(y_i - trans(x_i)*w)^2 - Where i starts at 0 and i==0 corresponds to the most recent training point. - !*/ - - public: - - - explicit rls( - double forget_factor, - double C = 1000, - bool apply_forget_factor_to_C = false - ); - /*! - requires - - 0 < forget_factor <= 1 - - 0 < C - ensures - - #get_w().size() == 0 - - #get_c() == C - - #get_forget_factor() == forget_factor - - #should_apply_forget_factor_to_C() == apply_forget_factor_to_C - !*/ - - rls( - ); - /*! - ensures - - #get_w().size() == 0 - - #get_c() == 1000 - - #get_forget_factor() == 1 - - #should_apply_forget_factor_to_C() == false - !*/ - - double get_c( - ) const; - /*! - ensures - - returns the regularization parameter. It is the parameter - that determines the trade-off between trying to fit the training - data or allowing more errors but hopefully improving the generalization - of the resulting regression. Larger values encourage exact fitting while - smaller values of C may encourage better generalization. - !*/ - - double get_forget_factor( - ) const; - /*! - ensures - - returns the exponential forgetting factor. A value of 1 disables forgetting - and results in normal least squares regression. On the other hand, a smaller - value causes the regression to forget about old training examples and prefer - instead to fit more recent examples. The closer the forget factor is to - zero the faster old examples are forgotten. - !*/ - - bool should_apply_forget_factor_to_C ( - ) const; - /*! - ensures - - If this function returns false then it means we are optimizing the - objective function discussed in the WHAT THIS OBJECT REPRESENTS section - above. However, if it returns true then we will allow the forget factor - (get_forget_factor()) to be applied to the C value which causes the - algorithm to slowly increase C and convert into a textbook version of RLS - without regularization. The main reason you might want to do this is - because it can make the algorithm run significantly faster. - !*/ - - template <typename EXP> - void train ( - const matrix_exp<EXP>& x, - double y - ) - /*! - requires - - is_col_vector(x) == true - - if (get_w().size() != 0) then - - x.size() == get_w().size() - (i.e. 
all training examples must have the same - dimensionality) - ensures - - #get_w().size() == x.size() - - updates #get_w() such that it contains the solution to the least - squares problem of regressing the given x onto the given y as well - as all the previous training examples supplied to train(). - !*/ - - const matrix<double,0,1>& get_w( - ) const; - /*! - ensures - - returns the regression weights. These are the values learned by the - least squares procedure. If train() has not been called then this - function returns an empty vector. - !*/ - - template <typename EXP> - double operator() ( - const matrix_exp<EXP>& x - ) const; - /*! - requires - - is_col_vector(x) == true - - get_w().size() == x.size() - ensures - - returns dot(x, get_w()) - !*/ - - decision_function<linear_kernel<matrix<double,0,1> > > get_decision_function ( - ) const; - /*! - requires - - get_w().size() != 0 - ensures - - returns a decision function DF such that: - - DF(x) == dot(x, get_w()) - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - void serialize ( - const rls& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - - void deserialize ( - rls& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RLs_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/svm/roc_trainer.h b/ml/dlib/dlib/svm/roc_trainer.h deleted file mode 100644 index fa2c0ef9b..000000000 --- a/ml/dlib/dlib/svm/roc_trainer.h +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_ROC_TRAINEr_H_ -#define DLIB_ROC_TRAINEr_H_ - -#include "roc_trainer_abstract.h" -#include "../algs.h" -#include <limits> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - class roc_trainer_type - { - public: - typedef typename trainer_type::kernel_type kernel_type; - typedef typename trainer_type::scalar_type scalar_type; - typedef typename trainer_type::sample_type sample_type; - typedef typename trainer_type::mem_manager_type mem_manager_type; - typedef typename trainer_type::trained_function_type trained_function_type; - - roc_trainer_type ( - ) : desired_accuracy(0), class_selection(0){} - - roc_trainer_type ( - const trainer_type& trainer_, - const scalar_type& desired_accuracy_, - const scalar_type& class_selection_ - ) : trainer(trainer_), desired_accuracy(desired_accuracy_), class_selection(class_selection_) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 <= desired_accuracy && desired_accuracy <= 1 && - (class_selection == -1 || class_selection == +1), - "\t roc_trainer_type::roc_trainer_type()" - << "\n\t invalid inputs were given to this function" - << "\n\t desired_accuracy: " << desired_accuracy - << "\n\t class_selection: " << class_selection - ); - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const trained_function_type train ( - const in_sample_vector_type& samples, - const in_scalar_vector_type& labels - ) const - /*! 
- requires - - is_binary_classification_problem(samples, labels) == true - !*/ - { - // make sure requires clause is not broken - DLIB_ASSERT(is_binary_classification_problem(samples, labels), - "\t roc_trainer_type::train()" - << "\n\t invalid inputs were given to this function" - ); - - - return do_train(mat(samples), mat(labels)); - } - - private: - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const trained_function_type do_train ( - const in_sample_vector_type& samples, - const in_scalar_vector_type& labels - ) const - { - trained_function_type df = trainer.train(samples, labels); - - // clear out the old bias - df.b = 0; - - // obtain all the scores from the df using all the class_selection labeled samples - std::vector<double> scores; - for (long i = 0; i < samples.size(); ++i) - { - if (labels(i) == class_selection) - scores.push_back(df(samples(i))); - } - - if (class_selection == +1) - std::sort(scores.rbegin(), scores.rend()); - else - std::sort(scores.begin(), scores.end()); - - // now pick out the index that gives us the desired accuracy with regards to selected class - unsigned long idx = static_cast<unsigned long>(desired_accuracy*scores.size() + 0.5); - if (idx >= scores.size()) - idx = scores.size()-1; - - df.b = scores[idx]; - - // In this case add a very small extra amount to the bias so that all the samples - // with the class_selection label are classified correctly. - if (desired_accuracy == 1) - { - if (class_selection == +1) - df.b -= std::numeric_limits<scalar_type>::epsilon()*df.b; - else - df.b += std::numeric_limits<scalar_type>::epsilon()*df.b; - } - - return df; - } - - trainer_type trainer; - scalar_type desired_accuracy; - scalar_type class_selection; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - const roc_trainer_type<trainer_type> roc_c1_trainer ( - const trainer_type& trainer, - const typename trainer_type::scalar_type& desired_accuracy - ) { return roc_trainer_type<trainer_type>(trainer, desired_accuracy, +1); } - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - const roc_trainer_type<trainer_type> roc_c2_trainer ( - const trainer_type& trainer, - const typename trainer_type::scalar_type& desired_accuracy - ) { return roc_trainer_type<trainer_type>(trainer, desired_accuracy, -1); } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_ROC_TRAINEr_H_ - - diff --git a/ml/dlib/dlib/svm/roc_trainer_abstract.h b/ml/dlib/dlib/svm/roc_trainer_abstract.h deleted file mode 100644 index 74e6f9b65..000000000 --- a/ml/dlib/dlib/svm/roc_trainer_abstract.h +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_ROC_TRAINEr_ABSTRACT_ -#ifdef DLIB_ROC_TRAINEr_ABSTRACT_ - -#include "../algs.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - class roc_trainer_type - { - /*! - REQUIREMENTS ON trainer_type - - trainer_type == some kind of batch trainer object (e.g. svm_nu_trainer) - - WHAT THIS OBJECT REPRESENTS - This object is a simple trainer post processor that allows you to - easily adjust the bias term in a trained decision_function object. 
- That is, this object lets you pick a point on the ROC curve and - it will adjust the bias term appropriately. - - So for example, suppose you wanted to set the bias term so that - the accuracy of your decision function on +1 labeled samples was 99%. - To do this you would use an instance of this object declared as follows: - roc_trainer_type<trainer_type>(your_trainer, 0.99, +1); - !*/ - - public: - typedef typename trainer_type::kernel_type kernel_type; - typedef typename trainer_type::scalar_type scalar_type; - typedef typename trainer_type::sample_type sample_type; - typedef typename trainer_type::mem_manager_type mem_manager_type; - typedef typename trainer_type::trained_function_type trained_function_type; - - roc_trainer_type ( - ); - /*! - ensures - - This object is in an uninitialized state. You must - construct a real one with the other constructor and assign it - to this instance before you use this object. - !*/ - - roc_trainer_type ( - const trainer_type& trainer_, - const scalar_type& desired_accuracy_, - const scalar_type& class_selection_ - ); - /*! - requires - - 0 <= desired_accuracy_ <= 1 - - class_selection_ == +1 or -1 - ensures - - when training is performed using this object it will automatically - adjust the bias term in the returned decision function so that it - achieves the desired accuracy on the selected class type. - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const trained_function_type train ( - const in_sample_vector_type& samples, - const in_scalar_vector_type& labels - ) const - /*! - requires - - is_binary_classification_problem(samples, labels) == true - - x == a matrix or something convertible to a matrix via mat(). - Also, x should contain sample_type objects. - - y == a matrix or something convertible to a matrix via mat(). - Also, y should contain scalar_type objects. - ensures - - performs training using the trainer object given to this object's - constructor, then modifies the bias term in the returned decision function - as discussed above, and finally returns the decision function. - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - const roc_trainer_type<trainer_type> roc_c1_trainer ( - const trainer_type& trainer, - const typename trainer_type::scalar_type& desired_accuracy - ) { return roc_trainer_type<trainer_type>(trainer, desired_accuracy, +1); } - /*! - requires - - 0 <= desired_accuracy <= 1 - - trainer_type == some kind of batch trainer object that creates decision_function - objects (e.g. svm_nu_trainer) - ensures - - returns a roc_trainer_type object that has been instantiated with the given - arguments. The returned roc trainer will select the decision function - bias that gives the desired accuracy with respect to the +1 class. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - const roc_trainer_type<trainer_type> roc_c2_trainer ( - const trainer_type& trainer, - const typename trainer_type::scalar_type& desired_accuracy - ) { return roc_trainer_type<trainer_type>(trainer, desired_accuracy, -1); } - /*! - requires - - 0 <= desired_accuracy <= 1 - - trainer_type == some kind of batch trainer object that creates decision_function - objects (e.g. svm_nu_trainer) - ensures - - returns a roc_trainer_type object that has been instantiated with the given - arguments. 
The returned roc trainer will select the decision function - bias that gives the desired accuracy with respect to the -1 class. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_ROC_TRAINEr_ABSTRACT_ - - - diff --git a/ml/dlib/dlib/svm/rr_trainer.h b/ml/dlib/dlib/svm/rr_trainer.h deleted file mode 100644 index 09177217e..000000000 --- a/ml/dlib/dlib/svm/rr_trainer.h +++ /dev/null @@ -1,456 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_RR_TRAInER_Hh_ -#define DLIB_RR_TRAInER_Hh_ - -#include "../algs.h" -#include "function.h" -#include "kernel.h" -#include "empirical_kernel_map.h" -#include "linearly_independent_subset_finder.h" -#include "../statistics.h" -#include "rr_trainer_abstract.h" -#include <vector> -#include <iostream> - -namespace dlib -{ - template < - typename K - > - class rr_trainer - { - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - // You are getting a compiler error on this line because you supplied a non-linear or - // sparse kernel to the rr_trainer object. You have to use dlib::linear_kernel with this trainer. - COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value)); - - rr_trainer ( - ) : - verbose(false), - use_regression_loss(true), - lambda(0) - { - // default lambda search list - lams = matrix_cast<scalar_type>(logspace(-9, 2, 50)); - } - - void be_verbose ( - ) - { - verbose = true; - } - - void be_quiet ( - ) - { - verbose = false; - } - - void use_regression_loss_for_loo_cv ( - ) - { - use_regression_loss = true; - } - - void use_classification_loss_for_loo_cv ( - ) - { - use_regression_loss = false; - } - - bool will_use_regression_loss_for_loo_cv ( - ) const - { - return use_regression_loss; - } - - const kernel_type get_kernel ( - ) const - { - return kernel_type(); - } - - void set_lambda ( - scalar_type lambda_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(lambda_ >= 0, - "\t void rr_trainer::set_lambda()" - << "\n\t lambda must be greater than or equal to 0" - << "\n\t lambda: " << lambda_ - << "\n\t this: " << this - ); - - lambda = lambda_; - } - - const scalar_type get_lambda ( - ) const - { - return lambda; - } - - template <typename EXP> - void set_search_lambdas ( - const matrix_exp<EXP>& lambdas - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(lambdas) && lambdas.size() > 0 && min(lambdas) > 0, - "\t void rr_trainer::set_search_lambdas()" - << "\n\t lambdas must be a non-empty vector of positive values" - << "\n\t is_vector(lambdas): " << is_vector(lambdas) - << "\n\t lambdas.size(): " << lambdas.size() - << "\n\t min(lambdas): " << min(lambdas) - << "\n\t this: " << this - ); - - - lams = matrix_cast<scalar_type>(lambdas); - } - - const matrix<scalar_type,0,0,mem_manager_type>& get_search_lambdas ( - ) const - { - return lams; - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - std::vector<scalar_type> temp; - scalar_type temp2; - return do_train(mat(x), mat(y), false, temp, temp2); - } - - template < - typename
in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - std::vector<scalar_type>& loo_values - ) const - { - scalar_type temp; - return do_train(mat(x), mat(y), true, loo_values, temp); - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - std::vector<scalar_type>& loo_values, - scalar_type& lambda_used - ) const - { - return do_train(mat(x), mat(y), true, loo_values, lambda_used); - } - - - private: - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - const bool output_loo_values, - std::vector<scalar_type>& loo_values, - scalar_type& the_lambda - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(is_learning_problem(x,y), - "\t decision_function rr_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t is_vector(x): " << is_vector(x) - << "\n\t is_vector(y): " << is_vector(y) - << "\n\t x.size(): " << x.size() - << "\n\t y.size(): " << y.size() - ); - -#ifdef ENABLE_ASSERTS - if (get_lambda() == 0 && will_use_regression_loss_for_loo_cv() == false) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_binary_classification_problem(x,y), - "\t decision_function rr_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - ); - } -#endif - - typedef matrix<scalar_type,0,1,mem_manager_type> column_matrix_type; - typedef matrix<scalar_type,0,0,mem_manager_type> general_matrix_type; - - const long dims = x(0).size(); - - /* - Notes on the solution of ridge regression - - Let A = an x.size() by dims matrix which contains all the data samples. - - Let I = an identity matrix - - Let C = trans(A)*A - Let L = trans(A)*y - - Then the optimal w is given by: - w = inv(C + lambda*I) * L - - - There is a trick to compute leave one out cross validation results for many different - lambda values quickly. The following paper has a detailed discussion of various - approaches: - - Notes on Regularized Least Squares by Ryan M. Rifkin and Ross A. Lippert. - - In the implementation of the rr_trainer I'm only using two simple equations - from the above paper. - - - First note that inv(C + lambda*I) can be computed for many different lambda - values in an efficient way by using an eigen decomposition of C. So we use - the fact that: - inv(C + lambda*I) == V*inv(D + lambda*I)*trans(V) - where V*D*trans(V) == C - - Also, via some simple linear algebra the above paper works out that the leave one out - value for a sample x(i) is equal to the following: - Let G = inv(C + lambda*I) - let val = trans(x(i))*G*x(i); - - leave one out value for sample x(i): - LOOV = (trans(w)*x(i) - y(i)*val) / (1 - val) - - leave one out error for sample x(i): - LOOE = loss(y(i), LOOV) - - - Finally, note that we will pretend there was a 1 appended to the end of each - vector in x. We won't actually do that though because we don't want to - have to make a copy of all the samples. So throughout the following code - I have explicitly dealt with this. 
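                Also note that val = trans(x(i))*G*x(i) is the leverage h_ii of
                sample i under the regularized hat matrix A*G*trans(A), so the LOOV
                expression above is the standard shortcut
                    LOOV_i = (yhat_i - h_ii*y_i) / (1 - h_ii)
                which yields all the leave-one-out predictions from a single fit.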
- */ - - general_matrix_type C, tempm, G; - column_matrix_type L, tempv, w; - - // compute C and L - for (long i = 0; i < x.size(); ++i) - { - C += x(i)*trans(x(i)); - L += y(i)*x(i); - tempv += x(i); - } - - // Account for the extra 1 that we pretend is appended to x - // Make C = [C tempv - // tempv' x.size()] - C = join_cols(join_rows(C, tempv), - join_rows(trans(tempv), uniform_matrix<scalar_type>(1,1, x.size()))); - L = join_cols(L, uniform_matrix<scalar_type>(1,1, sum(y))); - - eigenvalue_decomposition<general_matrix_type> eig(make_symmetric(C)); - const general_matrix_type V = eig.get_pseudo_v(); - const column_matrix_type D = eig.get_real_eigenvalues(); - - // We can save some work by pre-multiplying the x vectors by trans(V) - // and saving the result so we don't have to recompute it over and over later. - matrix<column_matrix_type,0,1,mem_manager_type > Vx; - if (lambda == 0 || output_loo_values) - { - // Save the transpose of V into a temporary because the subsequent matrix - // vector multiplies will be faster (because of better cache locality). - const general_matrix_type transV( colm(trans(V),range(0,dims-1)) ); - // Remember the pretend 1 at the end of x(*). We want to multiply trans(V)*x(*) - // so to do this we pull the last column off trans(V) and store it separately. - const column_matrix_type lastV = colm(trans(V), dims); - Vx.set_size(x.size()); - for (long i = 0; i < x.size(); ++i) - { - Vx(i) = transV*x(i); - Vx(i) = squared(Vx(i) + lastV); - } - } - - the_lambda = lambda; - - // If we need to automatically select a lambda then do so using the LOOE trick described - // above. - bool did_loov = false; - scalar_type best_looe = std::numeric_limits<scalar_type>::max(); - if (lambda == 0) - { - did_loov = true; - - // Compute leave one out errors for a bunch of different lambdas and pick the best one. - for (long idx = 0; idx < lams.size(); ++idx) - { - // first compute G - tempv = 1.0/(D + lams(idx)); - tempm = scale_columns(V,tempv); - G = tempm*trans(V); - - // compute the solution w for the current lambda - w = G*L; - - // make w have the same length as the x vectors. - const scalar_type b = w(dims); - w = colm(w,0,dims); - - scalar_type looe = 0; - for (long i = 0; i < x.size(); ++i) - { - // perform equivalent of: val = trans(x(i))*G*x(i); - const scalar_type val = dot(tempv, Vx(i)); - const scalar_type temp = (1 - val); - scalar_type loov; - if (temp != 0) - loov = (trans(w)*x(i) + b - y(i)*val) / temp; - else - loov = 0; - - looe += loss(loov, y(i)); - } - - // Keep track of the lambda which gave the lowest looe. If two lambdas - // have the same looe then pick the biggest lambda. - if (looe < best_looe || (looe == best_looe && lams(idx) > the_lambda)) - { - best_looe = looe; - the_lambda = lams(idx); - } - } - - best_looe /= x.size(); - } - - - - // Now perform the main training. That is, find w. - // first, compute G = inv(C + the_lambda*I) - tempv = 1.0/(D + the_lambda); - tempm = scale_columns(V,tempv); - G = tempm*trans(V); - w = G*L; - - // make w have the same length as the x vectors. - const scalar_type b = w(dims); - w = colm(w,0,dims); - - - // If we haven't done this already and we are supposed to then compute the LOO error rate for - // the current lambda and store the result in best_looe. 
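            // As in the lambda search above, Vx(i) caches squared(trans(V)*[x(i);1]),
            // so each dot(tempv, Vx(i)) below evaluates trans(x(i))*G*x(i) in O(dims)
            // time rather than the O(dims^2) a direct product would cost.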
- if (output_loo_values) - { - loo_values.resize(x.size()); - did_loov = true; - best_looe = 0; - for (long i = 0; i < x.size(); ++i) - { - // perform equivalent of: val = trans(x(i))*G*x(i); - const scalar_type val = dot(tempv, Vx(i)); - const scalar_type temp = (1 - val); - scalar_type loov; - if (temp != 0) - loov = (trans(w)*x(i) + b - y(i)*val) / temp; - else - loov = 0; - - best_looe += loss(loov, y(i)); - loo_values[i] = loov; - } - - best_looe /= x.size(); - - } - else - { - loo_values.clear(); - } - - if (verbose && did_loov) - { - using namespace std; - cout << "Using lambda: " << the_lambda << endl; - if (use_regression_loss) - cout << "LOO Mean Squared Error: " << best_looe << endl; - else - cout << "LOO Classification Error: " << best_looe << endl; - } - - // convert w into a proper decision function - decision_function<kernel_type> df; - df.alpha.set_size(1); - df.alpha = 1; - df.basis_vectors.set_size(1); - df.basis_vectors(0) = w; - df.b = -b; // don't forget about the bias we stuck onto all the vectors - - return df; - } - - inline scalar_type loss ( - const scalar_type& a, - const scalar_type& b - ) const - { - if (use_regression_loss) - { - return (a-b)*(a-b); - } - else - { - // if a and b have the same sign then no loss - if (a*b >= 0) - return 0; - else - return 1; - } - } - - - /*! - CONVENTION - - get_lambda() == lambda - - get_kernel() == kernel_type() - - will_use_regression_loss_for_loo_cv() == use_regression_loss - - get_search_lambdas() == lams - !*/ - - bool verbose; - bool use_regression_loss; - - scalar_type lambda; - - matrix<scalar_type,0,0,mem_manager_type> lams; - }; - -} - -#endif // DLIB_RR_TRAInER_Hh_ - - diff --git a/ml/dlib/dlib/svm/rr_trainer_abstract.h b/ml/dlib/dlib/svm/rr_trainer_abstract.h deleted file mode 100644 index f2fe21068..000000000 --- a/ml/dlib/dlib/svm/rr_trainer_abstract.h +++ /dev/null @@ -1,255 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_RR_TRAInER_ABSTRACT_Hh_ -#ifdef DLIB_RR_TRAInER_ABSTRACT_Hh_ - -#include "../algs.h" -#include "function_abstract.h" - -namespace dlib -{ - template < - typename K - > - class rr_trainer - { - /*! - REQUIREMENTS ON K - is the dlib::linear_kernel instantiated with some kind of column vector. - - INITIAL VALUE - - get_lambda() == 0 - - will_use_regression_loss_for_loo_cv() == true - - get_search_lambdas() == logspace(-9, 2, 50) - - this object will not be verbose unless be_verbose() is called - - WHAT THIS OBJECT REPRESENTS - This object represents a tool for performing linear ridge regression - (This basic algorithm is also known by many other names, e.g. regularized - least squares or least squares SVM). - - The exact definition of what this algorithm does is this: - Find w and b that minimize the following (x_i are input samples and y_i are target values): - lambda*dot(w,w) + sum_over_i( (f(x_i) - y_i)^2 ) - where f(x) == dot(x,w) - b - - So this algorithm is just regular old least squares regression but - with the addition of a regularization term which encourages small w. - - - It is capable of estimating the lambda parameter using leave-one-out cross-validation. - - - The leave-one-out cross-validation implementation is based on the techniques - discussed in this paper: - Notes on Regularized Least Squares by Ryan M. Rifkin and Ross A. Lippert.
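 
                In matrix form, stacking the samples into a matrix X (the
                implementation in rr_trainer.h folds b in by appending a constant
                1 to every sample), the solution it computes is the closed form
                    w = inv(trans(X)*X + lambda*I) * trans(X)*y
                evaluated through an eigendecomposition of trans(X)*X so that many
                lambda values can be tried cheaply.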
- !*/ - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - rr_trainer ( - ); - /*! - ensures - - This object is properly initialized and ready to be used. - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out. - !*/ - - void be_quiet ( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - const kernel_type get_kernel ( - ) const; - /*! - ensures - - returns a copy of the kernel function in use by this object. Since - the linear kernels don't have any parameters this function just - returns kernel_type() - !*/ - - void set_lambda ( - scalar_type lambda - ); - /*! - requires - - lambda >= 0 - ensures - - #get_lambda() == lambda - !*/ - - const scalar_type get_lambda ( - ) const; - /*! - ensures - - returns the regularization parameter. It is the parameter that - determines the trade off between trying to fit the training data - exactly or allowing more errors but hopefully improving the - generalization ability of the resulting function. Smaller values - encourage exact fitting while larger values of lambda may encourage - better generalization. - - Note that a lambda of 0 has a special meaning. It indicates to this - object that it should automatically determine an appropriate lambda - value. This is done using leave-one-out cross-validation. - !*/ - - void use_regression_loss_for_loo_cv ( - ); - /*! - ensures - - #will_use_regression_loss_for_loo_cv() == true - !*/ - - void use_classification_loss_for_loo_cv ( - ); - /*! - ensures - - #will_use_regression_loss_for_loo_cv() == false - !*/ - - bool will_use_regression_loss_for_loo_cv ( - ) const; - /*! - ensures - - returns true if the automatic lambda estimation will attempt to estimate a lambda - appropriate for a regression task. Otherwise it will try and find one which - minimizes the number of classification errors. - !*/ - - template <typename EXP> - void set_search_lambdas ( - const matrix_exp<EXP>& lambdas - ); - /*! - requires - - is_vector(lambdas) == true - - lambdas.size() > 0 - - min(lambdas) > 0 - - lambdas must contain floating point numbers - ensures - - #get_search_lambdas() == lambdas - !*/ - - const matrix<scalar_type,0,0,mem_manager_type>& get_search_lambdas ( - ) const; - /*! - ensures - - returns a matrix M such that: - - is_vector(M) == true - - M == a list of all the lambda values which will be tried when performing - LOO cross-validation for determining the best lambda. - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const; - /*! - requires - - x == a matrix or something convertible to a matrix via mat(). - Also, x should contain sample_type objects. - - y == a matrix or something convertible to a matrix via mat(). - Also, y should contain scalar_type objects. - - is_learning_problem(x,y) == true - - if (get_lambda() == 0 && will_use_regression_loss_for_loo_cv() == false) then - - is_binary_classification_problem(x,y) == true - (i.e. 
if you want this algorithm to estimate a lambda appropriate for - classification functions then you had better give a valid classification - problem) - ensures - - performs linear ridge regression given the training samples in x and target values in y. - - returns a decision_function F with the following properties: - - F(new_x) == predicted y value - - F.alpha.size() == 1 - - F.basis_vectors.size() == 1 - - F.alpha(0) == 1 - - - if (get_lambda() == 0) then - - This object will perform internal leave-one-out cross-validation to determine an - appropriate lambda automatically. It will compute the LOO error for each lambda - in get_search_lambdas() and select the best one. - - if (will_use_regression_loss_for_loo_cv()) then - - the lambda selected will be the one that minimizes the mean squared error. - - else - - the lambda selected will be the one that minimizes the number classification - mistakes. We say a point is classified correctly if the output of the - decision_function has the same sign as its label. - - #get_lambda() == 0 - (i.e. we don't change the get_lambda() value. If you want to know what the - automatically selected lambda value was then call the version of train() - defined below) - - else - - The user supplied value of get_lambda() will be used to perform the ridge regression. - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - std::vector<scalar_type>& loo_values - ) const; - /*! - requires - - all the requirements for train(x,y) must be satisfied - ensures - - returns train(x,y) - (i.e. executes train(x,y) and returns its result) - - #loo_values.size() == y.size() - - for all valid i: - - #loo_values[i] == leave-one-out prediction for the value of y(i) based - on all the training samples other than (x(i),y(i)). - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - std::vector<scalar_type>& loo_values, - scalar_type& lambda_used - ) const; - /*! - requires - - all the requirements for train(x,y) must be satisfied - ensures - - returns train(x,y) - (i.e. executes train(x,y) and returns its result) - - #loo_values.size() == y.size() - - for all valid i: - - #loo_values[i] == leave-one-out prediction for the value of y(i) based - on all the training samples other than (x(i),y(i)). - - #lambda_used == the value of lambda used to generate the - decision_function. Note that this lambda value is always - equal to get_lambda() if get_lambda() isn't 0. - !*/ - - }; - -} - -#endif // DLIB_RR_TRAInER_ABSTRACT_Hh_ - diff --git a/ml/dlib/dlib/svm/rvm.h b/ml/dlib/dlib/svm/rvm.h deleted file mode 100644 index e7ad495a2..000000000 --- a/ml/dlib/dlib/svm/rvm.h +++ /dev/null @@ -1,1018 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
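
Before the RVM code that follows, a minimal usage sketch pulling together the rr_trainer interface documented above. The toy data, dimensions, and lambda handling are illustrative choices, not from the library's docs:

```cpp
#include <dlib/svm.h>
#include <iostream>
#include <vector>

int main()
{
    using namespace dlib;
    typedef matrix<double,2,1> sample_type;
    typedef linear_kernel<sample_type> kernel_type;

    // toy linear data: target = 2*x1 - x2
    std::vector<sample_type> samples;
    std::vector<double> targets;
    for (int i = 1; i <= 10; ++i)
    {
        sample_type s;
        s(0) = i;
        s(1) = i/2.0;
        samples.push_back(s);
        targets.push_back(2*i - i/2.0);
    }

    rr_trainer<kernel_type> trainer;
    trainer.set_lambda(0);  // 0 == pick lambda automatically via LOO cross-validation

    std::vector<double> loo_values;
    double lambda_used;
    decision_function<kernel_type> df =
        trainer.train(samples, targets, loo_values, lambda_used);

    std::cout << "selected lambda: " << lambda_used << "\n";
    std::cout << "f(samples[2]):   " << df(samples[2]) << "\n";
}
```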
-#ifndef DLIB_RVm_ -#define DLIB_RVm_ - -#include "rvm_abstract.h" -#include <cmath> -#include <limits> -#include "../matrix.h" -#include "../algs.h" -#include "function.h" -#include "kernel.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - namespace rvm_helpers - { - - // ------------------------------------------------------------------------------------ - - template <typename scalar_vector_type, typename mem_manager_type> - long find_next_best_alpha_to_update ( - const scalar_vector_type& S, - const scalar_vector_type& Q, - const scalar_vector_type& alpha, - const matrix<long,0,1,mem_manager_type>& active_bases, - const bool search_all_alphas, - typename scalar_vector_type::type eps - ) - /*! - ensures - - if (we can find another alpha to update) then - - returns the index of said alpha - - else - - returns -1 - !*/ - { - typedef typename scalar_vector_type::type scalar_type; - // now use S and Q to find next alpha to update. What - // we want to do here is select the alpha to update that gives us - // the greatest improvement in marginal likelihood. - long selected_idx = -1; - scalar_type greatest_improvement = -1; - for (long i = 0; i < S.nr(); ++i) - { - scalar_type value = -1; - - // if i is currently in the active set - if (active_bases(i) >= 0) - { - const long idx = active_bases(i); - const scalar_type s = alpha(idx)*S(i)/(alpha(idx) - S(i)); - const scalar_type q = alpha(idx)*Q(i)/(alpha(idx) - S(i)); - - if (q*q-s > 0) - { - // only update an existing alpha if this is a narrow search - if (search_all_alphas == false) - { - // choosing this sample would mean doing an update of an - // existing alpha value. - scalar_type new_alpha = s*s/(q*q-s); - scalar_type cur_alpha = alpha(idx); - new_alpha = 1/new_alpha; - cur_alpha = 1/cur_alpha; - - // from equation 32 in the Tipping paper - value = Q(i)*Q(i)/(S(i) + 1/(new_alpha - cur_alpha) ) - - std::log(1 + S(i)*(new_alpha - cur_alpha)); - } - - } - // we only pick an alpha to remove if this is a wide search and it wasn't one of the recently added ones - else if (search_all_alphas && idx+2 < alpha.size() ) - { - // choosing this sample would mean the alpha value is infinite - // so we would remove the selected sample from our model. - - // from equation 37 in the Tipping paper - value = Q(i)*Q(i)/(S(i) - alpha(idx)) - - std::log(1-S(i)/alpha(idx)); - - } - } - else if (search_all_alphas) - { - const scalar_type s = S(i); - const scalar_type q = Q(i); - - if (q*q-s > 0) - { - // choosing this sample would mean we would add the selected - // sample to our model. - - // from equation 27 in the Tipping paper - value = (Q(i)*Q(i)-S(i))/S(i) + std::log(S(i)/(Q(i)*Q(i))); - } - } - - if (value > greatest_improvement) - { - greatest_improvement = value; - selected_idx = i; - } - } - - // If the greatest_improvement in marginal likelihood we would get is less - // than our epsilon then report that there isn't anything else to do. But - // if it is big enough then return the selected_idx. - if (greatest_improvement > eps) - return selected_idx; - else - return -1; - } - - } // end namespace rvm_helpers - - // ------------------------------------------------------------------------------------ - - - template < - typename kern_type - > - class rvm_trainer - { - /*! - This is an implementation of the binary classifier version of the - relevance vector machine algorithm described in the paper: - Tipping, M. E. and A. C. Faul (2003). 
Fast marginal likelihood maximisation - for sparse Bayesian models. In C. M. Bishop and B. J. Frey (Eds.), Proceedings - of the Ninth International Workshop on Artificial Intelligence and Statistics, - Key West, FL, Jan 3-6. - - This code mostly does what is described in the above paper with the exception - that here we use a different stopping condition as well as a modified alpha - selection rule. See the code for the exact details. - !*/ - - public: - typedef kern_type kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - rvm_trainer ( - ) : eps(0.001), max_iterations(2000) - { - } - - void set_max_iterations ( - unsigned long max_iterations_ - ) - { - max_iterations = max_iterations_; - } - - unsigned long get_max_iterations ( - ) const - { - return max_iterations; - } - - void set_epsilon ( - scalar_type eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\tvoid rvm_trainer::set_epsilon(eps_)" - << "\n\t invalid inputs were given to this function" - << "\n\t eps: " << eps_ - ); - eps = eps_; - } - - const scalar_type get_epsilon ( - ) const - { - return eps; - } - - void set_kernel ( - const kernel_type& k - ) - { - kernel = k; - } - - const kernel_type& get_kernel ( - ) const - { - return kernel; - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - return do_train(mat(x), mat(y)); - } - - void swap ( - rvm_trainer& item - ) - { - exchange(kernel, item.kernel); - exchange(eps, item.eps); - } - - private: - - // ------------------------------------------------------------------------------------ - - typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; - typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type; - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - - // make sure requires clause is not broken - DLIB_ASSERT(is_binary_classification_problem(x,y) == true, - "\tdecision_function rvm_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t x.nr(): " << x.nr() - << "\n\t y.nr(): " << y.nr() - << "\n\t x.nc(): " << x.nc() - << "\n\t y.nc(): " << y.nc() - << "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))? "true":"false") - ); - - // make a target vector where +1 examples have value 1 and -1 examples - // have a value of 0. - scalar_vector_type t(y.size()); - for (long i = 0; i < y.size(); ++i) - { - if (y(i) == 1) - t(i) = 1; - else - t(i) = 0; - } - - /*! This is the convention for the active_bases variable in the function: - - if (active_bases(i) >= 0) then - - alpha(active_bases(i)) == the alpha value associated with sample x(i) - - weights(active_bases(i)) == the weight value associated with sample x(i) - - colm(phi, active_bases(i)) == the column of phi associated with sample x(i) - - colm(phi, active_bases(i)) == kernel column i (from get_kernel_colum()) - - else - - the i'th sample isn't in the model and notionally has an alpha of infinity and - a weight of 0. 
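
For orientation, the bookkeeping in find_next_best_alpha_to_update earlier (and in the update steps further below) follows Tipping and Faul's sparsity/quality factorization. For a basis m already in the model,

```latex
s_m = \frac{\alpha_m S_m}{\alpha_m - S_m}, \qquad
q_m = \frac{\alpha_m Q_m}{\alpha_m - S_m}
```

and a basis belongs in the model exactly when q_m^2 > s_m, with optimal reestimate alpha_m = s_m^2/(q_m^2 - s_m); when q_m^2 <= s_m the optimal alpha_m is infinite and the basis is pruned. The `value` expressions in that helper are the corresponding changes in marginal likelihood (the paper's equations 27, 32, and 37, as the code comments note).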
- !*/ - matrix<long,0,1,mem_manager_type> active_bases(x.nr()); - scalar_matrix_type phi(x.nr(),1); - scalar_vector_type alpha(1), prev_alpha; - scalar_vector_type weights(1), prev_weights; - - scalar_vector_type tempv, K_col; - - // set the initial values of these guys - set_all_elements(active_bases, -1); - long first_basis = pick_initial_vector(x,t); - get_kernel_colum(first_basis, x, K_col); - active_bases(first_basis) = 0; - set_colm(phi,0) = K_col; - alpha(0) = compute_initial_alpha(phi, t); - weights(0) = 1; - - - // now declare a bunch of other variables we will be using below - scalar_vector_type mu, t_hat, Q, S; - scalar_matrix_type sigma; - - matrix<scalar_type,1,0,mem_manager_type> tempv2, tempv3; - scalar_matrix_type tempm; - - scalar_vector_type t_estimate; - scalar_vector_type beta; - - - Q.set_size(x.nr()); - S.set_size(x.nr()); - - bool recompute_beta = true; - - bool search_all_alphas = false; - unsigned long ticker = 0; - const unsigned long rounds_of_narrow_search = 100; - unsigned long iterations = 0; - - while (iterations != max_iterations) - { - iterations++; - if (recompute_beta) - { - // calculate the current t_estimate. (this is the predicted t value for each sample according to the - // current state of the classifier) - t_estimate = phi*weights; - - // calculate the current beta - beta = sigmoid(t_estimate); - beta = pointwise_multiply(beta,(uniform_matrix<scalar_type>(beta.nr(),beta.nc(),1)-beta)); - recompute_beta = false; - } - - // Compute optimal weights and sigma for current alpha using IRLS. This is the same - // technique documented in the paper by equations 12-14. - scalar_type weight_delta = std::numeric_limits<scalar_type>::max(); - int count = 0; - while (weight_delta > 0.0001) - { - // This is a sanity check to make sure we never get stuck in this - // loop to do some degenerate numerical condition - ++count; - if (count > 100) - { - // jump us to where search_all_alphas will be set to true - ticker = rounds_of_narrow_search; - break; - } - - // compute the updated sigma matrix - sigma = scale_columns(trans(phi),beta)*phi; - for (long r = 0; r < alpha.nr(); ++r) - sigma(r,r) += alpha(r); - sigma = inv(sigma); - - - // compute the updated weights vector (t_hat = phi*mu_mp + inv(B)*(t-y)) - t_hat = t_estimate + trans(scale_columns(trans(t-sigmoid(t_estimate)),reciprocal(beta))); - - // mu = sigma*trans(phi)*b*t_hat - mu = sigma*tmp(trans(phi)* trans(scale_columns(trans(t_hat), beta))); - - // now compute how much the weights vector changed during this iteration - // through this loop. - weight_delta = max(abs(mu-weights)); - - // put mu into the weights vector - mu.swap(weights); - - // calculate the current t_estimate - t_estimate = phi*weights; - - // calculate the current beta - beta = sigmoid(t_estimate); - beta = pointwise_multiply(beta, uniform_matrix<scalar_type>(beta.nr(),beta.nc(),1)-beta); - - } - - // check if we should do a full search for the best alpha to optimize - if (ticker >= rounds_of_narrow_search) - { - // if the current alpha and weights are equal to what they were - // at the last time we were about to start a wide search then - // we are done. 
- if (equal(prev_alpha, alpha, eps) && equal(prev_weights, weights, eps)) - break; - - - prev_alpha = alpha; - prev_weights = weights; - search_all_alphas = true; - ticker = 0; - } - else - { - search_all_alphas = false; - } - ++ticker; - - // compute S and Q using equations 24 and 25 (tempv = phi*sigma*trans(phi)*B*t_hat) - tempv = phi*tmp(sigma*tmp(trans(phi)*trans(scale_columns(trans(t_hat),beta)))); - for (long i = 0; i < S.size(); ++i) - { - // if we are currently limiting the search for the next alpha to update - // to the set in the active set then skip a non-active vector. - if (search_all_alphas == false && active_bases(i) == -1) - continue; - - // get the column for this sample out of the kernel matrix. If it is - // something in the active set then just get it right out of phi, otherwise - // we have to calculate it. - if (active_bases(i) != -1) - K_col = colm(phi,active_bases(i)); - else - get_kernel_colum(i, x, K_col); - - // tempv2 = trans(phi_m)*B - tempv2 = scale_columns(trans(K_col), beta); - tempv3 = tempv2*phi; - S(i) = tempv2*K_col - tempv3*sigma*trans(tempv3); - Q(i) = tempv2*t_hat - tempv2*tempv; - } - - const long selected_idx = rvm_helpers::find_next_best_alpha_to_update(S,Q,alpha,active_bases, search_all_alphas, eps); - - - // if find_next_best_alpha_to_update didn't find any good alpha to update - if (selected_idx == -1) - { - if (search_all_alphas == false) - { - // jump us to where search_all_alphas will be set to true and try again - ticker = rounds_of_narrow_search; - continue; - } - else - { - // we are really done so quit the main loop - break; - } - } - - - // next we update the selected alpha. - - // if the selected alpha is in the active set - if (active_bases(selected_idx) >= 0) - { - const long idx = active_bases(selected_idx); - const scalar_type s = alpha(idx)*S(selected_idx)/(alpha(idx) - S(selected_idx)); - const scalar_type q = alpha(idx)*Q(selected_idx)/(alpha(idx) - S(selected_idx)); - - if (q*q-s > 0) - { - // reestimate the value of alpha - alpha(idx) = s*s/(q*q-s); - - } - else - { - // the new alpha value is infinite so remove the selected alpha from our model - active_bases(selected_idx) = -1; - phi = remove_col(phi, idx); - weights = remove_row(weights, idx); - alpha = remove_row(alpha, idx); - - // fix the index values in active_bases - for (long i = 0; i < active_bases.size(); ++i) - { - if (active_bases(i) > idx) - { - active_bases(i) -= 1; - } - } - - // we changed the number of weights so we need to remember to - // recompute the beta vector next time around the main loop. - recompute_beta = true; - } - } - else - { - const scalar_type s = S(selected_idx); - const scalar_type q = Q(selected_idx); - - if (q*q-s > 0) - { - // add the selected alpha to our model - - active_bases(selected_idx) = phi.nc(); - - // update alpha - tempv.set_size(alpha.size()+1); - set_subm(tempv, get_rect(alpha)) = alpha; - tempv(phi.nc()) = s*s/(q*q-s); - tempv.swap(alpha); - - // update weights - tempv.set_size(weights.size()+1); - set_subm(tempv, get_rect(weights)) = weights; - tempv(phi.nc()) = 0; - tempv.swap(weights); - - // update phi by adding the new sample's kernel matrix column in as one of phi's columns - tempm.set_size(phi.nr(), phi.nc()+1); - set_subm(tempm, get_rect(phi)) = phi; - get_kernel_colum(selected_idx, x, K_col); - set_colm(tempm, phi.nc()) = K_col; - tempm.swap(phi); - - - // we changed the number of weights so we need to remember to - // recompute the beta vector next time around the main loop. 
- recompute_beta = true; - } - } - - } // end while(true). So we have converged on the final answer. - - - // now put everything into a decision_function object and return it - std_vector_c<sample_type> dictionary; - std_vector_c<scalar_type> final_weights; - for (long i = 0; i < active_bases.size(); ++i) - { - if (active_bases(i) >= 0) - { - dictionary.push_back(x(i)); - final_weights.push_back(weights(active_bases(i))); - } - } - - return decision_function<kernel_type> ( mat(final_weights), - -sum(mat(final_weights))*tau, - kernel, - mat(dictionary)); - - } - - // ------------------------------------------------------------------------------------ - - template <typename M1, typename M2> - long pick_initial_vector ( - const M1& x, - const M2& t - ) const - { - scalar_vector_type K_col; - double max_projection = -std::numeric_limits<scalar_type>::infinity(); - long max_idx = 0; - // find the row in the kernel matrix that has the biggest normalized projection onto the t vector - for (long r = 0; r < x.nr(); ++r) - { - get_kernel_colum(r,x,K_col); - double temp = trans(K_col)*t; - temp = temp*temp/length_squared(K_col); - - if (temp > max_projection) - { - max_projection = temp; - max_idx = r; - } - } - - return max_idx; - } - - // ------------------------------------------------------------------------------------ - - template <typename T> - void get_kernel_colum ( - long idx, - const T& x, - scalar_vector_type& col - ) const - { - col.set_size(x.nr()); - for (long i = 0; i < col.size(); ++i) - { - col(i) = kernel(x(idx), x(i)) + tau; - } - } - - // ------------------------------------------------------------------------------------ - - template <typename M1, typename M2> - scalar_type compute_initial_alpha ( - const M1& phi, - const M2& t - ) const - { - const double temp = length_squared(phi); - const double temp2 = trans(phi)*t; - - return temp/( temp2*temp2/temp + variance(t)*0.1); - } - - // ------------------------------------------------------------------------------------ - - // private member variables - kernel_type kernel; - scalar_type eps; - unsigned long max_iterations; - - const static scalar_type tau; - - }; // end of class rvm_trainer - - template <typename kernel_type> - const typename kernel_type::scalar_type rvm_trainer<kernel_type>::tau = static_cast<typename kernel_type::scalar_type>(0.001); - -// ---------------------------------------------------------------------------------------- - - template <typename K> - void swap ( - rvm_trainer<K>& a, - rvm_trainer<K>& b - ) { a.swap(b); } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename kern_type - > - class rvm_regression_trainer - { - /*! - This is an implementation of the regression version of the - relevance vector machine algorithm described in the paper: - Tipping, M. E. and A. C. Faul (2003). Fast marginal likelihood maximisation - for sparse Bayesian models. In C. M. Bishop and B. J. Frey (Eds.), Proceedings - of the Ninth International Workshop on Artificial Intelligence and Statistics, - Key West, FL, Jan 3-6. 
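
Before the regression variant's implementation continues, a minimal usage sketch of the rvm_trainer classifier defined above. The kernel gamma and the toy disc data are arbitrary choices made for illustration:

```cpp
#include <dlib/svm.h>
#include <iostream>
#include <vector>

int main()
{
    using namespace dlib;
    typedef matrix<double,2,1> sample_type;
    typedef radial_basis_kernel<sample_type> kernel_type;

    // toy binary problem: label +1 inside the unit disc, -1 outside
    std::vector<sample_type> samples;
    std::vector<double> labels;
    for (int i = 1; i <= 10; ++i)
    {
        const double r = 0.2*i;
        sample_type s;
        s(0) = r;
        s(1) = 0;
        samples.push_back(s);
        labels.push_back(r < 1 ? +1.0 : -1.0);
    }

    rvm_trainer<kernel_type> trainer;
    trainer.set_kernel(kernel_type(0.5));  // gamma chosen arbitrarily

    decision_function<kernel_type> df = trainer.train(samples, labels);

    // the sign of df(x) is the predicted label
    sample_type test;
    test(0) = 0.4;
    test(1) = 0;
    std::cout << "f(test) = " << df(test) << "\n";
}
```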
- - This code mostly does what is described in the above paper with the exception - that here we use a different stopping condition as well as a modified alpha - selection rule. See the code for the exact details. - !*/ - - public: - typedef kern_type kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - rvm_regression_trainer ( - ) : eps(0.001) - { - } - - void set_epsilon ( - scalar_type eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\tvoid rvm_regression_trainer::set_epsilon(eps_)" - << "\n\t invalid inputs were given to this function" - << "\n\t eps: " << eps_ - ); - eps = eps_; - } - - const scalar_type get_epsilon ( - ) const - { - return eps; - } - - void set_kernel ( - const kernel_type& k - ) - { - kernel = k; - } - - const kernel_type& get_kernel ( - ) const - { - return kernel; - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& t - ) const - { - return do_train(mat(x), mat(t)); - } - - void swap ( - rvm_regression_trainer& item - ) - { - exchange(kernel, item.kernel); - exchange(eps, item.eps); - } - - private: - - // ------------------------------------------------------------------------------------ - - typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; - typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type; - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& t - ) const - { - - // make sure requires clause is not broken - DLIB_ASSERT(is_learning_problem(x,t) && x.size() > 0, - "\tdecision_function rvm_regression_trainer::train(x,t)" - << "\n\t invalid inputs were given to this function" - << "\n\t x.nr(): " << x.nr() - << "\n\t t.nr(): " << t.nr() - << "\n\t x.nc(): " << x.nc() - << "\n\t t.nc(): " << t.nc() - ); - - - /*! This is the convention for the active_bases variable in the function: - - if (active_bases(i) >= 0) then - - alpha(active_bases(i)) == the alpha value associated with sample x(i) - - weights(active_bases(i)) == the weight value associated with sample x(i) - - colm(phi, active_bases(i)) == the column of phi associated with sample x(i) - - colm(phi, active_bases(i)) == kernel column i (from get_kernel_colum()) - - else - - the i'th sample isn't in the model and notionally has an alpha of infinity and - a weight of 0. 
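
And the matching sketch for rvm_regression_trainer, whose public interface appears just above; the sinc-style target function is only an example:

```cpp
#include <dlib/svm.h>
#include <cmath>
#include <iostream>
#include <vector>

int main()
{
    using namespace dlib;
    typedef matrix<double,1,1> sample_type;
    typedef radial_basis_kernel<sample_type> kernel_type;

    // sample sin(x)/x on (0, 10]
    std::vector<sample_type> samples;
    std::vector<double> targets;
    for (int i = 1; i <= 50; ++i)
    {
        const double x = 0.2*i;
        sample_type s;
        s(0) = x;
        samples.push_back(s);
        targets.push_back(std::sin(x)/x);
    }

    rvm_regression_trainer<kernel_type> trainer;
    trainer.set_kernel(kernel_type(0.05));  // gamma chosen arbitrarily

    decision_function<kernel_type> df = trainer.train(samples, targets);

    sample_type q;
    q(0) = 2.5;
    std::cout << "prediction at 2.5: " << df(q) << "\n";
}
```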
- !*/ - matrix<long,0,1,mem_manager_type> active_bases(x.nr()); - scalar_matrix_type phi(x.nr(),1); - scalar_vector_type alpha(1), prev_alpha; - scalar_vector_type weights(1), prev_weights; - - scalar_vector_type tempv, K_col; - scalar_type var = variance(t)*0.1; - - // set the initial values of these guys - set_all_elements(active_bases, -1); - long first_basis = pick_initial_vector(x,t); - get_kernel_colum(first_basis, x, K_col); - active_bases(first_basis) = 0; - set_colm(phi,0) = K_col; - alpha(0) = compute_initial_alpha(phi, t, var); - weights(0) = 1; - - - // now declare a bunch of other variables we will be using below - scalar_vector_type Q, S; - scalar_matrix_type sigma; - - matrix<scalar_type,1,0,mem_manager_type> tempv2, tempv3; - scalar_matrix_type tempm; - - - Q.set_size(x.nr()); - S.set_size(x.nr()); - - - bool search_all_alphas = false; - unsigned long ticker = 0; - const unsigned long rounds_of_narrow_search = 100; - - while (true) - { - // Compute optimal weights and sigma for current alpha using equation 6. - sigma = trans(phi)*phi/var; - for (long r = 0; r < alpha.nr(); ++r) - sigma(r,r) += alpha(r); - sigma = inv(sigma); - weights = sigma*trans(phi)*t/var; - - - - // check if we should do a full search for the best alpha to optimize - if (ticker == rounds_of_narrow_search) - { - // if the current alpha and weights are equal to what they were - // at the last time we were about to start a wide search then - // we are done. - if (equal(prev_alpha, alpha, eps) && equal(prev_weights, weights, eps)) - break; - - prev_alpha = alpha; - prev_weights = weights; - search_all_alphas = true; - ticker = 0; - } - else - { - search_all_alphas = false; - } - ++ticker; - - // compute S and Q using equations 24 and 25 (tempv = phi*sigma*trans(phi)*B*t) - tempv = phi*tmp(sigma*tmp(trans(phi)*t/var)); - for (long i = 0; i < S.size(); ++i) - { - // if we are currently limiting the search for the next alpha to update - // to the set in the active set then skip a non-active vector. - if (search_all_alphas == false && active_bases(i) == -1) - continue; - - // get the column for this sample out of the kernel matrix. If it is - // something in the active set then just get it right out of phi, otherwise - // we have to calculate it. - if (active_bases(i) != -1) - K_col = colm(phi,active_bases(i)); - else - get_kernel_colum(i, x, K_col); - - // tempv2 = trans(phi_m)*B - tempv2 = trans(K_col)/var; - tempv3 = tempv2*phi; - S(i) = tempv2*K_col - tempv3*sigma*trans(tempv3); - Q(i) = tempv2*t - tempv2*tempv; - } - - const long selected_idx = rvm_helpers::find_next_best_alpha_to_update(S,Q,alpha,active_bases, search_all_alphas, eps); - - // if find_next_best_alpha_to_update didn't find any good alpha to update - if (selected_idx == -1) - { - if (search_all_alphas == false) - { - // jump us to where search_all_alphas will be set to true and try again - ticker = rounds_of_narrow_search; - continue; - } - else - { - // we are really done so quit the main loop - break; - } - } - - // recompute the variance - var = length_squared(t - phi*weights)/(x.nr() - weights.size() + trans(alpha)*diag(sigma)); - - // next we update the selected alpha. 
- - // if the selected alpha is in the active set - if (active_bases(selected_idx) >= 0) - { - const long idx = active_bases(selected_idx); - const scalar_type s = alpha(idx)*S(selected_idx)/(alpha(idx) - S(selected_idx)); - const scalar_type q = alpha(idx)*Q(selected_idx)/(alpha(idx) - S(selected_idx)); - - if (q*q-s > 0) - { - // reestimate the value of alpha - alpha(idx) = s*s/(q*q-s); - - } - else - { - // the new alpha value is infinite so remove the selected alpha from our model - active_bases(selected_idx) = -1; - phi = remove_col(phi, idx); - weights = remove_row(weights, idx); - alpha = remove_row(alpha, idx); - - // fix the index values in active_bases - for (long i = 0; i < active_bases.size(); ++i) - { - if (active_bases(i) > idx) - { - active_bases(i) -= 1; - } - } - } - } - else - { - const scalar_type s = S(selected_idx); - const scalar_type q = Q(selected_idx); - - if (q*q-s > 0) - { - // add the selected alpha to our model - - active_bases(selected_idx) = phi.nc(); - - // update alpha - tempv.set_size(alpha.size()+1); - set_subm(tempv, get_rect(alpha)) = alpha; - tempv(phi.nc()) = s*s/(q*q-s); - tempv.swap(alpha); - - // update weights - tempv.set_size(weights.size()+1); - set_subm(tempv, get_rect(weights)) = weights; - tempv(phi.nc()) = 0; - tempv.swap(weights); - - // update phi by adding the new sample's kernel matrix column in as one of phi's columns - tempm.set_size(phi.nr(), phi.nc()+1); - set_subm(tempm, get_rect(phi)) = phi; - get_kernel_colum(selected_idx, x, K_col); - set_colm(tempm, phi.nc()) = K_col; - tempm.swap(phi); - - } - } - - - - } // end while(true). So we have converged on the final answer. - - - // now put everything into a decision_function object and return it - std_vector_c<sample_type> dictionary; - std_vector_c<scalar_type> final_weights; - for (long i = 0; i < active_bases.size(); ++i) - { - if (active_bases(i) >= 0) - { - dictionary.push_back(x(i)); - final_weights.push_back(weights(active_bases(i))); - } - } - - return decision_function<kernel_type> ( mat(final_weights), - -sum(mat(final_weights))*tau, - kernel, - mat(dictionary)); - - } - - // ------------------------------------------------------------------------------------ - - template <typename T> - void get_kernel_colum ( - long idx, - const T& x, - scalar_vector_type& col - ) const - { - col.set_size(x.nr()); - for (long i = 0; i < col.size(); ++i) - { - col(i) = kernel(x(idx), x(i)) + tau; - } - } - - // ------------------------------------------------------------------------------------ - - template <typename M1, typename M2> - scalar_type compute_initial_alpha ( - const M1& phi, - const M2& t, - const scalar_type& var - ) const - { - const double temp = length_squared(phi); - const double temp2 = trans(phi)*t; - - return temp/( temp2*temp2/temp + var); - } - - // ------------------------------------------------------------------------------------ - - template <typename M1, typename M2> - long pick_initial_vector ( - const M1& x, - const M2& t - ) const - { - scalar_vector_type K_col; - double max_projection = -std::numeric_limits<scalar_type>::infinity(); - long max_idx = 0; - // find the row in the kernel matrix that has the biggest normalized projection onto the t vector - for (long r = 0; r < x.nr(); ++r) - { - get_kernel_colum(r,x,K_col); - double temp = trans(K_col)*t; - temp = temp*temp/length_squared(K_col); - - if (temp > max_projection) - { - max_projection = temp; - max_idx = r; - } - } - - return max_idx; - } - - // 
------------------------------------------------------------------------------------ - - // private member variables - kernel_type kernel; - scalar_type eps; - - const static scalar_type tau; - - }; // end of class rvm_regression_trainer - - template <typename kernel_type> - const typename kernel_type::scalar_type rvm_regression_trainer<kernel_type>::tau = static_cast<typename kernel_type::scalar_type>(0.001); - -// ---------------------------------------------------------------------------------------- - - template <typename K> - void swap ( - rvm_regression_trainer<K>& a, - rvm_regression_trainer<K>& b - ) { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RVm_ - - diff --git a/ml/dlib/dlib/svm/rvm_abstract.h b/ml/dlib/dlib/svm/rvm_abstract.h deleted file mode 100644 index 236d2ad3c..000000000 --- a/ml/dlib/dlib/svm/rvm_abstract.h +++ /dev/null @@ -1,278 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_RVm_ABSTRACT_ -#ifdef DLIB_RVm_ABSTRACT_ - -#include <cmath> -#include <limits> -#include "../matrix.h" -#include "../algs.h" -#include "function.h" -#include "kernel.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename kern_type - > - class rvm_trainer - { - /*! - REQUIREMENTS ON kern_type - is a kernel function object as defined in dlib/svm/kernel_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object implements a trainer for a relevance vector machine for - solving binary classification problems. - - The implementation of the RVM training algorithm used by this object is based - on the following excellent paper: - Tipping, M. E. and A. C. Faul (2003). Fast marginal likelihood maximisation - for sparse Bayesian models. In C. M. Bishop and B. J. Frey (Eds.), Proceedings - of the Ninth International Workshop on Artificial Intelligence and Statistics, - Key West, FL, Jan 3-6. - !*/ - - public: - typedef kern_type kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - rvm_trainer ( - ); - /*! - ensures - - This object is properly initialized and ready to be used - to train a relevance vector machine. - - #get_epsilon() == 0.001 - - #get_max_iterations() == 2000 - !*/ - - void set_epsilon ( - scalar_type eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - const scalar_type get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Generally a good value for this is 0.001. Smaller values may result - in a more accurate solution but take longer to execute. - !*/ - - void set_kernel ( - const kernel_type& k - ); - /*! - ensures - - #get_kernel() == k - !*/ - - const kernel_type& get_kernel ( - ) const; - /*! - ensures - - returns a copy of the kernel function in use by this object - !*/ - - unsigned long get_max_iterations ( - ) const; - /*! - ensures - - returns the maximum number of iterations the RVM optimizer is allowed to - run before it is required to stop and return a result. - !*/ - - void set_max_iterations ( - unsigned long max_iter - ); - /*! 
- ensures - - #get_max_iterations() == max_iter - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const; - /*! - requires - - is_binary_classification_problem(x,y) == true - - x == a matrix or something convertible to a matrix via mat(). - Also, x should contain sample_type objects. - - y == a matrix or something convertible to a matrix via mat(). - Also, y should contain scalar_type objects. - ensures - - trains a relevance vector classifier given the training samples in x and - labels in y. - - returns a decision function F with the following properties: - - if (new_x is a sample predicted have +1 label) then - - F(new_x) >= 0 - - else - - F(new_x) < 0 - throws - - std::bad_alloc - !*/ - - void swap ( - rvm_trainer& item - ); - /*! - ensures - - swaps *this and item - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template <typename K> - void swap ( - rvm_trainer<K>& a, - rvm_trainer<K>& b - ) { a.swap(b); } - /*! - provides a global swap - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename kern_type - > - class rvm_regression_trainer - { - /*! - REQUIREMENTS ON kern_type - is a kernel function object as defined in dlib/svm/kernel_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object implements a trainer for a relevance vector machine for - solving regression problems. - - The implementation of the RVM training algorithm used by this object is based - on the following excellent paper: - Tipping, M. E. and A. C. Faul (2003). Fast marginal likelihood maximisation - for sparse Bayesian models. In C. M. Bishop and B. J. Frey (Eds.), Proceedings - of the Ninth International Workshop on Artificial Intelligence and Statistics, - Key West, FL, Jan 3-6. - !*/ - - public: - typedef kern_type kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - rvm_regression_trainer ( - ); - /*! - ensures - - This object is properly initialized and ready to be used - to train a relevance vector machine. - - #get_epsilon() == 0.001 - !*/ - - void set_epsilon ( - scalar_type eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - const scalar_type get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Generally a good value for this is 0.001. Smaller values may result - in a more accurate solution but take longer to execute. - !*/ - - void set_kernel ( - const kernel_type& k - ); - /*! - ensures - - #get_kernel() == k - !*/ - - const kernel_type& get_kernel ( - ) const; - /*! - ensures - - returns a copy of the kernel function in use by this object - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const; - /*! - requires - - x == a matrix or something convertible to a matrix via mat(). 
- Also, x should contain sample_type objects. - - y == a matrix or something convertible to a matrix via mat(). - Also, y should contain scalar_type objects. - - is_learning_problem(x,y) == true - - x.size() > 0 - ensures - - trains a RVM given the training samples in x and - labels in y and returns the resulting decision_function. - throws - - std::bad_alloc - !*/ - - void swap ( - rvm_regression_trainer& item - ); - /*! - ensures - - swaps *this and item - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template <typename K> - void swap ( - rvm_regression_trainer<K>& a, - rvm_regression_trainer<K>& b - ) { a.swap(b); } - /*! - provides a global swap - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RVm_ABSTRACT_ - diff --git a/ml/dlib/dlib/svm/sequence_labeler.h b/ml/dlib/dlib/svm/sequence_labeler.h deleted file mode 100644 index 882cdb881..000000000 --- a/ml/dlib/dlib/svm/sequence_labeler.h +++ /dev/null @@ -1,339 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_SEQUENCE_LAbELER_H_h_ -#define DLIB_SEQUENCE_LAbELER_H_h_ - -#include "sequence_labeler_abstract.h" -#include "../matrix.h" -#include <vector> -#include "../optimization/find_max_factor_graph_viterbi.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - namespace fe_helpers - { - template <typename EXP> - struct dot_functor - { - dot_functor(const matrix_exp<EXP>& lambda_) : lambda(lambda_), value(0) {} - - inline void operator() ( - unsigned long feat_index - ) - { - value += lambda(feat_index); - } - - inline void operator() ( - unsigned long feat_index, - double feat_value - ) - { - value += feat_value*lambda(feat_index); - } - - const matrix_exp<EXP>& lambda; - double value; - }; - - template <typename feature_extractor, typename EXP, typename sequence_type, typename EXP2> - double dot( - const matrix_exp<EXP>& lambda, - const feature_extractor& fe, - const sequence_type& sequence, - const matrix_exp<EXP2>& candidate_labeling, - unsigned long position - ) - { - dot_functor<EXP> dot(lambda); - fe.get_features(dot, sequence, candidate_labeling, position); - return dot.value; - } - - } - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - DLIB_MAKE_HAS_MEMBER_FUNCTION_TEST( - has_reject_labeling, - bool, - template reject_labeling<matrix<unsigned long> >, - (const typename T::sequence_type&, const matrix_exp<matrix<unsigned long> >&, unsigned long)const - ); - - template <typename feature_extractor, typename EXP, typename sequence_type> - typename enable_if<has_reject_labeling<feature_extractor>,bool>::type call_reject_labeling_if_exists ( - const feature_extractor& fe, - const sequence_type& x, - const matrix_exp<EXP>& y, - unsigned long position - ) - { - return fe.reject_labeling(x, y, position); - } - - template <typename feature_extractor, typename EXP, typename sequence_type> - typename disable_if<has_reject_labeling<feature_extractor>,bool>::type call_reject_labeling_if_exists ( - const feature_extractor& , - const sequence_type& , - const matrix_exp<EXP>& , - unsigned long - ) - { - return false; - } - } - -// ---------------------------------------------------------------------------------------- -// 
---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - typename enable_if<dlib::impl::has_reject_labeling<feature_extractor>,bool>::type contains_invalid_labeling ( - const feature_extractor& fe, - const typename feature_extractor::sequence_type& x, - const std::vector<unsigned long>& y - ) - { - if (x.size() != y.size()) - return true; - - matrix<unsigned long,0,1> node_states; - - for (unsigned long i = 0; i < x.size(); ++i) - { - node_states.set_size(std::min(fe.order(),i) + 1); - for (unsigned long j = 0; j < (unsigned long)node_states.size(); ++j) - node_states(j) = y[i-j]; - - if (fe.reject_labeling(x, node_states, i)) - return true; - } - - return false; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - typename disable_if<dlib::impl::has_reject_labeling<feature_extractor>,bool>::type contains_invalid_labeling ( - const feature_extractor& , - const typename feature_extractor::sequence_type& x, - const std::vector<unsigned long>& y - ) - { - if (x.size() != y.size()) - return true; - - return false; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - bool contains_invalid_labeling ( - const feature_extractor& fe, - const std::vector<typename feature_extractor::sequence_type>& x, - const std::vector<std::vector<unsigned long> >& y - ) - { - if (x.size() != y.size()) - return true; - - for (unsigned long i = 0; i < x.size(); ++i) - { - if (contains_invalid_labeling(fe,x[i],y[i])) - return true; - } - return false; - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class sequence_labeler - { - public: - typedef typename feature_extractor::sequence_type sample_sequence_type; - typedef std::vector<unsigned long> labeled_sequence_type; - - private: - class map_prob - { - public: - unsigned long order() const { return fe.order(); } - unsigned long num_states() const { return fe.num_labels(); } - - map_prob( - const sample_sequence_type& x_, - const feature_extractor& fe_, - const matrix<double,0,1>& weights_ - ) : - sequence(x_), - fe(fe_), - weights(weights_) - { - } - - unsigned long number_of_nodes( - ) const - { - return sequence.size(); - } - - template < - typename EXP - > - double factor_value ( - unsigned long node_id, - const matrix_exp<EXP>& node_states - ) const - { - if (dlib::impl::call_reject_labeling_if_exists(fe, sequence, node_states, node_id)) - return -std::numeric_limits<double>::infinity(); - - return fe_helpers::dot(weights, fe, sequence, node_states, node_id); - } - - const sample_sequence_type& sequence; - const feature_extractor& fe; - const matrix<double,0,1>& weights; - }; - public: - - sequence_labeler() - { - weights.set_size(fe.num_features()); - weights = 0; - } - - explicit sequence_labeler( - const matrix<double,0,1>& weights_ - ) : - weights(weights_) - { - // make sure requires clause is not broken - DLIB_ASSERT(fe.num_features() == static_cast<unsigned long>(weights_.size()), - "\t sequence_labeler::sequence_labeler(weights_)" - << "\n\t These sizes should match" - << "\n\t fe.num_features(): " << fe.num_features() - << "\n\t weights_.size(): " << weights_.size() - << "\n\t this: " << this - 
); - } - - sequence_labeler( - const matrix<double,0,1>& weights_, - const feature_extractor& fe_ - ) : - fe(fe_), - weights(weights_) - { - // make sure requires clause is not broken - DLIB_ASSERT(fe_.num_features() == static_cast<unsigned long>(weights_.size()), - "\t sequence_labeler::sequence_labeler(weights_,fe_)" - << "\n\t These sizes should match" - << "\n\t fe_.num_features(): " << fe_.num_features() - << "\n\t weights_.size(): " << weights_.size() - << "\n\t this: " << this - ); - } - - const feature_extractor& get_feature_extractor ( - ) const { return fe; } - - const matrix<double,0,1>& get_weights ( - ) const { return weights; } - - unsigned long num_labels ( - ) const { return fe.num_labels(); } - - labeled_sequence_type operator() ( - const sample_sequence_type& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(num_labels() > 0, - "\t labeled_sequence_type sequence_labeler::operator()(x)" - << "\n\t You can't have no labels." - << "\n\t this: " << this - ); - - labeled_sequence_type y; - find_max_factor_graph_viterbi(map_prob(x,fe,weights), y); - return y; - } - - void label_sequence ( - const sample_sequence_type& x, - labeled_sequence_type& y - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(num_labels() > 0, - "\t void sequence_labeler::label_sequence(x,y)" - << "\n\t You can't have no labels." - << "\n\t this: " << this - ); - - find_max_factor_graph_viterbi(map_prob(x,fe,weights), y); - } - - private: - - feature_extractor fe; - matrix<double,0,1> weights; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - void serialize ( - const sequence_labeler<feature_extractor>& item, - std::ostream& out - ) - { - serialize(item.get_feature_extractor(), out); - serialize(item.get_weights(), out); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - void deserialize ( - sequence_labeler<feature_extractor>& item, - std::istream& in - ) - { - feature_extractor fe; - matrix<double,0,1> weights; - - deserialize(fe, in); - deserialize(weights, in); - - item = sequence_labeler<feature_extractor>(weights, fe); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SEQUENCE_LAbELER_H_h_ - diff --git a/ml/dlib/dlib/svm/sequence_labeler_abstract.h b/ml/dlib/dlib/svm/sequence_labeler_abstract.h deleted file mode 100644 index 3970b723a..000000000 --- a/ml/dlib/dlib/svm/sequence_labeler_abstract.h +++ /dev/null @@ -1,396 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SEQUENCE_LAbELER_ABSTRACT_H_h_ -#ifdef DLIB_SEQUENCE_LAbELER_ABSTRACT_H_h_ - -#include "../matrix.h" -#include <vector> -#include "../optimization/find_max_factor_graph_viterbi_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class example_feature_extractor - { - /*! - WHAT THIS OBJECT REPRESENTS - This object defines the interface a feature extractor must implement - if it is to be used with the sequence_labeler defined at the bottom - of this file. - - The model used by sequence_labeler objects is the following. 
- Given an input sequence x, predict an output label sequence y - such that: - y == argmax_Y dot(w, PSI(x,Y)) - Where w is a parameter vector. - - Therefore, a feature extractor defines how the PSI(x,y) feature vector - is calculated. It also defines how many output labels there are as - well as the order of the model. - - Finally, note that PSI(x,y) is a sum of feature vectors, each derived - from the entire input sequence x but only part of the label sequence y. - Each of these constituent feature vectors is defined by the get_features() - method of this class. - - THREAD SAFETY - Instances of this object are required to be threadsafe, that is, it should - be safe for multiple threads to make concurrent calls to the member - functions of this object. - !*/ - - public: - // This should be the type used to represent an input sequence. It can be - // anything so long as it has a .size() which returns the length of the sequence. - typedef the_type_used_to_represent_a_sequence sequence_type; - - example_feature_extractor ( - ); - /*! - ensures - - this object is properly initialized - !*/ - - unsigned long num_features ( - ) const; - /*! - ensures - - returns the dimensionality of the PSI() feature vector. - !*/ - - unsigned long order( - ) const; - /*! - ensures - - This object represents a Markov model on the output labels. - This parameter defines the order of the model. That is, this - value controls how many previous label values get to be taken - into consideration when performing feature extraction for a - particular element of the input sequence. Note that the runtime - of the algorithm is exponential in the order. So don't make order - very large. - !*/ - - unsigned long num_labels( - ) const; - /*! - ensures - - returns the number of possible output labels. - !*/ - - template <typename EXP> - bool reject_labeling ( - const sequence_type& x, - const matrix_exp<EXP>& y, - unsigned long position - ) const; - /*! - requires - - EXP::type == unsigned long - (i.e. y contains unsigned longs) - - position < x.size() - - y.size() == min(position, order()) + 1 - - is_vector(y) == true - - max(y) < num_labels() - ensures - - for all valid i: - - interprets y(i) as the label corresponding to x[position-i] - - if (the labeling in y for x[position] is always the wrong labeling) then - - returns true - (note that reject_labeling() is just an optional tool to allow you - to overrule the normal labeling algorithm. You don't have to use - it. So if you don't include a reject_labeling() method in your - feature extractor it is the same as including one that always - returns false.) - - else - - returns false - !*/ - - template <typename feature_setter, typename EXP> - void get_features ( - feature_setter& set_feature, - const sequence_type& x, - const matrix_exp<EXP>& y, - unsigned long position - ) const; - /*! - requires - - EXP::type == unsigned long - (i.e. y contains unsigned longs) - - reject_labeling(x,y,position) == false - - position < x.size() - - y.size() == min(position, order()) + 1 - - is_vector(y) == true - - max(y) < num_labels() - - set_feature is a function object which allows expressions of the form: - - set_features((unsigned long)feature_index, (double)feature_value); - - set_features((unsigned long)feature_index); - ensures - - for all valid i: - - interprets y(i) as the label corresponding to x[position-i] - - This function computes the part of PSI() corresponding to the x[position] - element of the input sequence. 
Moreover, this part of PSI() is returned as - a sparse vector by invoking set_feature(). For example, to set the feature - with an index of 55 to the value of 1 this method would call: - set_feature(55); - Or equivalently: - set_feature(55,1); - Therefore, the first argument to set_feature is the index of the feature - to be set while the second argument is the value the feature should take. - Additionally, note that calling set_feature() multiple times with the same - feature index does NOT overwrite the old value, it adds to the previous - value. For example, if you call set_feature(55) 3 times then it will - result in feature 55 having a value of 3. - - This function only calls set_feature() with feature_index values < num_features() - !*/ - - unsigned long num_nonnegative_weights ( - ) const; - /*! - ensures - - returns the number of elements of the w parameter vector which should be - non-negative. That is, this feature extractor is intended to be used - with w vectors where the first num_nonnegative_weights() elements of w - are >= 0. That is, it should be the case that w(i) >= 0 for all i < - num_nonnegative_weights(). - - Note that num_nonnegative_weights() is just an optional method to allow - you to tell a tool like the structural_sequence_labeling_trainer that the - learned w should have a certain number of non-negative elements. - Therefore, if you do not provide a num_nonnegative_weights() method in - your feature extractor then it will default to a value of 0, indicating - that all elements of the w parameter vector may be any value. - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - void serialize( - const example_feature_extractor& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - - void deserialize( - example_feature_extractor& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - bool contains_invalid_labeling ( - const feature_extractor& fe, - const typename feature_extractor::sequence_type& x, - const std::vector<unsigned long>& y - ); - /*! - requires - - feature_extractor must be an object that implements an interface compatible - with the example_feature_extractor discussed above. - ensures - - if (x.size() != y.size() || - fe.reject_labeling() rejects any of the labels in y) then - - returns true - - else - - returns false - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - bool contains_invalid_labeling ( - const feature_extractor& fe, - const std::vector<typename feature_extractor::sequence_type>& x, - const std::vector<std::vector<unsigned long> >& y - ); - /*! - requires - - feature_extractor must be an object that implements an interface compatible - with the example_feature_extractor discussed above. 
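
To make the interface described above concrete, here is a toy feature extractor sketch a sequence_labeler could use. The observation alphabet, feature layout, and empty serialize stubs are all invented for illustration:

```cpp
#include <dlib/svm.h>
#include <iostream>
#include <vector>

// Toy feature extractor implementing the interface documented above.
// Observations are integers in [0, NUM_OBS); there are two output
// labels and an order-1 (label-bigram) model.
struct toy_feature_extractor
{
    typedef std::vector<unsigned long> sequence_type;

    const static unsigned long NUM_OBS = 10;

    unsigned long num_features() const { return num_labels()*NUM_OBS + num_labels()*num_labels(); }
    unsigned long order()        const { return 1; }
    unsigned long num_labels()   const { return 2; }

    template <typename feature_setter, typename EXP>
    void get_features (
        feature_setter& set_feature,
        const sequence_type& x,
        const dlib::matrix_exp<EXP>& y,  // y(0) is the label of x[position]
        unsigned long position
    ) const
    {
        // emission indicator: (label at position, observation at position)
        set_feature(y(0)*NUM_OBS + x[position]);

        // transition indicator: (label at position, previous label)
        if (y.size() > 1)
            set_feature(num_labels()*NUM_OBS + y(0)*num_labels() + y(1));
    }
};

// stubs so the extractor can ride along with sequence_labeler serialization
inline void serialize   (const toy_feature_extractor&, std::ostream&) {}
inline void deserialize (toy_feature_extractor&, std::istream&) {}
```

A dlib::sequence_labeler<toy_feature_extractor> built from a learned weight vector (e.g. one produced by dlib's structural sequence labeling trainer) would then label a sequence with labeler(x).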
- ensures - - if (x.size() != y.size() || - contains_invalid_labeling(fe,x[i],y[i]) == true for some i ) then - - returns true - - else - - returns false - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class sequence_labeler - { - /*! - REQUIREMENTS ON feature_extractor - It must be an object that implements an interface compatible with - the example_feature_extractor discussed above. - - WHAT THIS OBJECT REPRESENTS - This object is a tool for doing sequence labeling. In particular, it is - capable of representing sequence labeling models such as those produced by - Hidden Markov SVMs or Chain Structured Conditional Random fields. See the - following papers for an introduction to these techniques: - - Hidden Markov Support Vector Machines by - Y. Altun, I. Tsochantaridis, T. Hofmann - - Shallow Parsing with Conditional Random Fields by - Fei Sha and Fernando Pereira - - - The model used by this object is the following. Given - an input sequence x, predict an output label sequence y - such that: - y == argmax_Y dot(get_weights(), PSI(x,Y)) - Where PSI() is defined by the feature_extractor template - argument. - - THREAD SAFETY - It is always safe to use distinct instances of this object in different - threads. However, when a single instance is shared between threads then - the following rules apply: - It is safe to call the const members of this object from multiple - threads so long as the feature_extractor is also threadsafe. This is - because the const members are purely read-only operations. However, - any operation that modifies a sequence_labeler is not threadsafe. - !*/ - - public: - typedef typename feature_extractor::sequence_type sample_sequence_type; - typedef std::vector<unsigned long> labeled_sequence_type; - - sequence_labeler( - ); - /*! - ensures - - #get_feature_extractor() == feature_extractor() - (i.e. it will have its default value) - - #get_weights().size() == #get_feature_extractor().num_features() - - #get_weights() == 0 - !*/ - - explicit sequence_labeler( - const matrix<double,0,1>& weights - ); - /*! - requires - - feature_extractor().num_features() == weights.size() - ensures - - #get_feature_extractor() == feature_extractor() - (i.e. it will have its default value) - - #get_weights() == weights - !*/ - - sequence_labeler( - const matrix<double,0,1>& weights, - const feature_extractor& fe - ); - /*! - requires - - fe.num_features() == weights.size() - ensures - - #get_feature_extractor() == fe - - #get_weights() == weights - !*/ - - const feature_extractor& get_feature_extractor ( - ) const; - /*! - ensures - - returns the feature extractor used by this object - !*/ - - const matrix<double,0,1>& get_weights ( - ) const; - /*! - ensures - - returns the parameter vector associated with this sequence labeler. - The length of the vector is get_feature_extractor().num_features(). - !*/ - - unsigned long num_labels ( - ) const; - /*! - ensures - - returns get_feature_extractor().num_labels() - (i.e. returns the number of possible output labels for each - element of a sequence) - !*/ - - labeled_sequence_type operator() ( - const sample_sequence_type& x - ) const; - /*! 
- requires - - num_labels() > 0 - ensures - - returns a vector Y of label values such that: - - Y.size() == x.size() - - for all valid i: - - Y[i] == the predicted label for x[i] - - 0 <= Y[i] < num_labels() - !*/ - - void label_sequence ( - const sample_sequence_type& x, - labeled_sequence_type& y - ) const; - /*! - requires - - num_labels() > 0 - ensures - - #y == (*this)(x) - (i.e. This is just another interface to the operator() routine - above. This one avoids returning the results by value and therefore - might be a little faster in some cases) - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - void serialize ( - const sequence_labeler<feature_extractor>& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - void deserialize ( - sequence_labeler<feature_extractor>& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SEQUENCE_LAbELER_ABSTRACT_H_h_ - - diff --git a/ml/dlib/dlib/svm/sequence_segmenter.h b/ml/dlib/dlib/svm/sequence_segmenter.h deleted file mode 100644 index 237023efa..000000000 --- a/ml/dlib/dlib/svm/sequence_segmenter.h +++ /dev/null @@ -1,468 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_SEQUENCE_SeGMENTER_H_h_ -#define DLIB_SEQUENCE_SeGMENTER_H_h_ - -#include "sequence_segmenter_abstract.h" -#include "../matrix.h" -#include "sequence_labeler.h" -#include <vector> - -namespace dlib -{ - // This namespace contains implementation details for the sequence_segmenter. - namespace impl_ss - { - - // ------------------------------------------------------------------------------------ - - // BIO/BILOU labels - const unsigned int BEGIN = 0; - const unsigned int INSIDE = 1; - const unsigned int OUTSIDE = 2; - const unsigned int LAST = 3; - const unsigned int UNIT = 4; - - - // ------------------------------------------------------------------------------------ - - template <typename ss_feature_extractor> - class feature_extractor - { - /*! - WHAT THIS OBJECT REPRESENTS - This is a feature extractor for a sequence_labeler. It serves to map - the interface defined by a sequence_labeler into the kind of interface - defined for a sequence_segmenter. - !*/ - - public: - typedef typename ss_feature_extractor::sequence_type sequence_type; - - ss_feature_extractor fe; - - feature_extractor() {} - feature_extractor(const ss_feature_extractor& ss_fe_) : fe(ss_fe_) {} - - unsigned long num_nonnegative_weights ( - ) const - { - const unsigned long NL = ss_feature_extractor::use_BIO_model ? 3 : 5; - if (ss_feature_extractor::allow_negative_weights) - { - return 0; - } - else - { - // We make everything non-negative except for the label transition - // and bias features. - return num_features() - NL*NL - NL; - } - } - - friend void serialize(const feature_extractor& item, std::ostream& out) - { - serialize(item.fe, out); - } - - friend void deserialize(feature_extractor& item, std::istream& in) - { - deserialize(item.fe, in); - } - - unsigned long num_features() const - { - const unsigned long NL = ss_feature_extractor::use_BIO_model ? 
3 : 5; - if (ss_feature_extractor::use_high_order_features) - return NL + NL*NL + (NL*NL+NL)*fe.num_features()*fe.window_size(); - else - return NL + NL*NL + NL*fe.num_features()*fe.window_size(); - } - - unsigned long order() const - { - return 1; - } - - unsigned long num_labels() const - { - if (ss_feature_extractor::use_BIO_model) - return 3; - else - return 5; - } - - private: - - template <typename feature_setter> - struct dot_functor - { - /*! - WHAT THIS OBJECT REPRESENTS - This class wraps the feature_setter used by a sequence_labeler - and turns it into the kind needed by a sequence_segmenter. - !*/ - - dot_functor(feature_setter& set_feature_, unsigned long offset_) : - set_feature(set_feature_), offset(offset_) {} - - feature_setter& set_feature; - unsigned long offset; - - inline void operator() ( - unsigned long feat_index - ) - { - set_feature(offset+feat_index); - } - - inline void operator() ( - unsigned long feat_index, - double feat_value - ) - { - set_feature(offset+feat_index, feat_value); - } - }; - - public: - - template <typename EXP> - bool reject_labeling ( - const sequence_type& x, - const matrix_exp<EXP>& y, - unsigned long pos - ) const - { - if (ss_feature_extractor::use_BIO_model) - { - // Don't allow BIO label patterns that don't correspond to a sensical - // segmentation. - if (y.size() > 1 && y(0) == INSIDE && y(1) == OUTSIDE) - return true; - if (y.size() == 1 && y(0) == INSIDE) - return true; - } - else - { - // Don't allow BILOU label patterns that don't correspond to a sensical - // segmentation. - if (y.size() > 1) - { - if (y(1) == BEGIN && y(0) == OUTSIDE) - return true; - if (y(1) == BEGIN && y(0) == UNIT) - return true; - if (y(1) == BEGIN && y(0) == BEGIN) - return true; - - if (y(1) == INSIDE && y(0) == BEGIN) - return true; - if (y(1) == INSIDE && y(0) == OUTSIDE) - return true; - if (y(1) == INSIDE && y(0) == UNIT) - return true; - - if (y(1) == OUTSIDE && y(0) == INSIDE) - return true; - if (y(1) == OUTSIDE && y(0) == LAST) - return true; - - if (y(1) == LAST && y(0) == INSIDE) - return true; - if (y(1) == LAST && y(0) == LAST) - return true; - - if (y(1) == UNIT && y(0) == INSIDE) - return true; - if (y(1) == UNIT && y(0) == LAST) - return true; - - // if at the end of the sequence - if (pos == x.size()-1) - { - if (y(0) == BEGIN) - return true; - if (y(0) == INSIDE) - return true; - } - } - else - { - if (y(0) == INSIDE) - return true; - if (y(0) == LAST) - return true; - - // if at the end of the sequence - if (pos == x.size()-1) - { - if (y(0) == BEGIN) - return true; - } - } - } - return false; - } - - template <typename feature_setter, typename EXP> - void get_features ( - feature_setter& set_feature, - const sequence_type& x, - const matrix_exp<EXP>& y, - unsigned long position - ) const - { - unsigned long offset = 0; - - const int window_size = fe.window_size(); - - const int base_dims = fe.num_features(); - for (int i = 0; i < window_size; ++i) - { - const long pos = i-window_size/2 + static_cast<long>(position); - if (0 <= pos && pos < (long)x.size()) - { - const unsigned long off1 = y(0)*base_dims; - dot_functor<feature_setter> fs1(set_feature, offset+off1); - fe.get_features(fs1, x, pos); - - if (ss_feature_extractor::use_high_order_features && y.size() > 1) - { - const unsigned long off2 = num_labels()*base_dims + (y(0)*num_labels()+y(1))*base_dims; - dot_functor<feature_setter> fs2(set_feature, offset+off2); - fe.get_features(fs2, x, pos); - } - } - - if (ss_feature_extractor::use_high_order_features) - offset += 
num_labels()*base_dims + num_labels()*num_labels()*base_dims; - else - offset += num_labels()*base_dims; - } - - // Pull out an indicator feature for the type of transition between the - // previous label and the current label. - if (y.size() > 1) - set_feature(offset + y(1)*num_labels() + y(0)); - - offset += num_labels()*num_labels(); - // pull out an indicator feature for the current label. This is the per - // label bias. - set_feature(offset + y(0)); - } - }; - - } // end namespace impl_ss - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - unsigned long total_feature_vector_size ( - const feature_extractor& fe - ) - { - const unsigned long NL = feature_extractor::use_BIO_model ? 3 : 5; - if (feature_extractor::use_high_order_features) - return NL + NL*NL + (NL*NL+NL)*fe.num_features()*fe.window_size(); - else - return NL + NL*NL + NL*fe.num_features()*fe.window_size(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class sequence_segmenter - { - public: - typedef typename feature_extractor::sequence_type sample_sequence_type; - typedef std::vector<std::pair<unsigned long, unsigned long> > segmented_sequence_type; - - - sequence_segmenter() - { -#ifdef ENABLE_ASSERTS - const feature_extractor& fe = labeler.get_feature_extractor().fe; - DLIB_ASSERT(fe.window_size() >= 1 && fe.num_features() >= 1, - "\t sequence_segmenter::sequence_segmenter()" - << "\n\t An invalid feature extractor was supplied." - << "\n\t fe.window_size(): " << fe.window_size() - << "\n\t fe.num_features(): " << fe.num_features() - << "\n\t this: " << this - ); -#endif - } - - explicit sequence_segmenter( - const matrix<double,0,1>& weights - ) : - labeler(weights) - { -#ifdef ENABLE_ASSERTS - const feature_extractor& fe = labeler.get_feature_extractor().fe; - // make sure requires clause is not broken - DLIB_ASSERT(total_feature_vector_size(fe) == (unsigned long)weights.size(), - "\t sequence_segmenter::sequence_segmenter(weights)" - << "\n\t These sizes should match" - << "\n\t total_feature_vector_size(fe): " << total_feature_vector_size(fe) - << "\n\t weights.size(): " << weights.size() - << "\n\t this: " << this - ); - DLIB_ASSERT(fe.window_size() >= 1 && fe.num_features() >= 1, - "\t sequence_segmenter::sequence_segmenter()" - << "\n\t An invalid feature extractor was supplied." - << "\n\t fe.window_size(): " << fe.window_size() - << "\n\t fe.num_features(): " << fe.num_features() - << "\n\t this: " << this - ); -#endif - } - - sequence_segmenter( - const matrix<double,0,1>& weights, - const feature_extractor& fe - ) : - labeler(weights, impl_ss::feature_extractor<feature_extractor>(fe)) - { - // make sure requires clause is not broken - DLIB_ASSERT(total_feature_vector_size(fe) == (unsigned long)weights.size(), - "\t sequence_segmenter::sequence_segmenter(weights,fe)" - << "\n\t These sizes should match" - << "\n\t total_feature_vector_size(fe): " << total_feature_vector_size(fe) - << "\n\t weights.size(): " << weights.size() - << "\n\t this: " << this - ); - DLIB_ASSERT(fe.window_size() >= 1 && fe.num_features() >= 1, - "\t sequence_segmenter::sequence_segmenter()" - << "\n\t An invalid feature extractor was supplied." 
- << "\n\t fe.window_size(): " << fe.window_size() - << "\n\t fe.num_features(): " << fe.num_features() - << "\n\t this: " << this - ); - } - - const feature_extractor& get_feature_extractor ( - ) const { return labeler.get_feature_extractor().fe; } - - const matrix<double,0,1>& get_weights ( - ) const { return labeler.get_weights(); } - - segmented_sequence_type operator() ( - const sample_sequence_type& x - ) const - { - segmented_sequence_type y; - segment_sequence(x,y); - return y; - } - - void segment_sequence ( - const sample_sequence_type& x, - segmented_sequence_type& y - ) const - { - y.clear(); - std::vector<unsigned long> labels; - labeler.label_sequence(x, labels); - - if (feature_extractor::use_BIO_model) - { - // Convert from BIO tagging to the explicit segments representation. - for (unsigned long i = 0; i < labels.size(); ++i) - { - if (labels[i] == impl_ss::BEGIN) - { - const unsigned long begin = i; - ++i; - while (i < labels.size() && labels[i] == impl_ss::INSIDE) - ++i; - - y.push_back(std::make_pair(begin, i)); - --i; - } - } - } - else - { - // Convert from BILOU tagging to the explicit segments representation. - for (unsigned long i = 0; i < labels.size(); ++i) - { - if (labels[i] == impl_ss::BEGIN) - { - const unsigned long begin = i; - ++i; - while (i < labels.size() && labels[i] == impl_ss::INSIDE) - ++i; - - y.push_back(std::make_pair(begin, i+1)); - } - else if (labels[i] == impl_ss::UNIT) - { - y.push_back(std::make_pair(i, i+1)); - } - } - } - } - - friend void serialize(const sequence_segmenter& item, std::ostream& out) - { - int version = 1; - serialize(version, out); - - // Save these just so we can compare them when we deserialize and make - // sure the feature_extractor being used is compatible with the model being - // loaded. - serialize(feature_extractor::use_BIO_model, out); - serialize(feature_extractor::use_high_order_features, out); - serialize(total_feature_vector_size(item.get_feature_extractor()), out); - - serialize(item.labeler, out); - } - - friend void deserialize(sequence_segmenter& item, std::istream& in) - { - int version = 0; - deserialize(version, in); - if (version != 1) - throw serialization_error("Unexpected version found while deserializing dlib::sequence_segmenter."); - - // Try to check if the saved model is compatible with the current feature - // extractor. - bool use_BIO_model, use_high_order_features; - unsigned long dims; - deserialize(use_BIO_model, in); - deserialize(use_high_order_features, in); - deserialize(dims, in); - deserialize(item.labeler, in); - if (use_BIO_model != feature_extractor::use_BIO_model) - { - throw serialization_error("Incompatible feature extractor found while deserializing " - "dlib::sequence_segmenter. Wrong value of use_BIO_model."); - } - if (use_high_order_features != feature_extractor::use_high_order_features) - { - throw serialization_error("Incompatible feature extractor found while deserializing " - "dlib::sequence_segmenter. Wrong value of use_high_order_features."); - } - if (dims != total_feature_vector_size(item.get_feature_extractor())) - { - throw serialization_error("Incompatible feature extractor found while deserializing " - "dlib::sequence_segmenter. 
Wrong value of total_feature_vector_size()."); - } - } - - private: - sequence_labeler<impl_ss::feature_extractor<feature_extractor> > labeler; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SEQUENCE_SeGMENTER_H_h_ - - diff --git a/ml/dlib/dlib/svm/sequence_segmenter_abstract.h b/ml/dlib/dlib/svm/sequence_segmenter_abstract.h deleted file mode 100644 index 7229fee22..000000000 --- a/ml/dlib/dlib/svm/sequence_segmenter_abstract.h +++ /dev/null @@ -1,452 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SEQUENCE_SeGMENTER_ABSTRACT_H_h_ -#ifdef DLIB_SEQUENCE_SeGMENTER_ABSTRACT_H_h_ - -#include "../matrix.h" -#include <vector> -#include "sequence_labeler_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class example_feature_extractor - { - /*! - WHAT THIS OBJECT REPRESENTS - This object defines the interface a feature extractor must implement if it - is to be used with the sequence_segmenter defined at the bottom of this - file. - - The model used by sequence_segmenter objects is the following. Given an - input sequence x, predict an output label sequence y such that: - y == argmax_Y dot(w, PSI(x,Y)) - Where w is a parameter vector and the label sequence defines a segmentation - of x. - - Recall that a sequence_segmenter uses the BIO or BILOU tagging model and is - also an instantiation of the dlib::sequence_labeler. Selecting to use the - BIO model means that each element of the label sequence y takes on one of - three possible values (B, I, or O) and together these labels define a - segmentation of the sequence. For example, to represent a segmentation of - the sequence of words "The dog ran to Bob Jones" where only "Bob Jones" was - segmented out we would use the label sequence OOOOBI. The BILOU model is - similar except that it uses five different labels and each segment is - labeled as U, BL, BIL, BIIL, BIIIL, and so on depending on its length. - Therefore, the BILOU model is able to more explicitly model the ends of the - segments than the BIO model, but has more parameters to estimate. - - Keeping all this in mind, the purpose of a sequence_segmenter is to take - care of the bookkeeping associated with creating BIO/BILOU tagging models - for segmentation tasks. In particular, it presents the user with a - simplified version of the interface used by the dlib::sequence_labeler. It - does this by completely hiding the BIO/BILOU tags from the user and instead - exposes an explicit sub-segment based labeling representation. It also - simplifies the construction of the PSI() feature vector. - - Like in the dlib::sequence_labeler, PSI() is a sum of feature vectors, each - derived from the entire input sequence x but only part of the label - sequence y. In the case of the sequence_segmenter, we use an order one - Markov model. This means that - PSI(x,y) == sum_i XI(x, y_{i-1}, y_{i}, i) - where the sum is taken over all the elements in the sequence. At each - element we extract a feature vector, XI(), that is expected to encode - important details describing what the i-th position of the sequence looks - like in the context of the current and previous labels. To do this, XI() - is allowed to look at any part of the input sequence x, the current and - previous labels, and of course it must also know the position in question, i. 
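-
-            As a concrete (purely illustrative) example of this decomposition:
-            suppose x is the 3 element sequence ["Bob", "Jones", "ran"] and,
-            under the BIO model, y is the label sequence [B, I, O].  Then the
-            order one Markov sum above expands to:
-                PSI(x,y) == XI(x, <none>, B, 0) + XI(x, B, I, 1) + XI(x, I, O, 2)
-            where <none> is just notation used here for the missing previous
-            label at position 0.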
- - The sequence_segmenter simplifies this further by decomposing XI() into - components which model the current window around each position as well as - the conjunction of the current window around each position and the previous - label. In particular, the sequence_segmenter only asks a user to provide a - single feature vector which characterizes a position of the sequence - independent of any labeling. We denote this feature vector by ZI(x,i), where - x is the sequence and i is the position in question. - - For example, suppose we use a window size of 3 and BIO tags, then we can - put all this together and define XI() in terms of ZI(). To do this, we can - think of XI() as containing 12*3 slots which contain either a zero vector - or a ZI() vector. Each combination of window position and labeling has a - different slot. To explain further, consider the following examples where - we have annotated which parts of XI() correspond to each slot. - - If the previous and current label are both B and we use a window size of 3 - then XI() would be instantiated as: - XI(x, B, B, i) = [ZI(x,i-1) \ - ZI(x,i) > If current label is B - ZI(x,i+1) / - 0 \ - 0 > If current label is I - 0 / - 0 \ - 0 > If current label is O - 0 / - - ZI(x,i-1) \ - ZI(x,i) > If previous label is B and current label is B - ZI(x,i+1) / - 0 \ - 0 > If previous label is B and current label is I - 0 / - 0 \ - 0 > If previous label is B and current label is O - 0 / - - 0 \ - 0 > If previous label is I and current label is B - 0 / - 0 \ - 0 > If previous label is I and current label is I - 0 / - 0 \ - 0 > If previous label is I and current label is O - 0 / - - 0 \ - 0 > If previous label is O and current label is B - 0 / - 0 \ - 0 > If previous label is O and current label is I - 0 / - 0 \ - 0 > If previous label is O and current label is O - 0] / - - - If the previous label is I and the current label is O and we use a window - size of 3 then XI() would be instantiated as: - XI(x, I, O, i) = [0 \ - 0 > If current label is B - 0 / - 0 \ - 0 > If current label is I - 0 / - ZI(x,i-1) \ - ZI(x,i) > If current label is O - ZI(x,i+1) / - - 0 \ - 0 > If previous label is B and current label is B - 0 / - 0 \ - 0 > If previous label is B and current label is I - 0 / - 0 \ - 0 > If previous label is B and current label is O - 0 / - - 0 \ - 0 > If previous label is I and current label is B - 0 / - 0 \ - 0 > If previous label is I and current label is I - 0 / - ZI(x,i-1) \ - ZI(x,i) > If previous label is I and current label is O - ZI(x,i+1) / - - 0 \ - 0 > If previous label is O and current label is B - 0 / - 0 \ - 0 > If previous label is O and current label is I - 0 / - 0 \ - 0 > If previous label is O and current label is O - 0] / - - If we had instead used the BILOU tagging model the XI() vector would - have been similarly defined except that there would be 30*3 slots for - the various label combination instead of 12*3. - - Finally, while not shown here, we also include indicator features in - XI() to model label transitions and individual label biases. These are - 12 extra features in the case of the BIO tagging model and 30 extra in - the case of the BILOU tagging model. - - THREAD SAFETY - Instances of this object are required to be threadsafe, that is, it should - be safe for multiple threads to make concurrent calls to the member - functions of this object. - !*/ - - public: - // This should be the type used to represent an input sequence. 
It can be - // anything so long as it has a .size() which returns the length of the sequence. - typedef the_type_used_to_represent_a_sequence sequence_type; - - // If you want to use the BIO tagging model then set this bool to true. Set it to - // false to use the BILOU tagging model. - const static bool use_BIO_model = true; - - // In the WHAT THIS OBJECT REPRESENTS section above we discussed how we model the - // conjunction of the previous label and the window around each position. Doing - // this greatly expands the size of the parameter vector w. You can optionally - // disable these higher order features by setting the use_high_order_features bool - // to false. This will cause XI() to include only slots which are independent of - // the previous label. - const static bool use_high_order_features = true; - - // You use a tool like the structural_sequence_segmentation_trainer to learn the weight - // vector needed by a sequence_segmenter. You can tell the trainer to force all the - // elements of the weight vector corresponding to ZI() to be non-negative. This is all - // the elements of w except for the elements corresponding to the label transition and - // bias indicator features. To do this, just set allow_negative_weights to false. - const static bool allow_negative_weights = true; - - - example_feature_extractor ( - ); - /*! - ensures - - this object is properly initialized - !*/ - - unsigned long num_features( - ) const; - /*! - ensures - - returns the dimensionality of the ZI() feature vector. This number is - always >= 1 - !*/ - - unsigned long window_size( - ) const; - /*! - ensures - - returns the size of the window ZI() vectors are extracted from. This - number is always >= 1. - !*/ - - template <typename feature_setter> - void get_features ( - feature_setter& set_feature, - const sequence_type& x, - unsigned long position - ) const; - /*! - requires - - position < x.size() - - set_feature is a function object which allows expressions of the form: - - set_feature((unsigned long)feature_index, (double)feature_value); - - set_feature((unsigned long)feature_index); - ensures - - This function computes the ZI(x,position) feature vector. This is a - feature vector which should capture the properties of x[position] that - are informative relative to the sequence segmentation task you are trying - to perform. - - ZI(x,position) is returned as a sparse vector by invoking set_feature(). - For example, to set the feature with an index of 55 to the value of 1 - this method would call: - set_feature(55); - Or equivalently: - set_feature(55,1); - Therefore, the first argument to set_feature is the index of the feature - to be set while the second argument is the value the feature should take. - Additionally, note that calling set_feature() multiple times with the - same feature index does NOT overwrite the old value, it adds to the - previous value. For example, if you call set_feature(55) 3 times then it - will result in feature 55 having a value of 3. - - This function only calls set_feature() with feature_index values < num_features() - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - void serialize( - const example_feature_extractor& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - - void deserialize( - example_feature_extractor& item, - std::istream& in - ); - /*!
- provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - unsigned long total_feature_vector_size ( - const feature_extractor& fe - ); - /*! - requires - - fe must be an object that implements an interface compatible with the - example_feature_extractor discussed above. - ensures - - returns the dimensionality of the PSI() vector defined by the given feature - extractor. - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class sequence_segmenter - { - /*! - REQUIREMENTS ON feature_extractor - It must be an object that implements an interface compatible with - the example_feature_extractor discussed above. - - WHAT THIS OBJECT REPRESENTS - This object is a tool for segmenting a sequence of objects into a set of - non-overlapping chunks. An example sequence segmentation task is to take - English sentences and identify all the named entities. In this example, - you would be using a sequence_segmenter to find all the chunks of - contiguous words which refer to proper names. - - Internally, the sequence_segmenter uses the BIO (Begin, Inside, Outside) or - BILOU (Begin, Inside, Last, Outside, Unit) sequence tagging model. - Moreover, it is implemented using a dlib::sequence_labeler object and - therefore sequence_segmenter objects are examples of chain structured - conditional random field style sequence taggers. - - THREAD SAFETY - It is always safe to use distinct instances of this object in different - threads. However, when a single instance is shared between threads then - the following rules apply: - It is safe to call the const members of this object from multiple - threads so long as the feature_extractor is also threadsafe. This is - because the const members are purely read-only operations. However, - any operation that modifies a sequence_segmenter is not threadsafe. - !*/ - - public: - typedef typename feature_extractor::sequence_type sample_sequence_type; - typedef std::vector<std::pair<unsigned long, unsigned long> > segmented_sequence_type; - - sequence_segmenter( - ); - /*! - ensures - - #get_feature_extractor() == feature_extractor() - (i.e. it will have its default value) - - #get_weights().size() == total_feature_vector_size(#get_feature_extractor()) - - #get_weights() == 0 - !*/ - - explicit sequence_segmenter( - const matrix<double,0,1>& weights - ); - /*! - requires - - total_feature_vector_size(feature_extractor()) == weights.size() - ensures - - #get_feature_extractor() == feature_extractor() - (i.e. it will have its default value) - - #get_weights() == weights - !*/ - - sequence_segmenter( - const matrix<double,0,1>& weights, - const feature_extractor& fe - ); - /*! - requires - - total_feature_vector_size(fe) == weights.size() - ensures - - #get_feature_extractor() == fe - - #get_weights() == weights - !*/ - - const feature_extractor& get_feature_extractor ( - ) const; - /*! - ensures - - returns the feature extractor used by this object. - !*/ - - const matrix<double,0,1>& get_weights ( - ) const; - /*! - ensures - - returns the parameter vector associated with this sequence segmenter. 
- The length of the vector is total_feature_vector_size(get_feature_extractor()). - !*/ - - segmented_sequence_type operator() ( - const sample_sequence_type& x - ) const; - /*! - ensures - - Takes an input sequence and returns a list of detected segments within - that sequence. - - None of the returned segments will overlap. - - The returned segments are listed in the order they appeared in the input sequence. - - To be precise, this function returns a std::vector Y of segments such that: - - Y.size() == the number of segments detected in the input sequence x. - - for all valid i: - - Y[i].first == the start of the i-th segment. - - Y[i].second == one past the end of the i-th segment. - - Therefore, the i-th detected segment in x is composed of the elements - x[Y[i].first], x[Y[i].first+1], ..., x[Y[i].second-1] - - Y[i].first < x.size() - - Y[i].second <= x.size() - - Y[i].first < Y[i].second - (i.e. This function never outputs empty segments) - - Y[i].second <= Y[i+1].first - (i.e. the segments are listed in order of appearance and do not overlap) - !*/ - - void segment_sequence ( - const sample_sequence_type& x, - segmented_sequence_type& y - ) const; - /*! - ensures - - #y == (*this)(x) - (i.e. This is just another interface to the operator() routine - above. This one avoids returning the results by value and therefore - might be a little faster in some cases) - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - void serialize ( - const sequence_segmenter<feature_extractor>& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - void deserialize ( - sequence_segmenter<feature_extractor>& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SEQUENCE_SeGMENTER_ABSTRACT_H_h_ - diff --git a/ml/dlib/dlib/svm/simplify_linear_decision_function.h b/ml/dlib/dlib/svm/simplify_linear_decision_function.h deleted file mode 100644 index 4f5bef6f3..000000000 --- a/ml/dlib/dlib/svm/simplify_linear_decision_function.h +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
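-
-// The functions below collapse a linear decision_function, which dlib evaluates
-// as f(x) = sum_i alpha(i)*K(basis_vectors(i), x) - b, into the mathematically
-// identical single basis vector form f(x) = dot(w,x) - b where
-// w = sum_i alpha(i)*basis_vectors(i).  As a small worked instance (made up
-// numbers, purely illustrative): with alpha = [2, -1] and basis vectors [1,0]
-// and [0,3], the collapsed weight vector is w = 2*[1,0] - 1*[0,3] = [2,-3], so
-// each prediction then costs a single dot product instead of one kernel
-// evaluation per basis vector.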
-#ifndef DLIB_SIMPLIFY_LINEAR_DECiSION_FUNCTION_Hh_ -#define DLIB_SIMPLIFY_LINEAR_DECiSION_FUNCTION_Hh_ - -#include "simplify_linear_decision_function_abstract.h" -#include "../algs.h" -#include "function.h" -#include "sparse_kernel.h" -#include "kernel.h" -#include <map> -#include <vector> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - decision_function<sparse_linear_kernel<T> > simplify_linear_decision_function ( - const decision_function<sparse_linear_kernel<T> >& df - ) - { - // don't do anything if we don't have to - if (df.basis_vectors.size() <= 1) - return df; - - decision_function<sparse_linear_kernel<T> > new_df; - - new_df.b = df.b; - new_df.basis_vectors.set_size(1); - new_df.alpha.set_size(1); - new_df.alpha(0) = 1; - - // now compute the weighted sum of all the sparse basis_vectors in df - typedef typename T::value_type pair_type; - typedef typename pair_type::first_type key_type; - typedef typename pair_type::second_type value_type; - std::map<key_type, value_type> accum; - for (long i = 0; i < df.basis_vectors.size(); ++i) - { - typename T::const_iterator j = df.basis_vectors(i).begin(); - const typename T::const_iterator end = df.basis_vectors(i).end(); - for (; j != end; ++j) - { - accum[j->first] += df.alpha(i) * (j->second); - } - } - - new_df.basis_vectors(0) = T(accum.begin(), accum.end()); - - return new_df; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - decision_function<linear_kernel<T> > simplify_linear_decision_function ( - const decision_function<linear_kernel<T> >& df - ) - { - // don't do anything if we don't have to - if (df.basis_vectors.size() <= 1) - return df; - - decision_function<linear_kernel<T> > new_df; - - new_df.b = df.b; - new_df.basis_vectors.set_size(1); - new_df.alpha.set_size(1); - new_df.alpha(0) = 1; - - // now compute the weighted sum of all the basis_vectors in df - new_df.basis_vectors(0) = 0; - for (long i = 0; i < df.basis_vectors.size(); ++i) - { - new_df.basis_vectors(0) += df.alpha(i) * df.basis_vectors(i); - } - - return new_df; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - decision_function<linear_kernel<T> > simplify_linear_decision_function ( - const normalized_function<decision_function<linear_kernel<T> >, vector_normalizer<T> >& df - ) - { - decision_function<linear_kernel<T> > new_df = simplify_linear_decision_function(df.function); - - // now incorporate the normalization stuff into new_df - new_df.basis_vectors(0) = pointwise_multiply(new_df.basis_vectors(0), df.normalizer.std_devs()); - new_df.b += dot(new_df.basis_vectors(0), df.normalizer.means()); - - return new_df; - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SIMPLIFY_LINEAR_DECiSION_FUNCTION_Hh_ - diff --git a/ml/dlib/dlib/svm/simplify_linear_decision_function_abstract.h b/ml/dlib/dlib/svm/simplify_linear_decision_function_abstract.h deleted file mode 100644 index cff8ae11f..000000000 --- a/ml/dlib/dlib/svm/simplify_linear_decision_function_abstract.h +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
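-
-// A minimal usage sketch (the trainer choice and types here are assumptions made
-// for illustration; any trainer that produces a linear kernel decision_function
-// would work the same way):
-//
-//     typedef matrix<double,0,1> sample_type;
-//     typedef linear_kernel<sample_type> kernel_type;
-//     svm_c_trainer<kernel_type> trainer;
-//     decision_function<kernel_type> df = trainer.train(samples, labels);
-//     // df stores one basis vector per support vector.  Collapse them into one:
-//     decision_function<kernel_type> simple_df = simplify_linear_decision_function(df);
-//     // Now simple_df.basis_vectors.size() == 1 while simple_df(x) == df(x).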
-#undef DLIB_SIMPLIFY_LINEAR_DECiSION_FUNCTION_ABSTRACT_Hh_ -#ifdef DLIB_SIMPLIFY_LINEAR_DECiSION_FUNCTION_ABSTRACT_Hh_ - -#include "../algs.h" -#include "function_abstract.h" -#include "sparse_kernel_abstract.h" -#include "kernel_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - decision_function<sparse_linear_kernel<T> > simplify_linear_decision_function ( - const decision_function<sparse_linear_kernel<T> >& df - ); - /*! - requires - - T must be a sparse vector as defined in dlib/svm/sparse_vector_abstract.h - ensures - - returns a simplified version of df that only has one basis vector. That - is, returns a decision function D such that: - - D.basis_vectors.size() == 1 (or 0 if df is empty) - - for all possible x: D(x) == df(x) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - decision_function<linear_kernel<T> > simplify_linear_decision_function ( - const decision_function<linear_kernel<T> >& df - ); - /*! - requires - - T must be a dlib::matrix object - ensures - - returns a simplified version of df that only has one basis vector. That - is, returns a decision function D such that: - - D.basis_vectors.size() == 1 (or 0 if df is empty) - - for all possible x: D(x) == df(x) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - decision_function<linear_kernel<T> > simplify_linear_decision_function ( - const normalized_function<decision_function<linear_kernel<T> >, vector_normalizer<T> >& df - ); - /*! - requires - - T must be a dlib::matrix object - ensures - - returns a simplified version of df that only has one basis vector and - doesn't involve an explicit vector_normalizer. That is, returns a - decision function D such that: - - D.basis_vectors.size() == 1 (or 0 if df is empty) - - for all possible x: D(x) == df(x) - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SIMPLIFY_LINEAR_DECiSION_FUNCTION_ABSTRACT_Hh_ - diff --git a/ml/dlib/dlib/svm/sort_basis_vectors.h b/ml/dlib/dlib/svm/sort_basis_vectors.h deleted file mode 100644 index 1d4605b41..000000000 --- a/ml/dlib/dlib/svm/sort_basis_vectors.h +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_SORT_BASIS_VECTORs_Hh_ -#define DLIB_SORT_BASIS_VECTORs_Hh_ - -#include <vector> - -#include "sort_basis_vectors_abstract.h" -#include "../matrix.h" -#include "../statistics.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - namespace bs_impl - { - template <typename EXP> - typename EXP::matrix_type invert ( - const matrix_exp<EXP>& m - ) - { - eigenvalue_decomposition<EXP> eig(make_symmetric(m)); - - typedef typename EXP::type scalar_type; - typedef typename EXP::mem_manager_type mm_type; - - matrix<scalar_type,0,1,mm_type> vals = eig.get_real_eigenvalues(); - - const scalar_type max_eig = max(abs(vals)); - const scalar_type thresh = max_eig*std::sqrt(std::numeric_limits<scalar_type>::epsilon()); - - // Since m might be singular or almost singular we need to do something about - // any very small eigenvalues. 
So here we set the smallest eigenvalues to - // be equal to a large value to make the inversion stable. We can't just set - // them to zero like in a normal pseudo-inverse since we want the resulting - // inverse matrix to be full rank. - for (long i = 0; i < vals.size(); ++i) - { - if (std::abs(vals(i)) < thresh) - vals(i) = max_eig; - } - - // Build the inverse matrix. This is basically a pseudo-inverse. - return make_symmetric(eig.get_pseudo_v()*diagm(reciprocal(vals))*trans(eig.get_pseudo_v())); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type, - typename vect1_type, - typename vect2_type, - typename vect3_type - > - const std::vector<typename kernel_type::sample_type> sort_basis_vectors_impl ( - const kernel_type& kern, - const vect1_type& samples, - const vect2_type& labels, - const vect3_type& basis, - double eps - ) - { - DLIB_ASSERT(is_binary_classification_problem(samples, labels) && - 0 < eps && eps <= 1 && - basis.size() > 0, - "\t void sort_basis_vectors()" - << "\n\t Invalid arguments were given to this function." - << "\n\t is_binary_classification_problem(samples, labels): " << is_binary_classification_problem(samples, labels) - << "\n\t basis.size(): " << basis.size() - << "\n\t eps: " << eps - ); - - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::mem_manager_type mm_type; - - typedef matrix<scalar_type,0,1,mm_type> col_matrix; - typedef matrix<scalar_type,0,0,mm_type> gen_matrix; - - col_matrix c1_mean, c2_mean, temp, delta; - - - col_matrix weights; - - running_covariance<gen_matrix> cov; - - // compute the covariance matrix and the means of the two classes. - for (long i = 0; i < samples.size(); ++i) - { - temp = kernel_matrix(kern, basis, samples(i)); - cov.add(temp); - if (labels(i) > 0) - c1_mean += temp; - else - c2_mean += temp; - } - - c1_mean /= sum(labels > 0); - c2_mean /= sum(labels < 0); - - delta = c1_mean - c2_mean; - - gen_matrix cov_inv = bs_impl::invert(cov.covariance()); - - - matrix<long,0,1,mm_type> total_perm = trans(range(0, delta.size()-1)); - matrix<long,0,1,mm_type> perm = total_perm; - - std::vector<std::pair<scalar_type,long> > sorted_feats(delta.size()); - - long best_size = delta.size(); - long misses = 0; - matrix<long,0,1,mm_type> best_total_perm = perm; - - // Now we basically find fisher's linear discriminant over and over. Each - // time sorting the features so that the most important ones pile up together. - weights = trans(chol(cov_inv))*delta; - while (true) - { - - for (unsigned long i = 0; i < sorted_feats.size(); ++i) - sorted_feats[i] = make_pair(std::abs(weights(i)), i); - - std::sort(sorted_feats.begin(), sorted_feats.end()); - - // make a permutation vector according to the sorting - for (long i = 0; i < perm.size(); ++i) - perm(i) = sorted_feats[i].second; - - - // Apply the permutation. Doing this gives the same result as permuting all the - // features and then recomputing the delta and cov_inv from scratch. - cov_inv = subm(cov_inv,perm,perm); - delta = rowm(delta,perm); - - // Record all the permutations we have done so we will know how the final - // weights match up with the original basis vectors when we are done. - total_perm = rowm(total_perm, perm); - - // compute new Fisher weights for sorted features. - weights = trans(chol(cov_inv))*delta; - - // Measure how many features it takes to account for eps% of the weights vector. 
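-                // The sort above moved the largest magnitude weights to the end of
-                // the vector, so the loop below walks backwards, accumulating squared
-                // weights until their share of length_squared(weights) first exceeds
-                // eps.  E.g., for weights = [0.1, 0.2, 0.97] and eps = 0.9, the last
-                // weight alone carries roughly 0.95 of the squared mass, so size
-                // would come out as 1.  (Illustrative numbers only.)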
- const scalar_type total_weight = length_squared(weights); - scalar_type weight_accum = 0; - long size = 0; - // figure out how to get eps% of the weights - for (long i = weights.size()-1; i >= 0; --i) - { - ++size; - weight_accum += weights(i)*weights(i); - if (weight_accum/total_weight > eps) - break; - } - - // loop until the best_size stops dropping - if (size < best_size) - { - misses = 0; - best_size = size; - best_total_perm = total_perm; - } - else - { - ++misses; - - // Give up once we have had 10 rounds where we didn't find a weights vector with - // a smaller concentration of good features. - if (misses >= 10) - break; - } - - } - - // make sure best_size isn't zero - if (best_size == 0) - best_size = 1; - - std::vector<typename kernel_type::sample_type> sorted_basis; - - // permute the basis so that it matches up with the contents of the best weights - sorted_basis.resize(best_size); - for (unsigned long i = 0; i < sorted_basis.size(); ++i) - { - // Note that we load sorted_basis backwards so that the most important - // basis elements come first. - sorted_basis[i] = basis(best_total_perm(basis.size()-i-1)); - } - - return sorted_basis; - } - - } - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type, - typename vect1_type, - typename vect2_type, - typename vect3_type - > - const std::vector<typename kernel_type::sample_type> sort_basis_vectors ( - const kernel_type& kern, - const vect1_type& samples, - const vect2_type& labels, - const vect3_type& basis, - double eps = 0.99 - ) - { - return bs_impl::sort_basis_vectors_impl(kern, - mat(samples), - mat(labels), - mat(basis), - eps); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SORT_BASIS_VECTORs_Hh_ - diff --git a/ml/dlib/dlib/svm/sort_basis_vectors_abstract.h b/ml/dlib/dlib/svm/sort_basis_vectors_abstract.h deleted file mode 100644 index b43dca170..000000000 --- a/ml/dlib/dlib/svm/sort_basis_vectors_abstract.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SORT_BASIS_VECTORs_ABSTRACT_Hh_ -#ifdef DLIB_SORT_BASIS_VECTORs_ABSTRACT_Hh_ - -#include <vector> - -#include "../matrix.h" -#include "../statistics.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type, - typename vect1_type, - typename vect2_type, - typename vect3_type - > - const std::vector<typename kernel_type::sample_type> sort_basis_vectors ( - const kernel_type& kern, - const vect1_type& samples, - const vect2_type& labels, - const vect3_type& basis, - double eps = 0.99 - ); - /*! - requires - - is_binary_classification_problem(samples, labels) - - 0 < eps <= 1 - - basis.size() > 0 - - kernel_type is a kernel function object as defined in dlib/svm/kernel_abstract.h - It must be capable of operating on the elements of samples and basis. - - vect1_type == a matrix or something convertible to a matrix via mat() - - vect2_type == a matrix or something convertible to a matrix via mat() - - vect3_type == a matrix or something convertible to a matrix via mat() - ensures - - A kernel based learning method ultimately needs to select a set of basis functions - represented by a particular choice of kernel and a set of basis vectors. 
- sort_basis_vectors() attempts to order the elements of basis so that elements which are - most useful in solving the binary classification problem defined by samples and - labels come first. - - In particular, this function returns a std::vector, SB, of sorted basis vectors such that: - - 0 < SB.size() <= basis.size() - - SB will contain elements from basis but they will have been sorted so that - the most useful elements come first (i.e. SB[0] is the most important). - - eps notionally controls how big SB will be. Bigger eps corresponds to a - bigger basis. You can think of it like asking for eps percent of the - discriminating power from the input basis. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SORT_BASIS_VECTORs_ABSTRACT_Hh_ - diff --git a/ml/dlib/dlib/svm/sparse_kernel.h b/ml/dlib/dlib/svm/sparse_kernel.h deleted file mode 100644 index f571135ec..000000000 --- a/ml/dlib/dlib/svm/sparse_kernel.h +++ /dev/null @@ -1,384 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_SVm_SPARSE_KERNEL -#define DLIB_SVm_SPARSE_KERNEL - -#include "sparse_kernel_abstract.h" -#include <cmath> -#include <limits> -#include "../algs.h" -#include "../serialize.h" -#include "sparse_vector.h" - - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct sparse_radial_basis_kernel - { - typedef typename T::value_type::second_type scalar_type; - typedef T sample_type; - typedef default_memory_manager mem_manager_type; - - sparse_radial_basis_kernel(const scalar_type g) : gamma(g) {} - sparse_radial_basis_kernel() : gamma(0.1) {} - sparse_radial_basis_kernel( - const sparse_radial_basis_kernel& k - ) : gamma(k.gamma) {} - - - const scalar_type gamma; - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const - { - const scalar_type d = distance_squared(a,b); - return std::exp(-gamma*d); - } - - sparse_radial_basis_kernel& operator= ( - const sparse_radial_basis_kernel& k - ) - { - const_cast<scalar_type&>(gamma) = k.gamma; - return *this; - } - - bool operator== ( - const sparse_radial_basis_kernel& k - ) const - { - return gamma == k.gamma; - } - }; - - template < - typename T - > - void serialize ( - const sparse_radial_basis_kernel<T>& item, - std::ostream& out - ) - { - try - { - serialize(item.gamma, out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type sparse_radial_basis_kernel"); - } - } - - template < - typename T - > - void deserialize ( - sparse_radial_basis_kernel<T>& item, - std::istream& in - ) - { - typedef typename T::value_type::second_type scalar_type; - try - { - deserialize(const_cast<scalar_type&>(item.gamma), in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type sparse_radial_basis_kernel"); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct sparse_polynomial_kernel - { - typedef typename T::value_type::second_type scalar_type; - typedef T sample_type; - typedef default_memory_manager mem_manager_type; - - sparse_polynomial_kernel(const scalar_type g, const scalar_type c, const scalar_type d) : gamma(g), coef(c), degree(d) {} - sparse_polynomial_kernel() : gamma(1), 
coef(0), degree(1) {} - sparse_polynomial_kernel( - const sparse_polynomial_kernel& k - ) : gamma(k.gamma), coef(k.coef), degree(k.degree) {} - - typedef T type; - const scalar_type gamma; - const scalar_type coef; - const scalar_type degree; - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const - { - return std::pow(gamma*(dot(a,b)) + coef, degree); - } - - sparse_polynomial_kernel& operator= ( - const sparse_polynomial_kernel& k - ) - { - const_cast<scalar_type&>(gamma) = k.gamma; - const_cast<scalar_type&>(coef) = k.coef; - const_cast<scalar_type&>(degree) = k.degree; - return *this; - } - - bool operator== ( - const sparse_polynomial_kernel& k - ) const - { - return (gamma == k.gamma) && (coef == k.coef) && (degree == k.degree); - } - }; - - template < - typename T - > - void serialize ( - const sparse_polynomial_kernel<T>& item, - std::ostream& out - ) - { - try - { - serialize(item.gamma, out); - serialize(item.coef, out); - serialize(item.degree, out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type sparse_polynomial_kernel"); - } - } - - template < - typename T - > - void deserialize ( - sparse_polynomial_kernel<T>& item, - std::istream& in - ) - { - typedef typename T::value_type::second_type scalar_type; - try - { - deserialize(const_cast<scalar_type&>(item.gamma), in); - deserialize(const_cast<scalar_type&>(item.coef), in); - deserialize(const_cast<scalar_type&>(item.degree), in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type sparse_polynomial_kernel"); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct sparse_sigmoid_kernel - { - typedef typename T::value_type::second_type scalar_type; - typedef T sample_type; - typedef default_memory_manager mem_manager_type; - - sparse_sigmoid_kernel(const scalar_type g, const scalar_type c) : gamma(g), coef(c) {} - sparse_sigmoid_kernel() : gamma(0.1), coef(-1.0) {} - sparse_sigmoid_kernel( - const sparse_sigmoid_kernel& k - ) : gamma(k.gamma), coef(k.coef) {} - - typedef T type; - const scalar_type gamma; - const scalar_type coef; - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const - { - return std::tanh(gamma*(dot(a,b)) + coef); - } - - sparse_sigmoid_kernel& operator= ( - const sparse_sigmoid_kernel& k - ) - { - const_cast<scalar_type&>(gamma) = k.gamma; - const_cast<scalar_type&>(coef) = k.coef; - return *this; - } - - bool operator== ( - const sparse_sigmoid_kernel& k - ) const - { - return (gamma == k.gamma) && (coef == k.coef); - } - }; - - template < - typename T - > - void serialize ( - const sparse_sigmoid_kernel<T>& item, - std::ostream& out - ) - { - try - { - serialize(item.gamma, out); - serialize(item.coef, out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type sparse_sigmoid_kernel"); - } - } - - template < - typename T - > - void deserialize ( - sparse_sigmoid_kernel<T>& item, - std::istream& in - ) - { - typedef typename T::value_type::second_type scalar_type; - try - { - deserialize(const_cast<scalar_type&>(item.gamma), in); - deserialize(const_cast<scalar_type&>(item.coef), in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type sparse_sigmoid_kernel"); - } - } - -// 
---------------------------------------------------------------------------------------- - - template <typename T> - struct sparse_linear_kernel - { - typedef typename T::value_type::second_type scalar_type; - typedef T sample_type; - typedef default_memory_manager mem_manager_type; - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const - { - return dot(a,b); - } - - bool operator== ( - const sparse_linear_kernel& - ) const - { - return true; - } - }; - - template < - typename T - > - void serialize ( - const sparse_linear_kernel<T>& , - std::ostream& - ){} - - template < - typename T - > - void deserialize ( - sparse_linear_kernel<T>& , - std::istream& - ){} - -// ---------------------------------------------------------------------------------------- - - template <typename T> - struct sparse_histogram_intersection_kernel - { - typedef typename T::value_type::second_type scalar_type; - typedef T sample_type; - typedef default_memory_manager mem_manager_type; - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const - { - typename sample_type::const_iterator ai = a.begin(); - typename sample_type::const_iterator bi = b.begin(); - - scalar_type sum = 0; - while (ai != a.end() && bi != b.end()) - { - if (ai->first == bi->first) - { - sum += std::min(ai->second , bi->second); - ++ai; - ++bi; - } - else if (ai->first < bi->first) - { - ++ai; - } - else - { - ++bi; - } - } - - return sum; - } - - bool operator== ( - const sparse_histogram_intersection_kernel& - ) const - { - return true; - } - }; - - template < - typename T - > - void serialize ( - const sparse_histogram_intersection_kernel<T>& , - std::ostream& - ){} - - template < - typename T - > - void deserialize ( - sparse_histogram_intersection_kernel<T>& , - std::istream& - ){} - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_SPARSE_KERNEL - - - diff --git a/ml/dlib/dlib/svm/sparse_kernel_abstract.h b/ml/dlib/dlib/svm/sparse_kernel_abstract.h deleted file mode 100644 index 55f9d7caa..000000000 --- a/ml/dlib/dlib/svm/sparse_kernel_abstract.h +++ /dev/null @@ -1,486 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SVm_SPARSE_KERNEL_ABSTRACT_ -#ifdef DLIB_SVm_SPARSE_KERNEL_ABSTRACT_ - -#include <cmath> -#include <limits> -#include "../algs.h" -#include "../serialize.h" -#include "kernel_abstract.h" -#include "sparse_vector_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct sparse_radial_basis_kernel - { - /*! - REQUIREMENTS ON T - Must be a sparse vector as defined in dlib/svm/sparse_vector_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object represents a radial basis function kernel - that works with sparse vectors. - - THREAD SAFETY - This kernel is threadsafe. - !*/ - - typedef typename T::value_type::second_type scalar_type; - typedef T sample_type; - typedef default_memory_manager mem_manager_type; - - const scalar_type gamma; - - sparse_radial_basis_kernel( - ); - /*! - ensures - - #gamma == 0.1 - !*/ - - sparse_radial_basis_kernel( - const sparse_radial_basis_kernel& k - ); - /*! - ensures - - #gamma == k.gamma - !*/ - - sparse_radial_basis_kernel( - const scalar_type g - ); - /*! 
- ensures - - #gamma == g - !*/ - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const; - /*! - requires - - a is a sparse vector - - b is a sparse vector - ensures - - returns exp(-gamma * distance_squared(a,b)) - !*/ - - sparse_radial_basis_kernel& operator= ( - const sparse_radial_basis_kernel& k - ); - /*! - ensures - - #gamma = k.gamma - - returns *this - !*/ - - bool operator== ( - const sparse_radial_basis_kernel& k - ) const; - /*! - ensures - - if (k and *this are identical) then - - returns true - - else - - returns false - !*/ - - }; - - template < - typename T - > - void serialize ( - const sparse_radial_basis_kernel<T>& item, - std::ostream& out - ); - /*! - provides serialization support for sparse_radial_basis_kernel - !*/ - - template < - typename T - > - void deserialize ( - sparse_radial_basis_kernel<T>& item, - std::istream& in - ); - /*! - provides deserialization support for sparse_radial_basis_kernel - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct sparse_sigmoid_kernel - { - /*! - REQUIREMENTS ON T - Must be a sparse vector as defined in dlib/svm/sparse_vector_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object represents a sigmoid kernel - that works with sparse vectors. - - THREAD SAFETY - This kernel is threadsafe. - !*/ - - typedef typename T::value_type::second_type scalar_type; - typedef T sample_type; - typedef default_memory_manager mem_manager_type; - - const scalar_type gamma; - const scalar_type coef; - - sparse_sigmoid_kernel( - ); - /*! - ensures - - #gamma == 0.1 - - #coef == -1.0 - !*/ - - sparse_sigmoid_kernel( - const sparse_sigmoid_kernel& k - ); - /*! - ensures - - #gamma == k.gamma - - #coef == k.coef - !*/ - - sparse_sigmoid_kernel( - const scalar_type g, - const scalar_type c - ); - /*! - ensures - - #gamma == g - - #coef == c - !*/ - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const; - /*! - requires - - a is a sparse vector - - b is a sparse vector - ensures - - returns tanh(gamma * dot(a,b) + coef) - !*/ - - sparse_sigmoid_kernel& operator= ( - const sparse_sigmoid_kernel& k - ); - /*! - ensures - - #gamma = k.gamma - - #coef = k.coef - - returns *this - !*/ - - bool operator== ( - const sparse_sigmoid_kernel& k - ) const; - /*! - ensures - - if (k and *this are identical) then - - returns true - - else - - returns false - !*/ - }; - - template < - typename T - > - void serialize ( - const sparse_sigmoid_kernel<T>& item, - std::ostream& out - ); - /*! - provides serialization support for sparse_sigmoid_kernel - !*/ - - template < - typename T - > - void deserialize ( - sparse_sigmoid_kernel<T>& item, - std::istream& in - ); - /*! - provides deserialization support for sparse_sigmoid_kernel - !*/ - - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct sparse_polynomial_kernel - { - /*! - REQUIREMENTS ON T - Must be a sparse vector as defined in dlib/svm/sparse_vector_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object represents a polynomial kernel - that works with sparse vectors. - - THREAD SAFETY - This kernel is threadsafe. - !*/ - - typedef typename T::value_type::second_type scalar_type; - typedef T sample_type; - typedef default_memory_manager mem_manager_type; - - const scalar_type gamma; - const scalar_type coef; - const scalar_type degree; - - sparse_polynomial_kernel( - ); - /*! 
- ensures - - #gamma == 1 - - #coef == 0 - - #degree == 1 - !*/ - - sparse_polynomial_kernel( - const sparse_polynomial_kernel& k - ); - /*! - ensures - - #gamma == k.gamma - - #coef == k.coef - - #degree == k.degree - !*/ - - sparse_polynomial_kernel( - const scalar_type g, - const scalar_type c, - const scalar_type d - ); - /*! - ensures - - #gamma == g - - #coef == c - - #degree == d - !*/ - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const; - /*! - requires - - a is a sparse vector - - b is a sparse vector - ensures - - returns pow(gamma * dot(a,b) + coef, degree) - !*/ - - sparse_polynomial_kernel& operator= ( - const sparse_polynomial_kernel& k - ); - /*! - ensures - - #gamma = k.gamma - - #coef = k.coef - - #degree = k.degree - - returns *this - !*/ - - bool operator== ( - const sparse_polynomial_kernel& k - ) const; - /*! - ensures - - if (k and *this are identical) then - - returns true - - else - - returns false - !*/ - }; - - template < - typename T - > - void serialize ( - const sparse_polynomial_kernel<T>& item, - std::ostream& out - ); - /*! - provides serialization support for sparse_polynomial_kernel - !*/ - - template < - typename T - > - void deserialize ( - sparse_polynomial_kernel<T>& item, - std::istream& in - ); - /*! - provides deserialization support for sparse_polynomial_kernel - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct sparse_linear_kernel - { - /*! - REQUIREMENTS ON T - Must be a sparse vector as defined in dlib/svm/sparse_vector_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object represents a linear function kernel - that works with sparse vectors. - - THREAD SAFETY - This kernel is threadsafe. - !*/ - - typedef typename T::value_type::second_type scalar_type; - typedef T sample_type; - typedef default_memory_manager mem_manager_type; - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const; - /*! - requires - - a is a sparse vector - - b is a sparse vector - ensures - - returns dot(a,b) - !*/ - - bool operator== ( - const sparse_linear_kernel& k - ) const; - /*! - ensures - - returns true - !*/ - }; - - template < - typename T - > - void serialize ( - const sparse_linear_kernel<T>& item, - std::ostream& out - ); - /*! - provides serialization support for sparse_linear_kernel - !*/ - - template < - typename T - > - void deserialize ( - sparse_linear_kernel<T>& item, - std::istream& in - ); - /*! - provides deserialization support for sparse_linear_kernel - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - struct sparse_histogram_intersection_kernel - { - /*! - REQUIREMENTS ON T - Must be a sparse vector as defined in dlib/svm/sparse_vector_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object represents a histogram intersection kernel - that works with sparse vectors. - - THREAD SAFETY - This kernel is threadsafe. - !*/ - - typedef typename T::value_type::second_type scalar_type; - typedef T sample_type; - typedef default_memory_manager mem_manager_type; - - scalar_type operator() ( - const sample_type& a, - const sample_type& b - ) const; - /*! - requires - - a is a sparse vector - - b is a sparse vector - - all the values in a and b are >= 0 - ensures - - Let A(i) denote the value of the ith dimension of the a vector. - - Let B(i) denote the value of the ith dimension of the b vector. 
- - returns sum over all i: std::min(A(i), B(i)) - !*/ - - bool operator== ( - const sparse_histogram_intersection_kernel& k - ) const; - /*! - ensures - - returns true - !*/ - }; - - template < - typename T - > - void serialize ( - const sparse_histogram_intersection_kernel<T>& item, - std::ostream& out - ); - /*! - provides serialization support for sparse_histogram_intersection_kernel - !*/ - - template < - typename T - > - void deserialize ( - sparse_histogram_intersection_kernel<T>& item, - std::istream& in - ); - /*! - provides deserialization support for sparse_histogram_intersection_kernel - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_SPARSE_KERNEL_ABSTRACT_ - - diff --git a/ml/dlib/dlib/svm/sparse_vector.h b/ml/dlib/dlib/svm/sparse_vector.h deleted file mode 100644 index c42723f89..000000000 --- a/ml/dlib/dlib/svm/sparse_vector.h +++ /dev/null @@ -1,1170 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_SVm_SPARSE_VECTOR -#define DLIB_SVm_SPARSE_VECTOR - -#include "sparse_vector_abstract.h" -#include <cmath> -#include <limits> -#include "../algs.h" -#include <vector> -#include <map> -#include "../graph_utils/edge_list_graphs.h" -#include "../matrix.h" - - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template <typename T, typename U> - typename T::value_type::second_type distance_squared ( - const T& a, - const U& b - ) - { - typedef typename T::value_type::second_type scalar_type; - typedef typename U::value_type::second_type scalar_typeU; - // Both T and U must contain the same kinds of elements - COMPILE_TIME_ASSERT((is_same_type<scalar_type, scalar_typeU>::value)); - - typename T::const_iterator ai = a.begin(); - typename U::const_iterator bi = b.begin(); - - scalar_type sum = 0, temp = 0; - while (ai != a.end() && bi != b.end()) - { - if (ai->first == bi->first) - { - temp = ai->second - bi->second; - ++ai; - ++bi; - } - else if (ai->first < bi->first) - { - temp = ai->second; - ++ai; - } - else - { - temp = bi->second; - ++bi; - } - - sum += temp*temp; - } - - while (ai != a.end()) - { - sum += ai->second*ai->second; - ++ai; - } - while (bi != b.end()) - { - sum += bi->second*bi->second; - ++bi; - } - - return sum; - } - -// ------------------------------------------------------------------------------------ - - template <typename T, typename U, typename V, typename W> - typename T::value_type::second_type distance_squared ( - const V& a_scale, - const T& a, - const W& b_scale, - const U& b - ) - { - typedef typename T::value_type::second_type scalar_type; - typedef typename U::value_type::second_type scalar_typeU; - // Both T and U must contain the same kinds of elements - COMPILE_TIME_ASSERT((is_same_type<scalar_type, scalar_typeU>::value)); - - typename T::const_iterator ai = a.begin(); - typename U::const_iterator bi = b.begin(); - - scalar_type sum = 0, temp = 0; - while (ai != a.end() && bi != b.end()) - { - if (ai->first == bi->first) - { - temp = a_scale*ai->second - b_scale*bi->second; - ++ai; - ++bi; - } - else if (ai->first < bi->first) - { - temp = a_scale*ai->second; - ++ai; - } - else - { - temp = b_scale*bi->second; - ++bi; - } - - sum += temp*temp; - } - - while (ai != a.end()) - { - sum += a_scale*a_scale*ai->second*ai->second; - ++ai; - } - while (bi != b.end()) - { - sum += 
b_scale*b_scale*bi->second*bi->second; - ++bi; - } - - return sum; - } - -// ------------------------------------------------------------------------------------ - - template <typename T, typename U> - typename T::value_type::second_type distance ( - const T& a, - const U& b - ) - { - return std::sqrt(distance_squared(a,b)); - } - -// ------------------------------------------------------------------------------------ - - template <typename T, typename U, typename V, typename W> - typename T::value_type::second_type distance ( - const V& a_scale, - const T& a, - const W& b_scale, - const U& b - ) - { - return std::sqrt(distance_squared(a_scale,a,b_scale,b)); - } - -// ------------------------------------------------------------------------------------ - // ------------------------------------------------------------------------------------ - - template <typename T, typename EXP> - typename enable_if<is_matrix<T> >::type assign ( - T& dest, - const matrix_exp<EXP>& src - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(src), - "\t void assign(dest,src)" - << "\n\t the src matrix must be a row or column vector" - ); - - dest = src; - } - - template <typename T, typename EXP> - typename disable_if<is_matrix<T> >::type assign ( - T& dest, - const matrix_exp<EXP>& src - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(src), - "\t void assign(dest,src)" - << "\n\t the src matrix must be a row or column vector" - ); - - dest.clear(); - typedef typename T::value_type item_type; - for (long i = 0; i < src.size(); ++i) - { - dest.insert(dest.end(),item_type(i, src(i))); - } - } - - template <typename T, typename U> - typename disable_if_c<is_matrix<T>::value || is_matrix<U>::value>::type assign ( - T& dest, // sparse - const U& src // sparse - ) - { - dest.assign(src.begin(), src.end()); - } - - template <typename T, typename U, typename Comp, typename Alloc, typename S> - typename disable_if<is_matrix<S> >::type assign ( - std::map<T,U,Comp,Alloc>& dest, // sparse - const S& src // sparse - ) - { - dest.clear(); - dest.insert(src.begin(), src.end()); - } - -// ------------------------------------------------------------------------------------ - // ------------------------------------------------------------------------------------ - - template <typename T> - struct has_unsigned_keys - { - static const bool value = is_unsigned_type<typename T::value_type::first_type>::value; - }; - -// ------------------------------------------------------------------------------------ - - namespace impl - { - template <typename T, typename U> - typename T::value_type::second_type general_dot ( - const T& a, - const U& b - ) - { - typedef typename T::value_type::second_type scalar_type; - - typename T::const_iterator ai = a.begin(); - typename U::const_iterator bi = b.begin(); - - scalar_type sum = 0; - while (ai != a.end() && bi != b.end()) - { - if (ai->first == bi->first) - { - sum += ai->second * bi->second; - ++ai; - ++bi; - } - else if (ai->first < bi->first) - { - ++ai; - } - else - { - ++bi; - } - } - - return sum; - } - - template <typename T, typename U> - inline typename T::value_type::second_type dot ( - const T& a, - const U& b - ) - { - return general_dot(a,b); - } - - template <typename T, typename U, typename alloc> - U dot ( - const std::vector<std::pair<T,U>,alloc>& a, - const std::vector<std::pair<T,U>,alloc>& b - ) - { - // You are getting this error because you are attempting to use sparse sample vectors - // but you aren't using an unsigned 
integer as your key type in the sparse vectors. - COMPILE_TIME_ASSERT(is_unsigned_type<T>::value); - - if (a.size() == 0 || b.size() == 0) - return 0; - - // if a is really a dense vector but just represented in a sparse container - if (a.back().first == a.size()-1) - { - double sum = 0; - for (unsigned long i = 0; i < b.size(); ++i) - { - if (b[i].first >= a.size()) - break; - sum += a[b[i].first].second * b[i].second; - } - return sum; - } - // if b is really a dense vector but just represented in a sparse container - else if (b.back().first == b.size()-1) - { - double sum = 0; - for (unsigned long i = 0; i < a.size(); ++i) - { - if (a[i].first >= b.size()) - break; - sum += b[a[i].first].second * a[i].second; - } - return sum; - } - else - { - return general_dot(a,b); - } - } - } - - template <typename T> - inline typename T::value_type::second_type dot ( - const T& a, - const T& b - ) - { - return impl::dot(a,b); - } - - template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> - inline T4 dot ( - const std::vector<T1,T2>& a, - const std::map<T3,T4,T5,T6>& b - ) - { - return impl::dot(a,b); - } - - template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> - inline T4 dot ( - const std::map<T3,T4,T5,T6>& a, - const std::vector<T1,T2>& b - ) - { - return impl::dot(a,b); - } - -// ------------------------------------------------------------------------------------ - - template <typename T, typename EXP> - typename T::value_type::second_type dot ( - const T& a, - const matrix_exp<EXP>& b - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(b), - "\t scalar_type dot(sparse_vector a, dense_vector b)" - << "\n\t 'b' must be a vector to be used in a dot product." - ); - - typedef typename T::value_type::second_type scalar_type; - typedef typename T::value_type::first_type first_type; - - scalar_type sum = 0; - for (typename T::const_iterator ai = a.begin(); - (ai != a.end()) && (ai->first < static_cast<first_type>(b.size())); - ++ai) - { - sum += ai->second * b(ai->first); - } - - return sum; - } - -// ------------------------------------------------------------------------------------ - - template <typename T, typename EXP> - typename T::value_type::second_type dot ( - const matrix_exp<EXP>& b, - const T& a - ) - { - return dot(a,b); - } - -// ------------------------------------------------------------------------------------ - - template <typename T> - typename T::value_type::second_type length_squared ( - const T& a - ) - { - typedef typename T::value_type::second_type scalar_type; - - typename T::const_iterator i; - - scalar_type sum = 0; - - for (i = a.begin(); i != a.end(); ++i) - { - sum += i->second * i->second; - } - - return sum; - } - -// ------------------------------------------------------------------------------------ - - template <typename T> - typename T::value_type::second_type length ( - const T& a - ) - { - return std::sqrt(length_squared(a)); - } - -// ------------------------------------------------------------------------------------ - - template <typename T, typename U> - typename disable_if<is_matrix<T>,void>::type scale_by ( - T& a, - const U& value - ) - { - for (typename T::iterator i = a.begin(); i != a.end(); ++i) - { - i->second *= value; - } - } - - template <typename T, typename U> - typename enable_if<is_matrix<T>,void>::type scale_by ( - T& a, - const U& value - ) - { - a *= value; - } - -// ------------------------------------------------------------------------------------ 
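// A minimal usage sketch of the functions above (a hypothetical example,
// assuming std::map<unsigned long,double> as the sparse vector type):
//
//     std::map<unsigned long,double> a, b;
//     a[0] = 2;  a[4] = 1;      // a represents the dense vector [2,0,0,0,1]
//     b[4] = 3;  b[7] = 5;      // b represents [0,0,0,0,3,0,0,5]
//     dot(a,b);                 // == 1*3 == 3; only shared keys contribute
//     length(a);                // == std::sqrt(2.0*2.0 + 1.0*1.0)
//     scale_by(a, 10);          // a now represents [20,0,0,0,10]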
- - template <typename T> - typename disable_if<is_matrix<T>,T>::type add ( - const T& a, - const T& b - ) - { - T temp; - - typename T::const_iterator i = a.begin(); - typename T::const_iterator j = b.begin(); - while (i != a.end() && j != b.end()) - { - if (i->first == j->first) - { - temp.insert(temp.end(), std::make_pair(i->first, i->second + j->second)); - ++i; - ++j; - } - else if (i->first < j->first) - { - temp.insert(temp.end(), *i); - ++i; - } - else - { - temp.insert(temp.end(), *j); - ++j; - } - } - - while (i != a.end()) - { - temp.insert(temp.end(), *i); - ++i; - } - while (j != b.end()) - { - temp.insert(temp.end(), *j); - ++j; - } - - return temp; - } - - template <typename T, typename U> - typename enable_if_c<is_matrix<T>::value && is_matrix<U>::value, matrix_add_exp<T,U> >::type add ( - const T& a, - const U& b - ) - { - return matrix_add_exp<T,U>(a.ref(),b.ref()); - } - -// ------------------------------------------------------------------------------------ - - template <typename T> - typename disable_if<is_matrix<T>,T>::type subtract ( - const T& a, - const T& b - ) - { - T temp; - - typename T::const_iterator i = a.begin(); - typename T::const_iterator j = b.begin(); - while (i != a.end() && j != b.end()) - { - if (i->first == j->first) - { - temp.insert(temp.end(), std::make_pair(i->first, i->second - j->second)); - ++i; - ++j; - } - else if (i->first < j->first) - { - temp.insert(temp.end(), *i); - ++i; - } - else - { - temp.insert(temp.end(), std::make_pair(j->first, -j->second)); - ++j; - } - } - - while (i != a.end()) - { - temp.insert(temp.end(), *i); - ++i; - } - while (j != b.end()) - { - temp.insert(temp.end(), std::make_pair(j->first, -j->second)); - ++j; - } - - return temp; - } - - template <typename T, typename U> - typename enable_if_c<is_matrix<T>::value && is_matrix<U>::value, matrix_subtract_exp<T,U> >::type subtract ( - const T& a, - const U& b - ) - { - return matrix_subtract_exp<T,U>(a.ref(),b.ref()); - } - -// ------------------------------------------------------------------------------------ -// ------------------------------------------------------------------------------------ - - namespace impl - { - template <typename T> - typename enable_if<is_matrix<typename T::type>,unsigned long>::type max_index_plus_one ( - const T& samples - ) - { - if (samples.size() > 0) - return samples(0).size(); - else - return 0; - } - - template <typename T> - typename enable_if<is_built_in_scalar_type<typename T::type>,unsigned long>::type max_index_plus_one ( - const T& sample - ) - { - return sample.size(); - } - - // This !is_built_in_scalar_type<typename T::type>::value is here to avoid an inexplicable bug in Visual Studio 2005 - template <typename T> - typename enable_if_c<(!is_built_in_scalar_type<typename T::type>::value) && (is_pair<typename T::type::value_type>::value) ,unsigned long>::type - max_index_plus_one ( - const T& samples - ) - { - typedef typename T::type sample_type; - // You are getting this error because you are attempting to use sparse sample vectors - // but you aren't using an unsigned integer as your key type in the sparse vectors. - COMPILE_TIME_ASSERT(has_unsigned_keys<sample_type>::value); - - - // these should be sparse samples so look over all of them to find the max index. 
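// For example, if a hypothetical sample is the sparse vector
// { (0,1.0), (6,0.5) } then its largest key is 6, so it contributes a
// dimensionality of 6+1 == 7 to the running maximum computed below.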
- unsigned long max_dim = 0; - for (long i = 0; i < samples.size(); ++i) - { - if (samples(i).size() > 0) - max_dim = std::max<unsigned long>(max_dim, (--samples(i).end())->first + 1); - } - - return max_dim; - } - } - - template <typename T> - typename enable_if<is_pair<typename T::value_type>,unsigned long>::type max_index_plus_one ( - const T& sample - ) - { - if (sample.size() > 0) - return (--sample.end())->first + 1; - return 0; - } - - template <typename T> - typename disable_if_c<is_pair<typename T::value_type>::value || - is_same_type<typename T::value_type,sample_pair>::value || - is_same_type<typename T::value_type,ordered_sample_pair>::value , unsigned long>::type - max_index_plus_one ( - const T& samples - ) - { - return impl::max_index_plus_one(mat(samples)); - } - -// ------------------------------------------------------------------------------------ - - template <typename T, long NR, long NC, typename MM, typename L, typename EXP> - inline void add_to ( - matrix<T,NR,NC,MM,L>& dest, - const matrix_exp<EXP>& src - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), - "\t void add_to(dest,src)" - << "\n\t dest must be a vector large enough to hold the src vector." - << "\n\t is_vector(dest): " << is_vector(dest) - << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) - << "\n\t dest.size(): " << dest.size() - ); - - for (long r = 0; r < src.size(); ++r) - dest(r) += src(r); - } - - template <typename T, long NR, long NC, typename MM, typename L, typename EXP> - inline typename disable_if<is_matrix<EXP> >::type add_to ( - matrix<T,NR,NC,MM,L>& dest, - const EXP& src - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), - "\t void add_to(dest,src)" - << "\n\t dest must be a vector large enough to hold the src vector." - << "\n\t is_vector(dest): " << is_vector(dest) - << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) - << "\n\t dest.size(): " << dest.size() - ); - - for (typename EXP::const_iterator i = src.begin(); i != src.end(); ++i) - dest(i->first) += i->second; - } - -// ------------------------------------------------------------------------------------ - - template <typename T, long NR, long NC, typename MM, typename L, typename EXP, typename U> - inline void add_to ( - matrix<T,NR,NC,MM,L>& dest, - const matrix_exp<EXP>& src, - const U& C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), - "\t void add_to(dest,src)" - << "\n\t dest must be a vector large enough to hold the src vector." - << "\n\t is_vector(dest): " << is_vector(dest) - << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) - << "\n\t dest.size(): " << dest.size() - ); - - for (long r = 0; r < src.size(); ++r) - dest(r) += C*src(r); - } - - template <typename T, long NR, long NC, typename MM, typename L, typename EXP, typename U> - inline typename disable_if<is_matrix<EXP> >::type add_to ( - matrix<T,NR,NC,MM,L>& dest, - const EXP& src, - const U& C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), - "\t void add_to(dest,src)" - << "\n\t dest must be a vector large enough to hold the src vector." 
- << "\n\t is_vector(dest): " << is_vector(dest) - << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) - << "\n\t dest.size(): " << dest.size() - ); - - for (typename EXP::const_iterator i = src.begin(); i != src.end(); ++i) - dest(i->first) += C*i->second; - } - -// ------------------------------------------------------------------------------------ - - template <typename T, long NR, long NC, typename MM, typename L, typename EXP> - inline void subtract_from ( - matrix<T,NR,NC,MM,L>& dest, - const matrix_exp<EXP>& src - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), - "\t void subtract_from(dest,src)" - << "\n\t dest must be a vector large enough to hold the src vector." - << "\n\t is_vector(dest): " << is_vector(dest) - << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) - << "\n\t dest.size(): " << dest.size() - ); - - for (long r = 0; r < src.size(); ++r) - dest(r) -= src(r); - } - - template <typename T, long NR, long NC, typename MM, typename L, typename EXP> - inline typename disable_if<is_matrix<EXP> >::type subtract_from ( - matrix<T,NR,NC,MM,L>& dest, - const EXP& src - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), - "\t void subtract_from(dest,src)" - << "\n\t dest must be a vector large enough to hold the src vector." - << "\n\t is_vector(dest): " << is_vector(dest) - << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) - << "\n\t dest.size(): " << dest.size() - ); - - for (typename EXP::const_iterator i = src.begin(); i != src.end(); ++i) - dest(i->first) -= i->second; - } - -// ------------------------------------------------------------------------------------ - - template <typename T, long NR, long NC, typename MM, typename L, typename EXP, typename U> - inline void subtract_from ( - matrix<T,NR,NC,MM,L>& dest, - const matrix_exp<EXP>& src, - const U& C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), - "\t void subtract_from(dest,src)" - << "\n\t dest must be a vector large enough to hold the src vector." - << "\n\t is_vector(dest): " << is_vector(dest) - << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) - << "\n\t dest.size(): " << dest.size() - ); - - for (long r = 0; r < src.size(); ++r) - dest(r) -= C*src(r); - } - - template <typename T, long NR, long NC, typename MM, typename L, typename EXP, typename U> - inline typename disable_if<is_matrix<EXP> >::type subtract_from ( - matrix<T,NR,NC,MM,L>& dest, - const EXP& src, - const U& C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), - "\t void subtract_from(dest,src)" - << "\n\t dest must be a vector large enough to hold the src vector." 
- << "\n\t is_vector(dest): " << is_vector(dest) - << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) - << "\n\t dest.size(): " << dest.size() - ); - - for (typename EXP::const_iterator i = src.begin(); i != src.end(); ++i) - dest(i->first) -= C*i->second; - } - -// ------------------------------------------------------------------------------------ - - template <typename T> - typename T::value_type::second_type min ( - const T& a - ) - { - typedef typename T::value_type::second_type type; - - type temp = 0; - for (typename T::const_iterator i = a.begin(); i != a.end(); ++i) - { - if (temp > i->second) - temp = i->second; - } - return temp; - } - -// ------------------------------------------------------------------------------------ - - template <typename T> - typename T::value_type::second_type max ( - const T& a - ) - { - typedef typename T::value_type::second_type type; - - type temp = 0; - for (typename T::const_iterator i = a.begin(); i != a.end(); ++i) - { - if (temp < i->second) - temp = i->second; - } - return temp; - } - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template <typename sparse_vector_type> - inline matrix<typename sparse_vector_type::value_type::second_type,0,1> sparse_to_dense ( - const sparse_vector_type& vect, - unsigned long num_dimensions - ) - { - // You must use unsigned integral key types in your sparse vectors - typedef typename sparse_vector_type::value_type::first_type idx_type; - typedef typename sparse_vector_type::value_type::second_type value_type; - COMPILE_TIME_ASSERT(is_unsigned_type<idx_type>::value); - - matrix<value_type,0,1> result; - - if (vect.size() == 0) - return result; - - result.set_size(num_dimensions); - result = 0; - - for (typename sparse_vector_type::const_iterator j = vect.begin(); j != vect.end(); ++j) - { - if ((long)(j->first) < result.size()) - { - result(j->first) += j->second; - } - } - - return result; - } - } - -// ---------------------------------------------------------------------------------------- - - template <typename idx_type, typename value_type, typename alloc> - matrix<value_type,0,1> sparse_to_dense ( - const std::vector<std::pair<idx_type,value_type>,alloc>& vect, - unsigned long num_dimensions - ) - { - return impl::sparse_to_dense(vect,num_dimensions); - } - -// ---------------------------------------------------------------------------------------- - - template <typename idx_type, typename value_type, typename alloc> - matrix<value_type,0,1> sparse_to_dense ( - const std::vector<std::pair<idx_type,value_type>,alloc>& vect - ) - { - return impl::sparse_to_dense(vect, max_index_plus_one(vect)); - } - -// ---------------------------------------------------------------------------------------- - - template <typename T1, typename T2, typename T3, typename T4> - matrix<T2,0,1> sparse_to_dense ( - const std::map<T1,T2,T3,T4>& vect, - unsigned long num_dimensions - ) - { - return impl::sparse_to_dense(vect,num_dimensions); - } - -// ---------------------------------------------------------------------------------------- - - template <typename T1, typename T2, typename T3, typename T4> - matrix<T2,0,1> sparse_to_dense ( - const std::map<T1,T2,T3,T4>& vect - ) - { - return impl::sparse_to_dense(vect, max_index_plus_one(vect)); - } - -// ---------------------------------------------------------------------------------------- - - template <typename T> - typename enable_if<is_matrix<T>,T&>::type sparse_to_dense( - T& item - ) { return 
item; } - - template <typename EXP> - matrix<typename EXP::type,0,1> sparse_to_dense( - const matrix_exp<EXP>& item, - unsigned long num - ) - { - typedef typename EXP::type type; - if (item.size() == (long)num) - return item; - else if (item.size() < (long)num) - return join_cols(item, zeros_matrix<type>((long)num-item.size(),1)); - else - return colm(item,0,(long)num); - } - -// ---------------------------------------------------------------------------------------- - - template <typename sample_type, typename alloc> - std::vector<matrix<typename sample_type::value_type::second_type,0,1> > sparse_to_dense ( - const std::vector<sample_type, alloc>& samples, - unsigned long num_dimensions - ) - { - typedef typename sample_type::value_type pair_type; - typedef typename pair_type::second_type value_type; - - std::vector< matrix<value_type,0,1> > result; - - // now turn all the samples into dense samples - result.resize(samples.size()); - - for (unsigned long i = 0; i < samples.size(); ++i) - { - result[i] = sparse_to_dense(samples[i],num_dimensions); - } - - return result; - } - -// ---------------------------------------------------------------------------------------- - - template <typename sample_type, typename alloc> - std::vector<matrix<typename sample_type::value_type::second_type,0,1> > sparse_to_dense ( - const std::vector<sample_type, alloc>& samples - ) - { - return sparse_to_dense(samples, max_index_plus_one(samples)); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - T make_sparse_vector ( - const T& v - ) - { - // You must use unsigned integral key types in your sparse vectors - typedef typename T::value_type::first_type idx_type; - typedef typename T::value_type::second_type value_type; - COMPILE_TIME_ASSERT(is_unsigned_type<idx_type>::value); - std::map<idx_type,value_type> temp; - for (typename T::const_iterator i = v.begin(); i != v.end(); ++i) - { - temp[i->first] += i->second; - } - - return T(temp.begin(), temp.end()); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - void make_sparse_vector_inplace( - T& vect - ) - { - vect = make_sparse_vector(vect); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U, - typename alloc - > - void make_sparse_vector_inplace ( - std::vector<std::pair<T,U>,alloc>& vect - ) - { - if (vect.size() > 0) - { - std::sort(vect.begin(), vect.end()); - - // merge duplicates - for (unsigned long i = 1; i < vect.size(); ++i) - { - // if we found a duplicate - if (vect[i-1].first == vect[i].first) - { - // now start collapsing and merging the vector - unsigned long j = i-1; - for (unsigned long k = i; k < vect.size(); ++k) - { - if (vect[j].first == vect[k].first) - { - vect[j].second += vect[k].second; - } - else - { - ++j; - vect[j] = vect[k]; - } - } - - - // we removed elements when we merged so we need to adjust the size. 
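// E.g. a hypothetical input { (2,1), (5,4), (2,3) } sorts to
// { (2,1), (2,3), (5,4) } and then collapses in place to { (2,4), (5,4) },
// leaving j+1 == 2 live elements.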
- vect.resize(j+1); - return; - } - } - } - } - -// ---------------------------------------------------------------------------------------- - - template <typename EXP, typename T, long NR, long NC, typename MM, typename L> - void sparse_matrix_vector_multiply ( - const std::vector<sample_pair>& edges, - const matrix_exp<EXP>& v, - matrix<T,NR,NC,MM,L>& result - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(max_index_plus_one(edges) <= (unsigned long)v.size() && - is_col_vector(v), - "\t void sparse_matrix_vector_multiply()" - << "\n\t Invalid inputs were given to this function" - << "\n\t max_index_plus_one(edges): " << max_index_plus_one(edges) - << "\n\t v.size(): " << v.size() - << "\n\t is_col_vector(v): " << is_col_vector(v) - ); - - result.set_size(v.nr(),v.nc()); - result = 0; - - for (unsigned long k = 0; k < edges.size(); ++k) - { - const long i = edges[k].index1(); - const long j = edges[k].index2(); - const double d = edges[k].distance(); - - result(i) += v(j)*d; - if (i != j) - result(j) += v(i)*d; - } - } - -// ---------------------------------------------------------------------------------------- - - template <typename EXP> - matrix<typename EXP::type,0,1> sparse_matrix_vector_multiply ( - const std::vector<sample_pair>& edges, - const matrix_exp<EXP>& v - ) - { - matrix<typename EXP::type,0,1> result; - sparse_matrix_vector_multiply(edges,v,result); - return result; - } - -// ---------------------------------------------------------------------------------------- - - template <typename EXP, typename T, long NR, long NC, typename MM, typename L> - void sparse_matrix_vector_multiply ( - const std::vector<ordered_sample_pair>& edges, - const matrix_exp<EXP>& v, - matrix<T,NR,NC,MM,L>& result - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(max_index_plus_one(edges) <= (unsigned long)v.size() && - is_col_vector(v), - "\t void sparse_matrix_vector_multiply()" - << "\n\t Invalid inputs were given to this function" - << "\n\t max_index_plus_one(edges): " << max_index_plus_one(edges) - << "\n\t v.size(): " << v.size() - << "\n\t is_col_vector(v): " << is_col_vector(v) - ); - - - result.set_size(v.nr(),v.nc()); - result = 0; - - for (unsigned long k = 0; k < edges.size(); ++k) - { - const long i = edges[k].index1(); - const long j = edges[k].index2(); - const double d = edges[k].distance(); - - result(i) += v(j)*d; - } - } - -// ---------------------------------------------------------------------------------------- - - template <typename EXP> - matrix<typename EXP::type,0,1> sparse_matrix_vector_multiply ( - const std::vector<ordered_sample_pair>& edges, - const matrix_exp<EXP>& v - ) - { - matrix<typename EXP::type,0,1> result; - sparse_matrix_vector_multiply(edges,v,result); - return result; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename EXP, - typename sparse_vector_type, - typename T, - long NR, - long NC, - typename MM, - typename L - > - void sparse_matrix_vector_multiply ( - const matrix_exp<EXP>& m, - const sparse_vector_type& v, - matrix<T,NR,NC,MM,L>& result - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(max_index_plus_one(v) <= (unsigned long)m.nc(), - "\t void sparse_matrix_vector_multiply()" - << "\n\t Invalid inputs were given to this function" - << "\n\t max_index_plus_one(v): " << max_index_plus_one(v) - << "\n\t m.size(): " << m.size() - ); - - result.set_size(m.nr(),1); - result = 0; - - for (typename 
sparse_vector_type::const_iterator i = v.begin(); i != v.end(); ++i) - { - for (long r = 0; r < result.nr(); ++r) - { - result(r) += m(r, i->first)*i->second; - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename EXP, - typename sparse_vector_type - > - matrix<typename EXP::type,0,1> sparse_matrix_vector_multiply ( - const matrix_exp<EXP>& m, - const sparse_vector_type& v - ) - { - matrix<typename EXP::type,0,1> result; - sparse_matrix_vector_multiply(m,v,result); - return result; - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_SPARSE_VECTOR - diff --git a/ml/dlib/dlib/svm/sparse_vector_abstract.h b/ml/dlib/dlib/svm/sparse_vector_abstract.h deleted file mode 100644 index e0c8d1f8c..000000000 --- a/ml/dlib/dlib/svm/sparse_vector_abstract.h +++ /dev/null @@ -1,688 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SVm_SPARSE_VECTOR_ABSTRACT_ -#ifdef DLIB_SVm_SPARSE_VECTOR_ABSTRACT_ - -#include <cmath> -#include "../algs.h" -#include "../serialize.h" -#include "../matrix.h" -#include <map> -#include <vector> -#include "../graph_utils/sample_pair_abstract.h" -#include "../graph_utils/ordered_sample_pair_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - /*!A sparse_vectors - - In dlib, sparse vectors are represented using the container objects - in the C++ STL. In particular, a sparse vector is any container that - contains a range of std::pair<key, scalar_value> objects where: - - key is an unsigned integral type - - scalar_value is float, double, or long double - - the std::pair objects have unique key values - - the std::pair objects are sorted such that small keys come first - - Therefore, if an object satisfies the above requirements we call it a - "sparse vector". Additionally, we define the concept of an "unsorted sparse vector" - to be a sparse vector that doesn't necessarily have sorted or unique key values. - Therefore, all sparse vectors are valid unsorted sparse vectors but not the other - way around. - - An unsorted sparse vector with duplicate keys is always interpreted as - a vector where each dimension contains the sum of all corresponding elements - of the unsorted sparse vector. For example, an unsorted sparse vector - with the elements { (3,1), (0, 4), (3,5) } represents the 4D vector: - [4, 0, 0, 1+5] - - - - Examples of valid sparse vectors are: - - std::map<unsigned long, double> - - std::vector<std::pair<unsigned long, float> > where the vector is sorted. - (you could make sure it was sorted by applying std::sort to it) - - - Finally, by "dense vector" we mean a dlib::matrix object which represents - either a row or column vector. - - The rest of this file defines a number of helper functions for doing normal - vector arithmetic things with sparse vectors. - !*/ - -// ---------------------------------------------------------------------------------------- - - /*!A has_unsigned_keys - - This is a template where has_unsigned_keys<T>::value == true when T is a - sparse vector that contains unsigned integral keys and false otherwise. 
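            For example, under this definition:
            - has_unsigned_keys<std::map<unsigned long,double> >::value == true
            - has_unsigned_keys<std::vector<std::pair<long,double> > >::value == false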
- !*/ - - template <typename T> - struct has_unsigned_keys - { - static const bool value = is_unsigned_type<typename T::value_type::first_type>::value; - }; - -// ---------------------------------------------------------------------------------------- - - template <typename T, typename U> - typename T::value_type::second_type distance_squared ( - const T& a, - const U& b - ); - /*! - requires - - a and b are sparse vectors - ensures - - returns the squared distance between the vectors - a and b - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T, typename U, typename V, typename W> - typename T::value_type::second_type distance_squared ( - const V& a_scale, - const T& a, - const W& b_scale, - const U& b - ); - /*! - requires - - a and b are sparse vectors - ensures - - returns the squared distance between the vectors - a_scale*a and b_scale*b - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T, typename U> - typename T::value_type::second_type distance ( - const T& a, - const U& b - ); - /*! - requires - - a and b are sparse vectors - ensures - - returns the distance between the vectors - a and b. (i.e. std::sqrt(distance_squared(a,b))) - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T, typename U, typename V, typename W> - typename T::value_type::second_type distance ( - const V& a_scale, - const T& a, - const W& b_scale, - const U& b - ); - /*! - requires - - a and b are sparse vectors - ensures - - returns the distance between the vectors - a_scale*a and b_scale*b. (i.e. std::sqrt(distance_squared(a_scale,a,b_scale,b))) - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T, typename U> - void assign ( - T& dest, - const U& src - ); - /*! - requires - - dest == a sparse vector or a dense vector - - src == a sparse vector or a dense vector - - dest is not dense when src is sparse - (i.e. you can't assign a sparse vector to a dense vector. This is - because we don't know what the proper dimensionality should be for the - dense vector) - ensures - - #src represents the same vector as dest. - (conversion between sparse/dense formats is done automatically) - !*/ - - -// ---------------------------------------------------------------------------------------- - - template <typename T> - typename T::value_type::second_type dot ( - const T& a, - const T& b - ); - /*! - requires - - a and b are sparse vectors - ensures - - returns the dot product between the vectors a and b - !*/ - - template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> - T4 dot ( - const std::vector<T1,T2>& a, - const std::map<T3,T4,T5,T6>& b - ); - /*! - requires - - a and b are sparse vectors - ensures - - returns the dot product between the vectors a and b - !*/ - - template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> - T4 dot ( - const std::map<T3,T4,T5,T6>& a, - const std::vector<T1,T2>& b - ); - /*! - requires - - a and b are sparse vectors - ensures - - returns the dot product between the vectors a and b - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T, typename EXP> - typename T::value_type::second_type dot ( - const T& a, - const matrix_exp<EXP>& b - ); - /*! 
- requires - - a is an unsorted sparse vector - - is_vector(b) == true - ensures - - returns the dot product between the vectors a and b. - - if (max_index_plus_one(a) >= b.size()) then - - a's dimensionality is greater than b's dimensionality. In this case we - pretend b is padded by as many zeros as is needed to make the dot product - work. So this means that any elements in a that go beyond the length of - b are simply ignored. - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T, typename EXP> - typename T::value_type::second_type dot ( - const matrix_exp<EXP>& a, - const T& b - ); - /*! - requires - - b is an unsorted sparse vector - - is_vector(a) == true - ensures - - returns the dot product between the vectors a and b - - if (max_index_plus_one(b) >= a.size()) then - - b's dimensionality is greater than a's dimensionality. In this case we - pretend a is padded by as many zeros as is needed to make the dot product - work. So this means that any elements in b that go beyond the length of - a are simply ignored. - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T> - typename T::value_type::second_type length_squared ( - const T& a - ); - /*! - requires - - a is a sparse vector - ensures - - returns dot(a,a) - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T> - typename T::value_type::second_type length ( - const T& a - ); - /*! - requires - - a is a sparse vector - ensures - - returns std::sqrt(length_squared(a,a)) - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T, typename U> - void scale_by ( - T& a, - const U& value - ); - /*! - requires - - a is an unsorted sparse vector or a dlib::matrix - ensures - - #a == a*value - (i.e. multiplies every element of the vector a by value) - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T> - T add ( - const T& a, - const T& b - ); - /*! - requires - - a is a sparse vector or dlib::matrix - - b is a sparse vector or dlib::matrix - ensures - - returns a vector or matrix which represents a+b. If the inputs are - sparse vectors then the result is a sparse vector. - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T> - T subtract ( - const T& a, - const T& b - ); - /*! - requires - - a is a sparse vector or dlib::matrix - - b is a sparse vector or dlib::matrix - ensures - - returns a vector or matrix which represents a-b. If the inputs are - sparse vectors then the result is a sparse vector. - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T> - unsigned long max_index_plus_one ( - const T& samples - ); - /*! - requires - - samples == a single vector (either sparse or dense), or a container - of vectors which is either a dlib::matrix of vectors or something - convertible to a dlib::matrix via mat() (e.g. 
a std::vector) - Valid types of samples include (but are not limited to): - - dlib::matrix<double,0,1> // A single dense vector - - std::map<unsigned int, double> // A single sparse vector - - std::vector<dlib::matrix<double,0,1> > // An array of dense vectors - - std::vector<std::map<unsigned int, double> > // An array of sparse vectors - ensures - - This function tells you the dimensionality of a set of vectors. The vectors - can be either sparse or dense. - - if (samples.size() == 0) then - - returns 0 - - else if (samples contains dense vectors or is a dense vector) then - - returns the number of elements in the first sample vector. This means - we implicitly assume all dense vectors have the same length) - - else - - In this case samples contains sparse vectors or is a sparse vector. - - returns the largest element index in any sample + 1. Note that the element index values - are the values stored in std::pair::first. So this number tells you the dimensionality - of a set of sparse vectors. - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T, long NR, long NC, typename MM, typename L, typename SRC, typename U> - inline void add_to ( - matrix<T,NR,NC,MM,L>& dest, - const SRC& src, - const U& C = 1 - ); - /*! - requires - - SRC == a matrix expression or an unsorted sparse vector - - is_vector(dest) == true - - Let MAX denote the largest element index in src. - Then we require that: - - MAX < dest.size() - - (i.e. dest needs to be big enough to contain all the elements of src) - ensures - - #dest == dest + C*src - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T, long NR, long NC, typename MM, typename L, typename SRC, typename U> - inline void subtract_from ( - matrix<T,NR,NC,MM,L>& dest, - const SRC& src, - const U& C = 1 - ); - /*! - requires - - SRC == a matrix expression or an unsorted sparse vector - - is_vector(dest) == true - - Let MAX denote the largest element index in src. - Then we require that: - - MAX < dest.size() - - (i.e. dest needs to be big enough to contain all the elements of src) - ensures - - #dest == dest - C*src - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T> - typename T::value_type::second_type min ( - const T& vect - ); - /*! - requires - - T == an unsorted sparse vector - ensures - - returns the minimum value in the sparse vector vect. Note that - this value is always <= 0 since a sparse vector has an unlimited number - of 0 elements. - !*/ - -// ------------------------------------------------------------------------------------ - - template <typename T> - typename T::value_type::second_type max ( - const T& vect - ); - /*! - requires - - T == an unsorted sparse vector - ensures - - returns the maximum value in the sparse vector vect. Note that - this value is always >= 0 since a sparse vector has an unlimited number - of 0 elements. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename sample_type - > - matrix<typename sample_type::value_type::second_type,0,1> sparse_to_dense ( - const sample_type& vect - ); - /*! - requires - - vect must be a sparse vector or a dense column vector. - ensures - - converts the single sparse or dense vector vect to a dense (column matrix form) - representation. 
That is, this function returns a vector V such that: - - V.size() == max_index_plus_one(vect) - - for all valid j: - - V(j) == The value of the j'th dimension of the vector vect. Note - that V(j) is zero if it is a sparse vector that doesn't contain an - entry for the j'th dimension. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename sample_type - > - matrix<typename sample_type::value_type::second_type,0,1> sparse_to_dense ( - const sample_type& vect, - unsigned long num_dimensions - ); - /*! - requires - - vect must be a sparse vector or a dense column vector. - ensures - - converts the single sparse or dense vector vect to a dense (column matrix form) - representation. That is, this function returns a vector V such that: - - V.size() == num_dimensions - - for all valid j: - - V(j) == The value of the j'th dimension of the vector vect. Note - that V(j) is zero if it is a sparse vector that doesn't contain an - entry for the j'th dimension. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename sample_type, - typename alloc - > - std::vector<matrix<typename sample_type::value_type::second_type,0,1> > sparse_to_dense ( - const std::vector<sample_type, alloc>& samples - ); - /*! - requires - - all elements of samples must be sparse vectors or dense column vectors. - ensures - - converts from sparse sample vectors to dense (column matrix form) - - That is, this function returns a std::vector R such that: - - R contains column matrices - - R.size() == samples.size() - - for all valid i: - - R[i] == sparse_to_dense(samples[i], max_index_plus_one(samples)) - (i.e. the dense (i.e. dlib::matrix) version of the sparse sample - given by samples[i].) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename sample_type, - typename alloc - > - std::vector<matrix<typename sample_type::value_type::second_type,0,1> > sparse_to_dense ( - const std::vector<sample_type, alloc>& samples, - unsigned long num_dimensions - ); - /*! - requires - - all elements of samples must be sparse vectors or dense column vectors. - ensures - - converts from sparse sample vectors to dense (column matrix form) - - That is, this function returns a std::vector R such that: - - R contains column matrices - - R.size() == samples.size() - - for all valid i: - - R[i] == sparse_to_dense(samples[i], num_dimensions) - (i.e. the dense (i.e. dlib::matrix) version of the sparse sample - given by samples[i].) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - T make_sparse_vector ( - const T& v - ); - /*! - requires - - v is an unsorted sparse vector - ensures - - returns a copy of v which is a sparse vector. - (i.e. it will be properly sorted and not have any duplicate key values but - will still logically represent the same vector). - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - void make_sparse_vector_inplace( - T& vect - ); - /*! - requires - - v is an unsorted sparse vector - ensures - - vect == make_sparse_vector(vect) - - This function is just an optimized version of make_sparse_vector(), in - particular, when T is a std::vector<std::pair<>> type it is much more - efficient. 
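            For example, if vect is the unsorted sparse vector { (4,1), (0,2), (4,3) }
            then after this call vect == { (0,2), (4,4) }.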
- !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename EXP, - typename T, - long NR, - long NC, - typename MM, - typename L - > - void sparse_matrix_vector_multiply ( - const std::vector<sample_pair>& edges, - const matrix_exp<EXP>& v, - matrix<T,NR,NC,MM,L>& result - ); - /*! - requires - - is_col_vector(v) == true - - max_index_plus_one(edges) <= v.size() - ensures - - Interprets edges as representing a symmetric sparse matrix M. The elements - of M are defined such that, for all valid i,j: - - M(i,j) == sum of edges[k].distance() for all k where edges[k]==sample_pair(i,j) - - This means that any element of M that doesn't have any edges associated - with it will have a value of 0. - - #result == M*v - (i.e. this function multiplies the vector v with the sparse matrix - represented by edges and stores the output into result) - - get_rect(#result) == get_rect(v) - (i.e. result will have the same dimensions as v) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename EXP, - typename T, - long NR, - long NC, - typename MM, - typename L - > - void sparse_matrix_vector_multiply ( - const std::vector<ordered_sample_pair>& edges, - const matrix_exp<EXP>& v, - matrix<T,NR,NC,MM,L>& result - ); - /*! - requires - - is_col_vector(v) == true - - max_index_plus_one(edges) <= v.size() - ensures - - Interprets edges as representing a square sparse matrix M. The elements of M - are defined such that, for all valid i,j: - - M(i,j) == sum of edges[k].distance() for all k where edges[k]==ordered_sample_pair(i,j) - - This means that any element of M that doesn't have any edges associated - with it will have a value of 0. - - #result == M*v - (i.e. this function multiplies the vector v with the sparse matrix - represented by edges and stores the output into result) - - get_rect(#result) == get_rect(v) - (i.e. result will have the same dimensions as v) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename EXP - > - matrix<typename EXP::type,0,1> sparse_matrix_vector_multiply ( - const std::vector<sample_pair>& edges, - const matrix_exp<EXP>& v - ); - /*! - requires - - is_col_vector(v) == true - - max_index_plus_one(edges) <= v.size() - ensures - - This is just a convenience routine for invoking the above - sparse_matrix_vector_multiply() routine. In particular, it just calls - sparse_matrix_vector_multiply() with a temporary result matrix and then - returns the result. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename EXP - > - matrix<typename EXP::type,0,1> sparse_matrix_vector_multiply ( - const std::vector<ordered_sample_pair>& edges, - const matrix_exp<EXP>& v - ); - /*! - requires - - is_col_vector(v) == true - - max_index_plus_one(edges) <= v.size() - ensures - - This is just a convenience routine for invoking the above - sparse_matrix_vector_multiply() routine. In particular, it just calls - sparse_matrix_vector_multiply() with a temporary result matrix and then - returns the result. 
- !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename EXP, - typename sparse_vector_type, - typename T, - long NR, - long NC, - typename MM, - typename L - > - void sparse_matrix_vector_multiply ( - const matrix_exp<EXP>& m, - const sparse_vector_type& v, - matrix<T,NR,NC,MM,L>& result - ); - /*! - requires - - max_index_plus_one(v) <= m.nc() - - v == an unsorted sparse vector - ensures - - #result == m*v - (i.e. multiply m by the vector v and store the output in result) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename EXP, - typename sparse_vector_type - > - matrix<typename EXP::type,0,1> sparse_matrix_vector_multiply ( - const matrix_exp<EXP>& m, - const sparse_vector_type& v - ); - /*! - requires - - max_index_plus_one(v) <= m.nc() - - v == an unsorted sparse vector - ensures - - returns m*v - (i.e. multiply m by the vector v and return the resulting vector) - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_SPARSE_VECTOR_ABSTRACT_ - - - diff --git a/ml/dlib/dlib/svm/structural_assignment_trainer.h b/ml/dlib/dlib/svm/structural_assignment_trainer.h deleted file mode 100644 index d55b74ff0..000000000 --- a/ml/dlib/dlib/svm/structural_assignment_trainer.h +++ /dev/null @@ -1,294 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_Hh_ -#define DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_Hh_ - -#include "structural_assignment_trainer_abstract.h" -#include "../algs.h" -#include "../optimization.h" -#include "structural_svm_assignment_problem.h" -#include "num_nonnegative_weights.h" - - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class structural_assignment_trainer - { - public: - typedef typename feature_extractor::lhs_element lhs_element; - typedef typename feature_extractor::rhs_element rhs_element; - typedef std::pair<std::vector<lhs_element>, std::vector<rhs_element> > sample_type; - typedef std::vector<long> label_type; - typedef assignment_function<feature_extractor> trained_function_type; - - structural_assignment_trainer ( - ) - { - set_defaults(); - } - - explicit structural_assignment_trainer ( - const feature_extractor& fe_ - ) : fe(fe_) - { - set_defaults(); - } - - const feature_extractor& get_feature_extractor ( - ) const { return fe; } - - void set_num_threads ( - unsigned long num - ) - { - num_threads = num; - } - - unsigned long get_num_threads ( - ) const - { - return num_threads; - } - - void set_epsilon ( - double eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void structural_assignment_trainer::set_epsilon()" - << "\n\t eps_ must be greater than 0" - << "\n\t eps_: " << eps_ - << "\n\t this: " << this - ); - - eps = eps_; - } - - double get_epsilon ( - ) const { return eps; } - - void set_max_cache_size ( - unsigned long max_size - ) - { - max_cache_size = max_size; - } - - unsigned long get_max_cache_size ( - ) const - { - return max_cache_size; - } - - void be_verbose ( - ) - { - verbose = true; - } - - void be_quiet ( - ) - { - verbose = false; - } - - void set_oca ( - const oca& item - ) - { - solver = item; - } - - const oca get_oca ( - ) const - { - return solver; - 
} - - void set_c ( - double C_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C_ > 0, - "\t void structural_assignment_trainer::set_c()" - << "\n\t C_ must be greater than 0" - << "\n\t C_: " << C_ - << "\n\t this: " << this - ); - - C = C_; - } - - double get_c ( - ) const - { - return C; - } - - bool forces_assignment( - ) const { return force_assignment; } - - void set_forces_assignment ( - bool new_value - ) - { - force_assignment = new_value; - } - - void set_loss_per_false_association ( - double loss - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(loss > 0, - "\t void structural_assignment_trainer::set_loss_per_false_association(loss)" - << "\n\t Invalid inputs were given to this function " - << "\n\t loss: " << loss - << "\n\t this: " << this - ); - - loss_per_false_association = loss; - } - - double get_loss_per_false_association ( - ) const - { - return loss_per_false_association; - } - - void set_loss_per_missed_association ( - double loss - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(loss > 0, - "\t void structural_assignment_trainer::set_loss_per_missed_association(loss)" - << "\n\t Invalid inputs were given to this function " - << "\n\t loss: " << loss - << "\n\t this: " << this - ); - - loss_per_missed_association = loss; - } - - double get_loss_per_missed_association ( - ) const - { - return loss_per_missed_association; - } - - bool forces_last_weight_to_1 ( - ) const - { - return last_weight_1; - } - - void force_last_weight_to_1 ( - bool should_last_weight_be_1 - ) - { - last_weight_1 = should_last_weight_be_1; - } - - const assignment_function<feature_extractor> train ( - const std::vector<sample_type>& samples, - const std::vector<label_type>& labels - ) const - { - // make sure requires clause is not broken -#ifdef ENABLE_ASSERTS - if (force_assignment) - { - DLIB_ASSERT(is_forced_assignment_problem(samples, labels), - "\t assignment_function structural_assignment_trainer::train()" - << "\n\t invalid inputs were given to this function" - << "\n\t is_forced_assignment_problem(samples,labels): " << is_forced_assignment_problem(samples,labels) - << "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels) - << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels) - ); - } - else - { - DLIB_ASSERT(is_assignment_problem(samples, labels), - "\t assignment_function structural_assignment_trainer::train()" - << "\n\t invalid inputs were given to this function" - << "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels) - << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels) - ); - } -#endif - - - - structural_svm_assignment_problem<feature_extractor> prob(samples,labels, fe, force_assignment, num_threads, - loss_per_false_association, loss_per_missed_association); - - if (verbose) - prob.be_verbose(); - - prob.set_c(C); - prob.set_epsilon(eps); - prob.set_max_cache_size(max_cache_size); - - matrix<double,0,1> weights; - - // Take the min here because we want to prevent the user from accidentally - // forcing the bias term to be non-negative. 
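- // (The learned weight vector has fe.num_features()+1 entries, with the bias
- // stored last, so clamping the count of non-negative weights to
- // fe.num_features() keeps the constraint from ever touching the bias entry.)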
- const unsigned long num_nonneg = std::min(fe.num_features(),num_nonnegative_weights(fe)); - if (last_weight_1) - solver(prob, weights, num_nonneg, fe.num_features()-1); - else - solver(prob, weights, num_nonneg); - - const double bias = weights(weights.size()-1); - return assignment_function<feature_extractor>(colm(weights,0,weights.size()-1), bias,fe,force_assignment); - - } - - - private: - - bool force_assignment; - double C; - oca solver; - double eps; - bool verbose; - unsigned long num_threads; - unsigned long max_cache_size; - double loss_per_false_association; - double loss_per_missed_association; - bool last_weight_1; - - void set_defaults () - { - force_assignment = false; - C = 100; - verbose = false; - eps = 0.01; - num_threads = 2; - max_cache_size = 5; - loss_per_false_association = 1; - loss_per_missed_association = 1; - last_weight_1 = false; - } - - feature_extractor fe; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_Hh_ - - - - diff --git a/ml/dlib/dlib/svm/structural_assignment_trainer_abstract.h b/ml/dlib/dlib/svm/structural_assignment_trainer_abstract.h deleted file mode 100644 index ebd402d42..000000000 --- a/ml/dlib/dlib/svm/structural_assignment_trainer_abstract.h +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_ABSTRACT_Hh_ -#ifdef DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_ABSTRACT_Hh_ - -#include "../algs.h" -#include "structural_svm_assignment_problem.h" -#include "assignment_function_abstract.h" - - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class structural_assignment_trainer - { - /*! - REQUIREMENTS ON feature_extractor - It must be an object that implements an interface compatible with - the example_feature_extractor defined in dlib/svm/assignment_function_abstract.h. - - WHAT THIS OBJECT REPRESENTS - This object is a tool for learning to solve an assignment problem based - on a training dataset of example assignments. The training procedure produces an - assignment_function object which can be used to predict the assignments of - new data. - - Note that this is just a convenience wrapper around the - structural_svm_assignment_problem to make it look - similar to all the other trainers in dlib. - !*/ - - public: - typedef typename feature_extractor::lhs_element lhs_element; - typedef typename feature_extractor::rhs_element rhs_element; - typedef std::pair<std::vector<lhs_element>, std::vector<rhs_element> > sample_type; - typedef std::vector<long> label_type; - typedef assignment_function<feature_extractor> trained_function_type; - - structural_assignment_trainer ( - ); - /*! - ensures - - #get_c() == 100 - - this object isn't verbose - - #get_epsilon() == 0.01 - - #get_num_threads() == 2 - - #get_max_cache_size() == 5 - - #get_feature_extractor() == a default initialized feature_extractor - - #forces_assignment() == false - - #get_loss_per_false_association() == 1 - - #get_loss_per_missed_association() == 1 - - #forces_last_weight_to_1() == false - !*/ - - explicit structural_assignment_trainer ( - const feature_extractor& fe - ); - /*! 
- ensures
- - #get_c() == 100
- - this object isn't verbose
- - #get_epsilon() == 0.01
- - #get_num_threads() == 2
- - #get_max_cache_size() == 5
- - #get_feature_extractor() == fe
- - #forces_assignment() == false
- - #get_loss_per_false_association() == 1
- - #get_loss_per_missed_association() == 1
- - #forces_last_weight_to_1() == false
- !*/
-
- const feature_extractor& get_feature_extractor (
- ) const;
- /*!
- ensures
- - returns the feature extractor used by this object
- !*/
-
- void set_num_threads (
- unsigned long num
- );
- /*!
- ensures
- - #get_num_threads() == num
- !*/
-
- unsigned long get_num_threads (
- ) const;
- /*!
- ensures
- - returns the number of threads used during training. You should
- usually set this equal to the number of processing cores on your
- machine.
- !*/
-
- void set_epsilon (
- double eps
- );
- /*!
- requires
- - eps > 0
- ensures
- - #get_epsilon() == eps
- !*/
-
- double get_epsilon (
- ) const;
- /*!
- ensures
- - returns the error epsilon that determines when training should stop.
- Smaller values may result in a more accurate solution but take longer
- to train. You can think of this epsilon value as saying "solve the
- optimization problem until the average number of assignment mistakes per
- training sample is within epsilon of its optimal value".
- !*/
-
- void set_max_cache_size (
- unsigned long max_size
- );
- /*!
- ensures
- - #get_max_cache_size() == max_size
- !*/
-
- unsigned long get_max_cache_size (
- ) const;
- /*!
- ensures
- - During training, this object basically runs the assignment_function on
- each training sample, over and over. To speed this up, it is possible to
- cache the results of these invocations. This function returns the number
- of cache elements per training sample kept in the cache. Note that a value
- of 0 means caching is not used at all.
- !*/
-
- void set_loss_per_false_association (
- double loss
- );
- /*!
- requires
- - loss > 0
- ensures
- - #get_loss_per_false_association() == loss
- !*/
-
- double get_loss_per_false_association (
- ) const;
- /*!
- ensures
- - returns the amount of loss experienced for associating two objects
- together that shouldn't be associated. If you care more about avoiding
- accidental associations than ensuring all possible associations are
- identified then you can increase this value.
- !*/
-
- void set_loss_per_missed_association (
- double loss
- );
- /*!
- requires
- - loss > 0
- ensures
- - #get_loss_per_missed_association() == loss
- !*/
-
- double get_loss_per_missed_association (
- ) const;
- /*!
- ensures
- - returns the amount of loss experienced for failing to associate two
- objects that are supposed to be associated. If you care more about
- getting all the associations than avoiding accidentally associating
- objects that shouldn't be associated then you can increase this value.
- !*/
-
- void be_verbose (
- );
- /*!
- ensures
- - This object will print status messages to standard out so that a
- user can observe the progress of the algorithm.
- !*/
-
- void be_quiet (
- );
- /*!
- ensures
- - this object will not print anything to standard out
- !*/
-
- void set_oca (
- const oca& item
- );
- /*!
- ensures
- - #get_oca() == item
- !*/
-
- const oca get_oca (
- ) const;
- /*!
- ensures
- - returns a copy of the optimizer used to solve the structural SVM problem.
- !*/
-
- void set_c (
- double C
- );
- /*!
- requires
- - C > 0
- ensures
- - #get_c() = C
- !*/
-
- double get_c (
- ) const;
- /*!
- ensures
- - returns the SVM regularization parameter.
It is the parameter - that determines the trade-off between trying to fit the training - data (i.e. minimize the loss) or allowing more errors but hopefully - improving the generalization of the resulting assignment_function. - Larger values encourage exact fitting while smaller values of C may - encourage better generalization. - !*/ - - void set_forces_assignment ( - bool new_value - ); - /*! - ensures - - #forces_assignment() == new_value - !*/ - - bool forces_assignment( - ) const; - /*! - ensures - - returns the value of the forces_assignment() parameter for the - assignment_functions generated by this object. - !*/ - - bool forces_last_weight_to_1 ( - ) const; - /*! - ensures - - returns true if this trainer has the constraint that the last weight in - the learned parameter vector must be 1. This is the weight corresponding - to the feature in the training vectors with the highest dimension. - - Forcing the last weight to 1 also disables the bias and therefore the - get_bias() field of the learned assignment_function will be 0 when - forces_last_weight_to_1() == true. - !*/ - - void force_last_weight_to_1 ( - bool should_last_weight_be_1 - ); - /*! - ensures - - #forces_last_weight_to_1() == should_last_weight_be_1 - !*/ - - const assignment_function<feature_extractor> train ( - const std::vector<sample_type>& samples, - const std::vector<label_type>& labels - ) const; - /*! - requires - - is_assignment_problem(samples,labels) == true - - if (forces_assignment()) then - - is_forced_assignment_problem(samples,labels) == true - ensures - - Uses the structural_svm_assignment_problem to train an - assignment_function on the given samples/labels training pairs. - The idea is to learn to predict a label given an input sample. - - returns a function F with the following properties: - - F(new_sample) == A set of assignments indicating how the elements of - new_sample.first match up with the elements of new_sample.second. - - F.forces_assignment() == forces_assignment() - - F.get_feature_extractor() == get_feature_extractor() - - if (forces_last_weight_to_1()) then - - F.get_bias() == 0 - - F.get_weights()(F.get_weights().size()-1) == 1 - !*/ - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/svm/structural_graph_labeling_trainer.h b/ml/dlib/dlib/svm/structural_graph_labeling_trainer.h deleted file mode 100644 index 4d55c772b..000000000 --- a/ml/dlib/dlib/svm/structural_graph_labeling_trainer.h +++ /dev/null @@ -1,282 +0,0 @@ -// Copyright (C) 2012 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
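-
-// Rough usage sketch for the trainer defined in this file (illustrative only;
-// graph_type, the sample graphs, and the node labels are assumed to be set up
-// by the caller as described in structural_graph_labeling_trainer_abstract.h):
-//
-//     typedef matrix<double,0,1> vector_type;
-//     dlib::array<graph_type> samples;
-//     std::vector<std::vector<bool> > labels;
-//     // ... fill samples/labels so is_graph_labeling_problem(samples,labels) is true ...
-//
-//     structural_graph_labeling_trainer<vector_type> trainer;
-//     trainer.set_c(10);
-//     trainer.set_num_threads(4);
-//     graph_labeler<vector_type> labeler = trainer.train(samples, labels);
-//     std::vector<bool> predicted_labels = labeler(samples[0]);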
-#ifndef DLIB_STRUCTURAL_GRAPH_LABELING_tRAINER_Hh_ -#define DLIB_STRUCTURAL_GRAPH_LABELING_tRAINER_Hh_ - -#include "structural_graph_labeling_trainer_abstract.h" -#include "../algs.h" -#include "../optimization.h" -#include "structural_svm_graph_labeling_problem.h" -#include "../graph_cuts/graph_labeler.h" - - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type - > - class structural_graph_labeling_trainer - { - public: - typedef std::vector<bool> label_type; - typedef graph_labeler<vector_type> trained_function_type; - - structural_graph_labeling_trainer ( - ) - { - C = 10; - verbose = false; - eps = 0.1; - num_threads = 2; - max_cache_size = 5; - loss_pos = 1.0; - loss_neg = 1.0; - } - - void set_num_threads ( - unsigned long num - ) - { - num_threads = num; - } - - unsigned long get_num_threads ( - ) const - { - return num_threads; - } - - void set_epsilon ( - double eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void structural_graph_labeling_trainer::set_epsilon()" - << "\n\t eps_ must be greater than 0" - << "\n\t eps_: " << eps_ - << "\n\t this: " << this - ); - - eps = eps_; - } - - double get_epsilon ( - ) const { return eps; } - - void set_max_cache_size ( - unsigned long max_size - ) - { - max_cache_size = max_size; - } - - unsigned long get_max_cache_size ( - ) const - { - return max_cache_size; - } - - void be_verbose ( - ) - { - verbose = true; - } - - void be_quiet ( - ) - { - verbose = false; - } - - void set_oca ( - const oca& item - ) - { - solver = item; - } - - const oca get_oca ( - ) const - { - return solver; - } - - void set_c ( - double C_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C_ > 0, - "\t void structural_graph_labeling_trainer::set_c()" - << "\n\t C_ must be greater than 0" - << "\n\t C_: " << C_ - << "\n\t this: " << this - ); - - C = C_; - } - - double get_c ( - ) const - { - return C; - } - - - void set_loss_on_positive_class ( - double loss - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(loss >= 0, - "\t structural_graph_labeling_trainer::set_loss_on_positive_class()" - << "\n\t Invalid inputs were given to this function." - << "\n\t loss: " << loss - << "\n\t this: " << this ); - - loss_pos = loss; - } - - void set_loss_on_negative_class ( - double loss - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(loss >= 0, - "\t structural_graph_labeling_trainer::set_loss_on_negative_class()" - << "\n\t Invalid inputs were given to this function." - << "\n\t loss: " << loss - << "\n\t this: " << this ); - - loss_neg = loss; - } - - double get_loss_on_negative_class ( - ) const { return loss_neg; } - - double get_loss_on_positive_class ( - ) const { return loss_pos; } - - - template < - typename graph_type - > - const graph_labeler<vector_type> train ( - const dlib::array<graph_type>& samples, - const std::vector<label_type>& labels, - const std::vector<std::vector<double> >& losses - ) const - { -#ifdef ENABLE_ASSERTS - std::string reason_for_failure; - DLIB_ASSERT(is_graph_labeling_problem(samples, labels, reason_for_failure) == true , - "\t void structural_graph_labeling_trainer::train()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t reason_for_failure: " << reason_for_failure - << "\n\t samples.size(): " << samples.size() - << "\n\t labels.size(): " << labels.size() - << "\n\t this: " << this ); - DLIB_ASSERT((losses.size() == 0 || sizes_match(labels, losses) == true) && - all_values_are_nonnegative(losses) == true, - "\t void structural_graph_labeling_trainer::train()" - << "\n\t Invalid inputs were given to this function." - << "\n\t labels.size(): " << labels.size() - << "\n\t losses.size(): " << losses.size() - << "\n\t sizes_match(labels,losses): " << sizes_match(labels,losses) - << "\n\t all_values_are_nonnegative(losses): " << all_values_are_nonnegative(losses) - << "\n\t this: " << this ); -#endif - - - structural_svm_graph_labeling_problem<graph_type> prob(samples, labels, losses, num_threads); - - if (verbose) - prob.be_verbose(); - - prob.set_c(C); - prob.set_epsilon(eps); - prob.set_max_cache_size(max_cache_size); - if (prob.get_losses().size() == 0) - { - prob.set_loss_on_positive_class(loss_pos); - prob.set_loss_on_negative_class(loss_neg); - } - - matrix<double,0,1> w; - solver(prob, w, prob.get_num_edge_weights()); - - vector_type edge_weights; - vector_type node_weights; - populate_weights(w, edge_weights, node_weights, prob.get_num_edge_weights()); - return graph_labeler<vector_type>(edge_weights, node_weights); - } - - template < - typename graph_type - > - const graph_labeler<vector_type> train ( - const dlib::array<graph_type>& samples, - const std::vector<label_type>& labels - ) const - { - std::vector<std::vector<double> > losses; - return train(samples, labels, losses); - } - - private: - - template <typename T> - typename enable_if<is_matrix<T> >::type populate_weights ( - const matrix<double,0,1>& w, - T& edge_weights, - T& node_weights, - long split_idx - ) const - { - edge_weights = rowm(w,range(0, split_idx-1)); - node_weights = rowm(w,range(split_idx,w.size()-1)); - } - - template <typename T> - typename disable_if<is_matrix<T> >::type populate_weights ( - const matrix<double,0,1>& w, - T& edge_weights, - T& node_weights, - long split_idx - ) const - { - edge_weights.clear(); - node_weights.clear(); - for (long i = 0; i < split_idx; ++i) - { - if (w(i) != 0) - edge_weights.insert(edge_weights.end(), std::make_pair(i,w(i))); - } - for (long i = split_idx; i < w.size(); ++i) - { - if (w(i) != 0) - node_weights.insert(node_weights.end(), std::make_pair(i-split_idx,w(i))); - } - } - - - double C; - oca solver; - double eps; - bool verbose; - unsigned long num_threads; - unsigned long max_cache_size; - double loss_pos; - double loss_neg; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_GRAPH_LABELING_tRAINER_Hh_ - diff --git a/ml/dlib/dlib/svm/structural_graph_labeling_trainer_abstract.h b/ml/dlib/dlib/svm/structural_graph_labeling_trainer_abstract.h deleted file mode 100644 index df88096a0..000000000 --- a/ml/dlib/dlib/svm/structural_graph_labeling_trainer_abstract.h +++ /dev/null @@ -1,265 +0,0 @@ -// Copyright (C) 2012 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#undef DLIB_STRUCTURAL_GRAPH_LABELING_tRAINER_ABSTRACT_Hh_ -#ifdef DLIB_STRUCTURAL_GRAPH_LABELING_tRAINER_ABSTRACT_Hh_ - -#include "../algs.h" -#include "../optimization.h" -#include "structural_svm_graph_labeling_problem_abstract.h" -#include "../graph_cuts/graph_labeler_abstract.h" - - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type - > - class structural_graph_labeling_trainer - { - /*! - REQUIREMENTS ON vector_type - - vector_type is a dlib::matrix capable of representing column - vectors or it is a sparse vector type as defined in dlib/svm/sparse_vector_abstract.h. - - WHAT THIS OBJECT REPRESENTS - This object is a tool for learning to solve a graph labeling problem based - on a training dataset of example labeled graphs. The training procedure - produces a graph_labeler object which can be used to predict the labelings - of new graphs. - - Note that this is just a convenience wrapper around the - structural_svm_graph_labeling_problem to make it look - similar to all the other trainers in dlib. - !*/ - - public: - typedef std::vector<bool> label_type; - typedef graph_labeler<vector_type> trained_function_type; - - structural_graph_labeling_trainer ( - ); - /*! - ensures - - #get_c() == 10 - - this object isn't verbose - - #get_epsilon() == 0.1 - - #get_num_threads() == 2 - - #get_max_cache_size() == 5 - - #get_loss_on_positive_class() == 1.0 - - #get_loss_on_negative_class() == 1.0 - !*/ - - void set_num_threads ( - unsigned long num - ); - /*! - ensures - - #get_num_threads() == num - !*/ - - unsigned long get_num_threads ( - ) const; - /*! - ensures - - returns the number of threads used during training. You should - usually set this equal to the number of processing cores on your - machine. - !*/ - - void set_epsilon ( - double eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - double get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Smaller values may result in a more accurate solution but take longer - to train. You can think of this epsilon value as saying "solve the - optimization problem until the average number of labeling mistakes per - example graph is within epsilon of its optimal value". - !*/ - - void set_max_cache_size ( - unsigned long max_size - ); - /*! - ensures - - #get_max_cache_size() == max_size - !*/ - - unsigned long get_max_cache_size ( - ) const; - /*! - ensures - - During training, this object basically runs the graph_labeler on each - training sample, over and over. To speed this up, it is possible to - cache the results of these invocations. This function returns the number - of cache elements per training sample kept in the cache. Note that a value - of 0 means caching is not used at all. - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out so that a - user can observe the progress of the algorithm. - !*/ - - void be_quiet ( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - void set_oca ( - const oca& item - ); - /*! - ensures - - #get_oca() == item - !*/ - - const oca get_oca ( - ) const; - /*! - ensures - - returns a copy of the optimizer used to solve the structural SVM problem. - !*/ - - void set_c ( - double C - ); - /*! - requires - - C > 0 - ensures - - #get_c() = C - !*/ - - double get_c ( - ) const; - /*! 
- ensures - - returns the SVM regularization parameter. It is the parameter - that determines the trade-off between trying to fit the training - data (i.e. minimize the loss) or allowing more errors but hopefully - improving the generalization of the resulting graph_labeler. Larger - values encourage exact fitting while smaller values of C may encourage - better generalization. - !*/ - - void set_loss_on_positive_class ( - double loss - ); - /*! - requires - - loss >= 0 - ensures - - #get_loss_on_positive_class() == loss - !*/ - - void set_loss_on_negative_class ( - double loss - ); - /*! - requires - - loss >= 0 - ensures - - #get_loss_on_negative_class() == loss - !*/ - - double get_loss_on_positive_class ( - ) const; - /*! - ensures - - returns the loss incurred when a graph node which is supposed to have - a label of true gets misclassified. This value controls how much we care - about correctly classifying nodes which should be labeled as true. Larger - loss values indicate that we care more strongly than smaller values. - !*/ - - double get_loss_on_negative_class ( - ) const; - /*! - ensures - - returns the loss incurred when a graph node which is supposed to have - a label of false gets misclassified. This value controls how much we care - about correctly classifying nodes which should be labeled as false. Larger - loss values indicate that we care more strongly than smaller values. - !*/ - - template < - typename graph_type - > - const graph_labeler<vector_type> train ( - const dlib::array<graph_type>& samples, - const std::vector<label_type>& labels - ) const; - /*! - requires - - is_graph_labeling_problem(samples,labels) == true - ensures - - Uses the structural_svm_graph_labeling_problem to train a graph_labeler - on the given samples/labels training pairs. The idea is to learn to - predict a label given an input sample. - - The values of get_loss_on_positive_class() and get_loss_on_negative_class() - are used to determine how to value mistakes on each node during training. - - returns a function F with the following properties: - - F(new_sample) == The predicted labels for the nodes in the graph - new_sample. - !*/ - - template < - typename graph_type - > - const graph_labeler<vector_type> train ( - const dlib::array<graph_type>& samples, - const std::vector<label_type>& labels, - const std::vector<std::vector<double> >& losses - ) const; - /*! - requires - - is_graph_labeling_problem(samples,labels) == true - - if (losses.size() != 0) then - - sizes_match(labels, losses) == true - - all_values_are_nonnegative(losses) == true - ensures - - Uses the structural_svm_graph_labeling_problem to train a graph_labeler - on the given samples/labels training pairs. The idea is to learn to - predict a label given an input sample. - - returns a function F with the following properties: - - F(new_sample) == The predicted labels for the nodes in the graph - new_sample. - - if (losses.size() == 0) then - - The values of get_loss_on_positive_class() and get_loss_on_negative_class() - are used to determine how to value mistakes on each node during training. - - The losses argument is effectively ignored if its size is zero. - - else - - Each node in the training data has its own loss value defined by the - corresponding entry of losses. In particular, this means that the - node with label labels[i][j] incurs a loss of losses[i][j] if it is - incorrectly labeled. - - The get_loss_on_positive_class() and get_loss_on_negative_class() - parameters are ignored. Only losses is used in this case. 
- !*/ - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_GRAPH_LABELING_tRAINER_ABSTRACT_Hh_ - diff --git a/ml/dlib/dlib/svm/structural_object_detection_trainer.h b/ml/dlib/dlib/svm/structural_object_detection_trainer.h deleted file mode 100644 index bdf8c5b5c..000000000 --- a/ml/dlib/dlib/svm/structural_object_detection_trainer.h +++ /dev/null @@ -1,402 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_Hh_ -#define DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_Hh_ - -#include "structural_object_detection_trainer_abstract.h" -#include "../algs.h" -#include "../optimization.h" -#include "structural_svm_object_detection_problem.h" -#include "../image_processing/object_detector.h" -#include "../image_processing/box_overlap_testing.h" -#include "../image_processing/full_object_detection.h" - - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_scanner_type, - typename svm_struct_prob_type - > - void configure_nuclear_norm_regularizer ( - const image_scanner_type&, - svm_struct_prob_type& - ) - { - // does nothing by default. Specific scanner types overload this function to do - // whatever is appropriate. - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_scanner_type - > - class structural_object_detection_trainer : noncopyable - { - - public: - typedef double scalar_type; - typedef default_memory_manager mem_manager_type; - typedef object_detector<image_scanner_type> trained_function_type; - - - explicit structural_object_detection_trainer ( - const image_scanner_type& scanner_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(scanner_.get_num_detection_templates() > 0, - "\t structural_object_detection_trainer::structural_object_detection_trainer(scanner_)" - << "\n\t You can't have zero detection templates" - << "\n\t this: " << this - ); - - C = 1; - verbose = false; - eps = 0.1; - num_threads = 2; - max_cache_size = 5; - match_eps = 0.5; - loss_per_missed_target = 1; - loss_per_false_alarm = 1; - - scanner.copy_configuration(scanner_); - - auto_overlap_tester = true; - } - - const image_scanner_type& get_scanner ( - ) const - { - return scanner; - } - - bool auto_set_overlap_tester ( - ) const - { - return auto_overlap_tester; - } - - void set_overlap_tester ( - const test_box_overlap& tester - ) - { - overlap_tester = tester; - auto_overlap_tester = false; - } - - test_box_overlap get_overlap_tester ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(auto_set_overlap_tester() == false, - "\t test_box_overlap structural_object_detection_trainer::get_overlap_tester()" - << "\n\t You can't call this function if the overlap tester is generated dynamically." 
- << "\n\t this: " << this - ); - - return overlap_tester; - } - - void set_num_threads ( - unsigned long num - ) - { - num_threads = num; - } - - unsigned long get_num_threads ( - ) const - { - return num_threads; - } - - void set_epsilon ( - scalar_type eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void structural_object_detection_trainer::set_epsilon()" - << "\n\t eps_ must be greater than 0" - << "\n\t eps_: " << eps_ - << "\n\t this: " << this - ); - - eps = eps_; - } - - scalar_type get_epsilon ( - ) const { return eps; } - - void set_max_cache_size ( - unsigned long max_size - ) - { - max_cache_size = max_size; - } - - unsigned long get_max_cache_size ( - ) const - { - return max_cache_size; - } - - void be_verbose ( - ) - { - verbose = true; - } - - void be_quiet ( - ) - { - verbose = false; - } - - void set_oca ( - const oca& item - ) - { - solver = item; - } - - const oca get_oca ( - ) const - { - return solver; - } - - void set_c ( - scalar_type C_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C_ > 0, - "\t void structural_object_detection_trainer::set_c()" - << "\n\t C_ must be greater than 0" - << "\n\t C_: " << C_ - << "\n\t this: " << this - ); - - C = C_; - } - - scalar_type get_c ( - ) const - { - return C; - } - - void set_match_eps ( - double eps - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < eps && eps < 1, - "\t void structural_object_detection_trainer::set_match_eps(eps)" - << "\n\t Invalid inputs were given to this function " - << "\n\t eps: " << eps - << "\n\t this: " << this - ); - - match_eps = eps; - } - - double get_match_eps ( - ) const - { - return match_eps; - } - - double get_loss_per_missed_target ( - ) const - { - return loss_per_missed_target; - } - - void set_loss_per_missed_target ( - double loss - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(loss > 0, - "\t void structural_object_detection_trainer::set_loss_per_missed_target(loss)" - << "\n\t Invalid inputs were given to this function " - << "\n\t loss: " << loss - << "\n\t this: " << this - ); - - loss_per_missed_target = loss; - } - - double get_loss_per_false_alarm ( - ) const - { - return loss_per_false_alarm; - } - - void set_loss_per_false_alarm ( - double loss - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(loss > 0, - "\t void structural_object_detection_trainer::set_loss_per_false_alarm(loss)" - << "\n\t Invalid inputs were given to this function " - << "\n\t loss: " << loss - << "\n\t this: " << this - ); - - loss_per_false_alarm = loss; - } - - template < - typename image_array_type - > - const trained_function_type train ( - const image_array_type& images, - const std::vector<std::vector<full_object_detection> >& truth_object_detections - ) const - { - std::vector<std::vector<rectangle> > empty_ignore(images.size()); - return train_impl(images, truth_object_detections, empty_ignore, test_box_overlap()); - } - - template < - typename image_array_type - > - const trained_function_type train ( - const image_array_type& images, - const std::vector<std::vector<full_object_detection> >& truth_object_detections, - const std::vector<std::vector<rectangle> >& ignore, - const test_box_overlap& ignore_overlap_tester = test_box_overlap() - ) const - { - return train_impl(images, truth_object_detections, ignore, ignore_overlap_tester); - } - - template < - typename image_array_type - > - const trained_function_type train ( - const image_array_type& images, - const 
std::vector<std::vector<rectangle> >& truth_object_detections - ) const - { - std::vector<std::vector<rectangle> > empty_ignore(images.size()); - return train(images, truth_object_detections, empty_ignore, test_box_overlap()); - } - - template < - typename image_array_type - > - const trained_function_type train ( - const image_array_type& images, - const std::vector<std::vector<rectangle> >& truth_object_detections, - const std::vector<std::vector<rectangle> >& ignore, - const test_box_overlap& ignore_overlap_tester = test_box_overlap() - ) const - { - std::vector<std::vector<full_object_detection> > truth_dets(truth_object_detections.size()); - for (unsigned long i = 0; i < truth_object_detections.size(); ++i) - { - for (unsigned long j = 0; j < truth_object_detections[i].size(); ++j) - { - truth_dets[i].push_back(full_object_detection(truth_object_detections[i][j])); - } - } - - return train_impl(images, truth_dets, ignore, ignore_overlap_tester); - } - - private: - - template < - typename image_array_type - > - const trained_function_type train_impl ( - const image_array_type& images, - const std::vector<std::vector<full_object_detection> >& truth_object_detections, - const std::vector<std::vector<rectangle> >& ignore, - const test_box_overlap& ignore_overlap_tester - ) const - { -#ifdef ENABLE_ASSERTS - // make sure requires clause is not broken - DLIB_ASSERT(is_learning_problem(images,truth_object_detections) == true && images.size() == ignore.size(), - "\t trained_function_type structural_object_detection_trainer::train()" - << "\n\t invalid inputs were given to this function" - << "\n\t images.size(): " << images.size() - << "\n\t ignore.size(): " << ignore.size() - << "\n\t truth_object_detections.size(): " << truth_object_detections.size() - << "\n\t is_learning_problem(images,truth_object_detections): " << is_learning_problem(images,truth_object_detections) - ); - for (unsigned long i = 0; i < truth_object_detections.size(); ++i) - { - for (unsigned long j = 0; j < truth_object_detections[i].size(); ++j) - { - DLIB_ASSERT(truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template() && - all_parts_in_rect(truth_object_detections[i][j]) == true, - "\t trained_function_type structural_object_detection_trainer::train()" - << "\n\t invalid inputs were given to this function" - << "\n\t truth_object_detections["<<i<<"]["<<j<<"].num_parts(): " << - truth_object_detections[i][j].num_parts() - << "\n\t get_scanner().get_num_movable_components_per_detection_template(): " << - get_scanner().get_num_movable_components_per_detection_template() - << "\n\t all_parts_in_rect(truth_object_detections["<<i<<"]["<<j<<"]): " << all_parts_in_rect(truth_object_detections[i][j]) - ); - } - } -#endif - - structural_svm_object_detection_problem<image_scanner_type,image_array_type > - svm_prob(scanner, overlap_tester, auto_overlap_tester, images, - truth_object_detections, ignore, ignore_overlap_tester, num_threads); - - if (verbose) - svm_prob.be_verbose(); - - svm_prob.set_c(C); - svm_prob.set_epsilon(eps); - svm_prob.set_max_cache_size(max_cache_size); - svm_prob.set_match_eps(match_eps); - svm_prob.set_loss_per_missed_target(loss_per_missed_target); - svm_prob.set_loss_per_false_alarm(loss_per_false_alarm); - configure_nuclear_norm_regularizer(scanner, svm_prob); - matrix<double,0,1> w; - - // Run the optimizer to find the optimal w. - solver(svm_prob,w); - - // report the results of the training. 
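- // The detector returned below is fully described by the scanner
- // configuration, the overlap tester chosen by the SVM problem (possibly
- // auto-selected), and the learned weight vector w.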
- return object_detector<image_scanner_type>(scanner, svm_prob.get_overlap_tester(), w); - } - - image_scanner_type scanner; - test_box_overlap overlap_tester; - - double C; - oca solver; - double eps; - double match_eps; - bool verbose; - unsigned long num_threads; - unsigned long max_cache_size; - double loss_per_missed_target; - double loss_per_false_alarm; - bool auto_overlap_tester; - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_Hh_ - - diff --git a/ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h b/ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h deleted file mode 100644 index 2dd799874..000000000 --- a/ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h +++ /dev/null @@ -1,390 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_ -#ifdef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_ - -#include "structural_svm_object_detection_problem_abstract.h" -#include "../image_processing/object_detector_abstract.h" -#include "../image_processing/box_overlap_testing_abstract.h" -#include "../image_processing/full_object_detection_abstract.h" - - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_scanner_type - > - class structural_object_detection_trainer : noncopyable - { - /*! - REQUIREMENTS ON image_scanner_type - image_scanner_type must be an implementation of - dlib/image_processing/scan_fhog_pyramid_abstract.h or - dlib/image_processing/scan_image_custom_abstract.h or - dlib/image_processing/scan_image_pyramid_abstract.h or - dlib/image_processing/scan_image_boxes_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object is a tool for learning to detect objects in images based on a - set of labeled images. The training procedure produces an object_detector - which can be used to predict the locations of objects in new images. - - Note that this is just a convenience wrapper around the structural_svm_object_detection_problem - to make it look similar to all the other trainers in dlib. - !*/ - - public: - typedef double scalar_type; - typedef default_memory_manager mem_manager_type; - typedef object_detector<image_scanner_type> trained_function_type; - - - explicit structural_object_detection_trainer ( - const image_scanner_type& scanner - ); - /*! - requires - - scanner.get_num_detection_templates() > 0 - ensures - - #get_c() == 1 - - this object isn't verbose - - #get_epsilon() == 0.1 - - #get_num_threads() == 2 - - #get_max_cache_size() == 5 - - #get_match_eps() == 0.5 - - #get_loss_per_missed_target() == 1 - - #get_loss_per_false_alarm() == 1 - - This object will attempt to learn a model for the given - scanner object when train() is called. - - #get_scanner() == scanner - (note that only the "configuration" of scanner is copied. - I.e. the copy is done using copy_configuration()) - - #auto_set_overlap_tester() == true - !*/ - - const image_scanner_type& get_scanner ( - ) const; - /*! - ensures - - returns the image scanner used by this object. - !*/ - - bool auto_set_overlap_tester ( - ) const; - /*! - ensures - - if (this object will automatically determine an appropriate - state for the overlap tester used for non-max suppression.) 
then
- - returns true
- - In this case, it is determined using the find_tight_overlap_tester()
- routine based on the truth_object_detections given to the
- structural_object_detection_trainer::train() method.
- - else
- - returns false
- !*/
-
- void set_overlap_tester (
- const test_box_overlap& tester
- );
- /*!
- ensures
- - #get_overlap_tester() == tester
- - #auto_set_overlap_tester() == false
- !*/
-
- test_box_overlap get_overlap_tester (
- ) const;
- /*!
- requires
- - auto_set_overlap_tester() == false
- ensures
- - returns the overlap tester object which will be used to perform non-max suppression.
- In particular, this function returns the overlap tester which will populate the
- object_detector returned by train().
- !*/
-
- void set_num_threads (
- unsigned long num
- );
- /*!
- ensures
- - #get_num_threads() == num
- !*/
-
- unsigned long get_num_threads (
- ) const;
- /*!
- ensures
- - returns the number of threads used during training. You should
- usually set this equal to the number of processing cores on your
- machine.
- !*/
-
- void set_epsilon (
- scalar_type eps
- );
- /*!
- requires
- - eps > 0
- ensures
- - #get_epsilon() == eps
- !*/
-
- const scalar_type get_epsilon (
- ) const;
- /*!
- ensures
- - returns the error epsilon that determines when training should stop.
- Smaller values may result in a more accurate solution but take longer
- to train. You can think of this epsilon value as saying "solve the
- optimization problem until the average loss per sample is within epsilon
- of its optimal value".
- !*/
-
- void set_max_cache_size (
- unsigned long max_size
- );
- /*!
- ensures
- - #get_max_cache_size() == max_size
- !*/
-
- unsigned long get_max_cache_size (
- ) const;
- /*!
- ensures
- - During training, this object basically runs the object detector on
- each image, over and over. To speed this up, it is possible to cache
- the results of these detector invocations. This function returns the
- number of cache elements per training sample kept in the cache. Note
- that a value of 0 means caching is not used at all. Note also that
- each cache element takes up about sizeof(double)*scanner.get_num_dimensions()
- bytes of memory (where scanner is the scanner given to this object's constructor).
- !*/
-
- void be_verbose (
- );
- /*!
- ensures
- - This object will print status messages to standard out so that a
- user can observe the progress of the algorithm.
- !*/
-
- void be_quiet (
- );
- /*!
- ensures
- - this object will not print anything to standard out
- !*/
-
- void set_oca (
- const oca& item
- );
- /*!
- ensures
- - #get_oca() == item
- !*/
-
- const oca get_oca (
- ) const;
- /*!
- ensures
- - returns a copy of the optimizer used to solve the structural SVM problem.
- !*/
-
- void set_c (
- scalar_type C
- );
- /*!
- requires
- - C > 0
- ensures
- - #get_c() = C
- !*/
-
- const scalar_type get_c (
- ) const;
- /*!
- ensures
- - returns the SVM regularization parameter. It is the parameter
- that determines the trade-off between trying to fit the training
- data (i.e. minimize the loss) or allowing more errors but hopefully
- improving the generalization of the resulting detector. Larger
- values encourage exact fitting while smaller values of C may encourage
- better generalization.
- !*/
-
- void set_match_eps (
- double eps
- );
- /*!
- requires
- - 0 < eps < 1
- ensures
- - #get_match_eps() == eps
- !*/
-
- double get_match_eps (
- ) const;
- /*!
- ensures
- - returns the amount of alignment necessary for a detection to be considered
- as matching with a ground truth rectangle. If it doesn't match then
- it is considered to be a false alarm. To define this precisely, let
- A and B be two rectangles, then A and B match if and only if:
- A.intersect(B).area()/(A+B).area() > get_match_eps()
- !*/
-
- double get_loss_per_missed_target (
- ) const;
- /*!
- ensures
- - returns the amount of loss experienced for failing to detect one of the
- targets. If you care more about finding targets than having a low false
- alarm rate then you can increase this value.
- !*/
-
- void set_loss_per_missed_target (
- double loss
- );
- /*!
- requires
- - loss > 0
- ensures
- - #get_loss_per_missed_target() == loss
- !*/
-
- double get_loss_per_false_alarm (
- ) const;
- /*!
- ensures
- - returns the amount of loss experienced for emitting a false alarm detection.
- Or in other words, the loss for generating a detection that doesn't correspond
- to one of the truth rectangles. If you care more about having a low false
- alarm rate than finding all the targets then you can increase this value.
- !*/
-
- void set_loss_per_false_alarm (
- double loss
- );
- /*!
- requires
- - loss > 0
- ensures
- - #get_loss_per_false_alarm() == loss
- !*/
-
- template <
- typename image_array_type
- >
- const trained_function_type train (
- const image_array_type& images,
- const std::vector<std::vector<full_object_detection> >& truth_object_detections
- ) const;
- /*!
- requires
- - is_learning_problem(images, truth_object_detections) == true
- - it must be valid to pass images[0] into the image_scanner_type::load() method.
- (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
- - for all valid i, j:
- - truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template()
- - all_parts_in_rect(truth_object_detections[i][j]) == true
- ensures
- - Uses the structural_svm_object_detection_problem to train an object_detector
- on the given images and truth_object_detections.
- - returns a function F with the following properties:
- - F(new_image) == A prediction of what objects are present in new_image. This
- is a set of rectangles indicating their positions.
- !*/
-
- template <
- typename image_array_type
- >
- const trained_function_type train (
- const image_array_type& images,
- const std::vector<std::vector<rectangle> >& truth_object_detections
- ) const;
- /*!
- requires
- - is_learning_problem(images, truth_object_detections) == true
- - it must be valid to pass images[0] into the image_scanner_type::load() method.
- (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
- - get_scanner().get_num_movable_components_per_detection_template() == 0
- ensures
- - This function is identical to the above train(), except that it converts
- each element of truth_object_detections into a full_object_detection by
- passing it to full_object_detection's constructor taking only a rectangle.
- Therefore, this version of train() is a convenience function for the
- case where you don't have any movable components of the detection templates.
- !*/
-
- template <
- typename image_array_type
- >
- const trained_function_type train (
- const image_array_type& images,
- const std::vector<std::vector<full_object_detection> >& truth_object_detections,
- const std::vector<std::vector<rectangle> >& ignore,
- const test_box_overlap& ignore_overlap_tester = test_box_overlap()
- ) const;
- /*!
- requires
- - is_learning_problem(images, truth_object_detections) == true
- - it must be valid to pass images[0] into the image_scanner_type::load() method.
- (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
- - ignore.size() == images.size()
- - for all valid i, j:
- - truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template()
- - all_parts_in_rect(truth_object_detections[i][j]) == true
- ensures
- - Uses the structural_svm_object_detection_problem to train an object_detector
- on the given images and truth_object_detections.
- - for all valid i:
- - Within images[i] any detections that match against a rectangle in
- ignore[i], according to ignore_overlap_tester, are ignored. That is,
- the optimizer doesn't care if the detector outputs a detection that
- matches any of the ignore rectangles or if it fails to output a
- detection for an ignore rectangle. Therefore, if there are objects
- in your dataset that you are unsure you want to detect, or that you
- otherwise don't care whether the detector finds, you can mark them
- with ignore rectangles and the optimizer will simply ignore them.
- - returns a function F with the following properties:
- - F(new_image) == A prediction of what objects are present in new_image. This
- is a set of rectangles indicating their positions.
- !*/
-
- template <
- typename image_array_type
- >
- const trained_function_type train (
- const image_array_type& images,
- const std::vector<std::vector<rectangle> >& truth_object_detections,
- const std::vector<std::vector<rectangle> >& ignore,
- const test_box_overlap& ignore_overlap_tester = test_box_overlap()
- ) const;
- /*!
- requires
- - is_learning_problem(images, truth_object_detections) == true
- - ignore.size() == images.size()
- - it must be valid to pass images[0] into the image_scanner_type::load() method.
- (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
- - get_scanner().get_num_movable_components_per_detection_template() == 0
- ensures
- - This function is identical to the above train(), except that it converts
- each element of truth_object_detections into a full_object_detection by
- passing it to full_object_detection's constructor taking only a rectangle.
- Therefore, this version of train() is a convenience function for the
- case where you don't have any movable components of the detection templates.
- !*/
- };
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_
-
-
diff --git a/ml/dlib/dlib/svm/structural_sequence_labeling_trainer.h b/ml/dlib/dlib/svm/structural_sequence_labeling_trainer.h
deleted file mode 100644
index 9b61fd6c2..000000000
--- a/ml/dlib/dlib/svm/structural_sequence_labeling_trainer.h
+++ /dev/null
@@ -1,271 +0,0 @@
-// Copyright (C) 2011 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
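-
-// Rough usage sketch for the trainer defined in this file (illustrative only;
-// the feature extractor and the training sequences are assumed to be set up
-// as described in structural_sequence_labeling_trainer_abstract.h; note that
-// sample_sequence_type is the trainer's typedef for feature_extractor::sequence_type):
-//
-//     std::vector<sample_sequence_type> samples;
-//     std::vector<std::vector<unsigned long> > labels;
-//     // ... fill samples/labels so is_sequence_labeling_problem(samples,labels) is true ...
-//
-//     structural_sequence_labeling_trainer<feature_extractor> trainer;
-//     trainer.set_c(100);
-//     trainer.set_num_threads(4);
-//     sequence_labeler<feature_extractor> labeler = trainer.train(samples, labels);
-//     std::vector<unsigned long> predicted = labeler(samples[0]);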
-#ifndef DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_Hh_ -#define DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_Hh_ - -#include "structural_sequence_labeling_trainer_abstract.h" -#include "../algs.h" -#include "../optimization.h" -#include "structural_svm_sequence_labeling_problem.h" -#include "num_nonnegative_weights.h" - - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class structural_sequence_labeling_trainer - { - public: - typedef typename feature_extractor::sequence_type sample_sequence_type; - typedef std::vector<unsigned long> labeled_sequence_type; - - typedef sequence_labeler<feature_extractor> trained_function_type; - - explicit structural_sequence_labeling_trainer ( - const feature_extractor& fe_ - ) : fe(fe_) - { - set_defaults(); - } - - structural_sequence_labeling_trainer ( - ) - { - set_defaults(); - } - - const feature_extractor& get_feature_extractor ( - ) const { return fe; } - - unsigned long num_labels ( - ) const { return fe.num_labels(); } - - void set_num_threads ( - unsigned long num - ) - { - num_threads = num; - } - - unsigned long get_num_threads ( - ) const - { - return num_threads; - } - - void set_epsilon ( - double eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void structural_sequence_labeling_trainer::set_epsilon()" - << "\n\t eps_ must be greater than 0" - << "\n\t eps_: " << eps_ - << "\n\t this: " << this - ); - - eps = eps_; - } - - double get_epsilon ( - ) const { return eps; } - - unsigned long get_max_iterations ( - ) const { return max_iterations; } - - void set_max_iterations ( - unsigned long max_iter - ) - { - max_iterations = max_iter; - } - - void set_max_cache_size ( - unsigned long max_size - ) - { - max_cache_size = max_size; - } - - unsigned long get_max_cache_size ( - ) const - { - return max_cache_size; - } - - void be_verbose ( - ) - { - verbose = true; - } - - void be_quiet ( - ) - { - verbose = false; - } - - void set_oca ( - const oca& item - ) - { - solver = item; - } - - const oca get_oca ( - ) const - { - return solver; - } - - void set_c ( - double C_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C_ > 0, - "\t void structural_sequence_labeling_trainer::set_c()" - << "\n\t C_ must be greater than 0" - << "\n\t C_: " << C_ - << "\n\t this: " << this - ); - - C = C_; - } - - double get_c ( - ) const - { - return C; - } - - double get_loss ( - unsigned long label - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(label < num_labels(), - "\t void structural_sequence_labeling_trainer::get_loss()" - << "\n\t invalid inputs were given to this function" - << "\n\t label: " << label - << "\n\t num_labels(): " << num_labels() - << "\n\t this: " << this - ); - - return loss_values[label]; - } - - void set_loss ( - unsigned long label, - double value - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(label < num_labels() && value >= 0, - "\t void structural_sequence_labeling_trainer::set_loss()" - << "\n\t invalid inputs were given to this function" - << "\n\t label: " << label - << "\n\t num_labels(): " << num_labels() - << "\n\t value: " << value - << "\n\t this: " << this - ); - - loss_values[label] = value; - } - - - const sequence_labeler<feature_extractor> train( - const std::vector<sample_sequence_type>& x, - const std::vector<labeled_sequence_type>& y - ) const - { - - // make sure requires clause is not broken - 
DLIB_ASSERT(is_sequence_labeling_problem(x,y) == true && - contains_invalid_labeling(get_feature_extractor(), x, y) == false, - "\t sequence_labeler structural_sequence_labeling_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t x.size(): " << x.size() - << "\n\t is_sequence_labeling_problem(x,y): " << is_sequence_labeling_problem(x,y) - << "\n\t contains_invalid_labeling(get_feature_extractor(),x,y): " << contains_invalid_labeling(get_feature_extractor(),x,y) - << "\n\t this: " << this - ); - -#ifdef ENABLE_ASSERTS - for (unsigned long i = 0; i < y.size(); ++i) - { - for (unsigned long j = 0; j < y[i].size(); ++j) - { - // make sure requires clause is not broken - DLIB_ASSERT(y[i][j] < num_labels(), - "\t sequence_labeler structural_sequence_labeling_trainer::train(x,y)" - << "\n\t The given labels in y are invalid." - << "\n\t y[i][j]: " << y[i][j] - << "\n\t num_labels(): " << num_labels() - << "\n\t i: " << i - << "\n\t j: " << j - << "\n\t this: " << this - ); - } - } -#endif - - - - - structural_svm_sequence_labeling_problem<feature_extractor> prob(x, y, fe, num_threads); - matrix<double,0,1> weights; - if (verbose) - prob.be_verbose(); - - prob.set_epsilon(eps); - prob.set_max_iterations(max_iterations); - prob.set_c(C); - prob.set_max_cache_size(max_cache_size); - for (unsigned long i = 0; i < loss_values.size(); ++i) - prob.set_loss(i,loss_values[i]); - - solver(prob, weights, num_nonnegative_weights(fe)); - - return sequence_labeler<feature_extractor>(weights,fe); - } - - private: - - double C; - oca solver; - double eps; - unsigned long max_iterations; - bool verbose; - unsigned long num_threads; - unsigned long max_cache_size; - std::vector<double> loss_values; - - void set_defaults () - { - C = 100; - verbose = false; - eps = 0.1; - max_iterations = 10000; - num_threads = 2; - max_cache_size = 5; - loss_values.assign(num_labels(), 1); - } - - feature_extractor fe; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_Hh_ - - - diff --git a/ml/dlib/dlib/svm/structural_sequence_labeling_trainer_abstract.h b/ml/dlib/dlib/svm/structural_sequence_labeling_trainer_abstract.h deleted file mode 100644 index 43e5f5131..000000000 --- a/ml/dlib/dlib/svm/structural_sequence_labeling_trainer_abstract.h +++ /dev/null @@ -1,266 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_ABSTRACT_Hh_ -#ifdef DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_ABSTRACT_Hh_ - -#include "../algs.h" -#include "../optimization.h" -#include "structural_svm_sequence_labeling_problem_abstract.h" -#include "sequence_labeler_abstract.h" - - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class structural_sequence_labeling_trainer - { - /*! - REQUIREMENTS ON feature_extractor - It must be an object that implements an interface compatible with - the example_feature_extractor defined in dlib/svm/sequence_labeler_abstract.h. - - WHAT THIS OBJECT REPRESENTS - This object is a tool for learning to do sequence labeling based - on a set of training data. The training procedure produces a - sequence_labeler object which can be used to predict the labels of - new data sequences. 
- - Note that this is just a convenience wrapper around the - structural_svm_sequence_labeling_problem to make it look - similar to all the other trainers in dlib. - !*/ - - public: - typedef typename feature_extractor::sequence_type sample_sequence_type; - typedef std::vector<unsigned long> labeled_sequence_type; - typedef sequence_labeler<feature_extractor> trained_function_type; - - structural_sequence_labeling_trainer ( - ); - /*! - ensures - - #get_c() == 100 - - this object isn't verbose - - #get_epsilon() == 0.1 - - #get_max_iterations() == 10000 - - #get_num_threads() == 2 - - #get_max_cache_size() == 5 - - #get_feature_extractor() == a default initialized feature_extractor - !*/ - - explicit structural_sequence_labeling_trainer ( - const feature_extractor& fe - ); - /*! - ensures - - #get_c() == 100 - - this object isn't verbose - - #get_epsilon() == 0.1 - - #get_max_iterations() == 10000 - - #get_num_threads() == 2 - - #get_max_cache_size() == 5 - - #get_feature_extractor() == fe - !*/ - - const feature_extractor& get_feature_extractor ( - ) const; - /*! - ensures - - returns the feature extractor used by this object - !*/ - - unsigned long num_labels ( - ) const; - /*! - ensures - - returns get_feature_extractor().num_labels() - (i.e. returns the number of possible output labels for each - element of a sequence) - !*/ - - void set_num_threads ( - unsigned long num - ); - /*! - ensures - - #get_num_threads() == num - !*/ - - unsigned long get_num_threads ( - ) const; - /*! - ensures - - returns the number of threads used during training. You should - usually set this equal to the number of processing cores on your - machine. - !*/ - - void set_epsilon ( - double eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - const double get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Smaller values may result in a more accurate solution but take longer - to train. You can think of this epsilon value as saying "solve the - optimization problem until the average number of labeling mistakes per - training sample is within epsilon of its optimal value". - !*/ - - void set_max_iterations ( - unsigned long max_iter - ); - /*! - ensures - - #get_max_iterations() == max_iter - !*/ - - unsigned long get_max_iterations ( - ); - /*! - ensures - - returns the maximum number of iterations the SVM optimizer is allowed to - run before it is required to stop and return a result. - !*/ - - void set_max_cache_size ( - unsigned long max_size - ); - /*! - ensures - - #get_max_cache_size() == max_size - !*/ - - unsigned long get_max_cache_size ( - ) const; - /*! - ensures - - During training, this object basically runs the sequence_labeler on - each training sample, over and over. To speed this up, it is possible to - cache the results of these labeler invocations. This function returns the - number of cache elements per training sample kept in the cache. Note - that a value of 0 means caching is not used at all. - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out so that a - user can observe the progress of the algorithm. - !*/ - - void be_quiet ( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - void set_oca ( - const oca& item - ); - /*! - ensures - - #get_oca() == item - !*/ - - const oca get_oca ( - ) const; - /*! - ensures - - returns a copy of the optimizer used to solve the structural SVM problem. 
- !*/ - - void set_c ( - double C - ); - /*! - requires - - C > 0 - ensures - - #get_c() == C - !*/ - - double get_c ( - ) const; - /*! - ensures - - returns the SVM regularization parameter. It is the parameter - that determines the trade-off between trying to fit the training - data (i.e. minimize the loss) and allowing more errors but hopefully - improving the generalization of the resulting sequence labeler. Larger - values encourage exact fitting while smaller values of C may encourage - better generalization. - !*/ - - double get_loss ( - unsigned long label - ) const; - /*! - requires - - label < num_labels() - ensures - - returns the loss incurred when a sequence element with the given - label is misclassified. This value controls how much we care about - correctly classifying this type of label. Larger loss values indicate - that we care more strongly than smaller values. - !*/ - - void set_loss ( - unsigned long label, - double value - ); - /*! - requires - - label < num_labels() - - value >= 0 - ensures - - #get_loss(label) == value - !*/ - - const sequence_labeler<feature_extractor> train( - const std::vector<sample_sequence_type>& x, - const std::vector<labeled_sequence_type>& y - ) const; - /*! - requires - - is_sequence_labeling_problem(x, y) == true - - contains_invalid_labeling(get_feature_extractor(), x, y) == false - - for all valid i and j: y[i][j] < num_labels() - ensures - - Uses the structural_svm_sequence_labeling_problem to train a - sequence_labeler on the given x/y training pairs. The idea is - to learn to predict a y given an input x. - - returns a function F with the following properties: - - F(new_x) == A sequence of predicted labels for the elements of new_x. - - F(new_x).size() == new_x.size() - - for all valid i: - - F(new_x)[i] == the predicted label of new_x[i] - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_ABSTRACT_Hh_ - - - - diff --git a/ml/dlib/dlib/svm/structural_sequence_segmentation_trainer.h b/ml/dlib/dlib/svm/structural_sequence_segmentation_trainer.h deleted file mode 100644 index 2e0214008..000000000 --- a/ml/dlib/dlib/svm/structural_sequence_segmentation_trainer.h +++ /dev/null @@ -1,281 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license.
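A rough usage sketch of the sequence labeling trainer documented above; the feature extractor type my_feature_extractor and the data setup are assumed placeholders, not code from the deleted sources:

    // Minimal sketch, assuming my_feature_extractor implements the
    // example_feature_extractor interface from sequence_labeler_abstract.h.
    my_feature_extractor fe;
    dlib::structural_sequence_labeling_trainer<my_feature_extractor> trainer(fe);
    trainer.set_num_threads(4);   // roughly the number of CPU cores
    trainer.set_c(100);           // the default assigned in set_defaults() above

    std::vector<my_feature_extractor::sequence_type> samples;
    std::vector<std::vector<unsigned long> > labels;
    // ... fill samples/labels so that is_sequence_labeling_problem(samples, labels)
    // holds and every label is < trainer.num_labels() ...

    dlib::sequence_labeler<my_feature_extractor> labeler = trainer.train(samples, labels);
    std::vector<unsigned long> predicted = labeler(samples[0]);  // one label per element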
-#ifndef DLIB_STRUCTURAL_SEQUENCE_sEGMENTATION_TRAINER_Hh_ -#define DLIB_STRUCTURAL_SEQUENCE_sEGMENTATION_TRAINER_Hh_ - -#include "structural_sequence_segmentation_trainer_abstract.h" -#include "structural_sequence_labeling_trainer.h" -#include "sequence_segmenter.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class structural_sequence_segmentation_trainer - { - public: - typedef typename feature_extractor::sequence_type sample_sequence_type; - typedef std::vector<std::pair<unsigned long, unsigned long> > segmented_sequence_type; - - typedef sequence_segmenter<feature_extractor> trained_function_type; - - explicit structural_sequence_segmentation_trainer ( - const feature_extractor& fe_ - ) : trainer(impl_ss::feature_extractor<feature_extractor>(fe_)) - { - loss_per_missed_segment = 1; - loss_per_false_alarm = 1; - } - - structural_sequence_segmentation_trainer ( - ) - { - loss_per_missed_segment = 1; - loss_per_false_alarm = 1; - } - - const feature_extractor& get_feature_extractor ( - ) const { return trainer.get_feature_extractor().fe; } - - void set_num_threads ( - unsigned long num - ) - { - trainer.set_num_threads(num); - } - - unsigned long get_num_threads ( - ) const - { - return trainer.get_num_threads(); - } - - void set_epsilon ( - double eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void structural_sequence_segmentation_trainer::set_epsilon()" - << "\n\t eps_ must be greater than 0" - << "\n\t eps_: " << eps_ - << "\n\t this: " << this - ); - - trainer.set_epsilon(eps_); - } - - double get_epsilon ( - ) const { return trainer.get_epsilon(); } - - unsigned long get_max_iterations ( - ) const { return trainer.get_max_iterations(); } - - void set_max_iterations ( - unsigned long max_iter - ) - { - trainer.set_max_iterations(max_iter); - } - - void set_max_cache_size ( - unsigned long max_size - ) - { - trainer.set_max_cache_size(max_size); - } - - unsigned long get_max_cache_size ( - ) const - { - return trainer.get_max_cache_size(); - } - - void be_verbose ( - ) - { - trainer.be_verbose(); - } - - void be_quiet ( - ) - { - trainer.be_quiet(); - } - - void set_oca ( - const oca& item - ) - { - trainer.set_oca(item); - } - - const oca get_oca ( - ) const - { - return trainer.get_oca(); - } - - void set_c ( - double C_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C_ > 0, - "\t void structural_sequence_segmentation_trainer::set_c()" - << "\n\t C_ must be greater than 0" - << "\n\t C_: " << C_ - << "\n\t this: " << this - ); - - trainer.set_c(C_); - } - - double get_c ( - ) const - { - return trainer.get_c(); - } - - void set_loss_per_missed_segment ( - double loss - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(loss >= 0, - "\t void structural_sequence_segmentation_trainer::set_loss_per_missed_segment(loss)" - << "\n\t invalid inputs were given to this function" - << "\n\t loss: " << loss - << "\n\t this: " << this - ); - - loss_per_missed_segment = loss; - - if (feature_extractor::use_BIO_model) - { - trainer.set_loss(impl_ss::BEGIN, loss_per_missed_segment); - trainer.set_loss(impl_ss::INSIDE, loss_per_missed_segment); - } - else - { - trainer.set_loss(impl_ss::BEGIN, loss_per_missed_segment); - trainer.set_loss(impl_ss::INSIDE, loss_per_missed_segment); - trainer.set_loss(impl_ss::LAST, loss_per_missed_segment); - trainer.set_loss(impl_ss::UNIT, loss_per_missed_segment); - } - } 
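        // Illustrative note (assumed, not from the original source): under the
        // BIO model a true segment [begin, end) is encoded as one BEGIN tag
        // followed by INSIDE tags, so raising the loss on BEGIN and INSIDE
        // makes every element of a missed segment more costly.  For example,
        // the segment [2,5) in a length 6 sequence is tagged:
        //     BIO:   O O B I I O
        //     BILOU: O O B I L O
        // which is why the BILOU branch above must also cover LAST and UNIT.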
- - double get_loss_per_missed_segment ( - ) const - { - return loss_per_missed_segment; - } - - void set_loss_per_false_alarm ( - double loss - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(loss >= 0, - "\t void structural_sequence_segmentation_trainer::set_loss_per_false_alarm(loss)" - << "\n\t invalid inputs were given to this function" - << "\n\t loss: " << loss - << "\n\t this: " << this - ); - - loss_per_false_alarm = loss; - - trainer.set_loss(impl_ss::OUTSIDE, loss_per_false_alarm); - } - - double get_loss_per_false_alarm ( - ) const - { - return loss_per_false_alarm; - } - - const sequence_segmenter<feature_extractor> train( - const std::vector<sample_sequence_type>& x, - const std::vector<segmented_sequence_type>& y - ) const - { - - // make sure requires clause is not broken - DLIB_ASSERT(is_sequence_segmentation_problem(x,y) == true, - "\t sequence_segmenter structural_sequence_segmentation_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t x.size(): " << x.size() - << "\n\t is_sequence_segmentation_problem(x,y): " << is_sequence_segmentation_problem(x,y) - << "\n\t this: " << this - ); - - std::vector<std::vector<unsigned long> > labels(y.size()); - if (feature_extractor::use_BIO_model) - { - // convert y into tagged BIO labels - for (unsigned long i = 0; i < labels.size(); ++i) - { - labels[i].resize(x[i].size(), impl_ss::OUTSIDE); - for (unsigned long j = 0; j < y[i].size(); ++j) - { - const unsigned long begin = y[i][j].first; - const unsigned long end = y[i][j].second; - if (begin != end) - { - labels[i][begin] = impl_ss::BEGIN; - for (unsigned long k = begin+1; k < end; ++k) - labels[i][k] = impl_ss::INSIDE; - } - } - } - } - else - { - // convert y into tagged BILOU labels - for (unsigned long i = 0; i < labels.size(); ++i) - { - labels[i].resize(x[i].size(), impl_ss::OUTSIDE); - for (unsigned long j = 0; j < y[i].size(); ++j) - { - const unsigned long begin = y[i][j].first; - const unsigned long end = y[i][j].second; - if (begin != end) - { - if (begin+1==end) - { - labels[i][begin] = impl_ss::UNIT; - } - else - { - labels[i][begin] = impl_ss::BEGIN; - for (unsigned long k = begin+1; k+1 < end; ++k) - labels[i][k] = impl_ss::INSIDE; - labels[i][end-1] = impl_ss::LAST; - } - } - } - } - } - - sequence_labeler<impl_ss::feature_extractor<feature_extractor> > temp; - temp = trainer.train(x, labels); - return sequence_segmenter<feature_extractor>(temp.get_weights(), trainer.get_feature_extractor().fe); - } - - private: - - structural_sequence_labeling_trainer<impl_ss::feature_extractor<feature_extractor> > trainer; - double loss_per_missed_segment; - double loss_per_false_alarm; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SEQUENCE_sEGMENTATION_TRAINER_Hh_ - diff --git a/ml/dlib/dlib/svm/structural_sequence_segmentation_trainer_abstract.h b/ml/dlib/dlib/svm/structural_sequence_segmentation_trainer_abstract.h deleted file mode 100644 index bcd927ca6..000000000 --- a/ml/dlib/dlib/svm/structural_sequence_segmentation_trainer_abstract.h +++ /dev/null @@ -1,264 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
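A rough usage sketch of the segmentation trainer above, ahead of its abstract below; my_fe and the data setup are assumed placeholders, not code from the deleted sources:

    // Minimal sketch, assuming my_fe implements the interface from
    // sequence_segmenter_abstract.h.
    my_fe fe;
    dlib::structural_sequence_segmentation_trainer<my_fe> trainer(fe);
    trainer.set_loss_per_missed_segment(2);  // make missed segments cost double
    std::vector<my_fe::sequence_type> samples;
    std::vector<std::vector<std::pair<unsigned long,unsigned long> > > segments;
    // ... fill so that is_sequence_segmentation_problem(samples, segments) holds;
    // each pair [first,second) marks one segment of the corresponding sample ...
    dlib::sequence_segmenter<my_fe> seg = trainer.train(samples, segments);
    std::vector<std::pair<unsigned long,unsigned long> > found = seg(samples[0]);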
-#undef DLIB_STRUCTURAL_SEQUENCE_sEGMENTATION_TRAINER_ABSTRACT_Hh_ -#ifdef DLIB_STRUCTURAL_SEQUENCE_sEGMENTATION_TRAINER_ABSTRACT_Hh_ - -#include "sequence_segmenter_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class structural_sequence_segmentation_trainer - { - /*! - REQUIREMENTS ON feature_extractor - It must be an object that implements an interface compatible with - the example_feature_extractor defined in dlib/svm/sequence_segmenter_abstract.h. - - WHAT THIS OBJECT REPRESENTS - This object is a tool for learning to do sequence segmentation based on a - set of training data. The training procedure produces a sequence_segmenter - object which can be used to identify the sub-segments of new data - sequences. - - This object internally uses the structural_sequence_labeling_trainer to - solve the learning problem. - !*/ - - public: - - typedef typename feature_extractor::sequence_type sample_sequence_type; - typedef std::vector<std::pair<unsigned long, unsigned long> > segmented_sequence_type; - - typedef sequence_segmenter<feature_extractor> trained_function_type; - - structural_sequence_segmentation_trainer ( - ); - /*! - ensures - - #get_c() == 100 - - this object isn't verbose - - #get_epsilon() == 0.1 - - #get_max_iterations() == 10000 - - #get_num_threads() == 2 - - #get_max_cache_size() == 5 - - #get_feature_extractor() == a default initialized feature_extractor - - #get_loss_per_missed_segment() == 1 - - #get_loss_per_false_alarm() == 1 - !*/ - - explicit structural_sequence_segmentation_trainer ( - const feature_extractor& fe - ); - /*! - ensures - - #get_c() == 100 - - this object isn't verbose - - #get_epsilon() == 0.1 - - #get_max_iterations() == 10000 - - #get_num_threads() == 2 - - #get_max_cache_size() == 5 - - #get_feature_extractor() == fe - - #get_loss_per_missed_segment() == 1 - - #get_loss_per_false_alarm() == 1 - !*/ - - const feature_extractor& get_feature_extractor ( - ) const; - /*! - ensures - - returns the feature extractor used by this object - !*/ - - void set_num_threads ( - unsigned long num - ); - /*! - ensures - - #get_num_threads() == num - !*/ - - unsigned long get_num_threads ( - ) const; - /*! - ensures - - returns the number of threads used during training. You should - usually set this equal to the number of processing cores on your - machine. - !*/ - - void set_epsilon ( - double eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - double get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Smaller values may result in a more accurate solution but take longer - to train. You can think of this epsilon value as saying "solve the - optimization problem until the average number of segmentation mistakes - per training sample is within epsilon of its optimal value". - !*/ - - void set_max_iterations ( - unsigned long max_iter - ); - /*! - ensures - - #get_max_iterations() == max_iter - !*/ - - unsigned long get_max_iterations ( - ); - /*! - ensures - - returns the maximum number of iterations the SVM optimizer is allowed to - run before it is required to stop and return a result. - !*/ - - void set_max_cache_size ( - unsigned long max_size - ); - /*! - ensures - - #get_max_cache_size() == max_size - !*/ - - unsigned long get_max_cache_size ( - ) const; - /*!
- ensures - - During training, this object basically runs the sequence_segmenter on - each training sample, over and over. To speed this up, it is possible to - cache the results of these segmenter invocations. This function returns - the number of cache elements per training sample kept in the cache. Note - that a value of 0 means caching is not used at all. - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out so that a user can - observe the progress of the algorithm. - !*/ - - void be_quiet ( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - void set_oca ( - const oca& item - ); - /*! - ensures - - #get_oca() == item - !*/ - - const oca get_oca ( - ) const; - /*! - ensures - - returns a copy of the optimizer used to solve the structural SVM problem. - !*/ - - void set_c ( - double C - ); - /*! - requires - - C > 0 - ensures - - #get_c() == C - !*/ - - double get_c ( - ) const; - /*! - ensures - - returns the SVM regularization parameter. It is the parameter that - determines the trade-off between trying to fit the training data (i.e. - minimize the loss) and allowing more errors but hopefully improving the - generalization of the resulting sequence labeler. Larger values - encourage exact fitting while smaller values of C may encourage better - generalization. - !*/ - - void set_loss_per_missed_segment ( - double loss - ); - /*! - requires - - loss >= 0 - ensures - - #get_loss_per_missed_segment() == loss - !*/ - - double get_loss_per_missed_segment ( - ) const; - /*! - ensures - - returns the amount of loss incurred for failing to detect a segment. The - larger the loss the more important it is to detect all the segments. - !*/ - - - void set_loss_per_false_alarm ( - double loss - ); - /*! - requires - - loss >= 0 - ensures - - #get_loss_per_false_alarm() == loss - !*/ - - double get_loss_per_false_alarm ( - ) const; - /*! - ensures - - returns the amount of loss incurred for outputting a false detection. The - larger the loss the more important it is to avoid outputting false - detections. - !*/ - - const sequence_segmenter<feature_extractor> train( - const std::vector<sample_sequence_type>& x, - const std::vector<segmented_sequence_type>& y - ) const; - /*! - requires - - is_sequence_segmentation_problem(x, y) == true - ensures - - Uses the given training data to learn to do sequence segmentation. That - is, this function will try to find a sequence_segmenter capable of - predicting y[i] when given x[i] as input. Moreover, it should also be - capable of predicting the segmentation of new input sequences. Or in - other words, the learned sequence_segmenter should also generalize to new - data outside the training dataset. - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SEQUENCE_sEGMENTATION_TRAINER_ABSTRACT_Hh_ - diff --git a/ml/dlib/dlib/svm/structural_svm_assignment_problem.h b/ml/dlib/dlib/svm/structural_svm_assignment_problem.h deleted file mode 100644 index 963af1631..000000000 --- a/ml/dlib/dlib/svm/structural_svm_assignment_problem.h +++ /dev/null @@ -1,288 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_Hh_ -#define DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_Hh_ - - -#include "structural_svm_assignment_problem_abstract.h" -#include "../matrix.h" -#include <vector> -#include <iterator> -#include "structural_svm_problem_threaded.h" - -// ---------------------------------------------------------------------------------------- - -namespace dlib -{ - template <long n, typename T> - struct column_matrix_static_resize - { - typedef T type; - }; - - template <long n, typename T, long NR, long NC, typename MM, typename L> - struct column_matrix_static_resize<n, matrix<T,NR,NC,MM,L> > - { - typedef matrix<T,NR+n,NC,MM,L> type; - }; - - template <long n, typename T, long NC, typename MM, typename L> - struct column_matrix_static_resize<n, matrix<T,0,NC,MM,L> > - { - typedef matrix<T,0,NC,MM,L> type; - }; - - template <typename T> - struct add_one_to_static_feat_size - { - typedef typename column_matrix_static_resize<1,typename T::feature_vector_type>::type type; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class structural_svm_assignment_problem : noncopyable, - public structural_svm_problem_threaded<matrix<double,0,1>, typename add_one_to_static_feat_size<feature_extractor>::type > - { - public: - typedef matrix<double,0,1> matrix_type; - typedef typename add_one_to_static_feat_size<feature_extractor>::type feature_vector_type; - - typedef typename feature_extractor::lhs_element lhs_element; - typedef typename feature_extractor::rhs_element rhs_element; - - - typedef std::pair<std::vector<lhs_element>, std::vector<rhs_element> > sample_type; - - typedef std::vector<long> label_type; - - structural_svm_assignment_problem( - const std::vector<sample_type>& samples_, - const std::vector<label_type>& labels_, - const feature_extractor& fe_, - bool force_assignment_, - unsigned long num_threads, - const double loss_per_false_association_, - const double loss_per_missed_association_ - ) : - structural_svm_problem_threaded<matrix_type,feature_vector_type>(num_threads), - samples(samples_), - labels(labels_), - fe(fe_), - force_assignment(force_assignment_), - loss_per_false_association(loss_per_false_association_), - loss_per_missed_association(loss_per_missed_association_) - { - // make sure requires clause is not broken -#ifdef ENABLE_ASSERTS - DLIB_ASSERT(loss_per_false_association > 0 && loss_per_missed_association > 0, - "\t structural_svm_assignment_problem::structural_svm_assignment_problem()" - << "\n\t invalid inputs were given to this function" - << "\n\t loss_per_false_association: " << loss_per_false_association - << "\n\t loss_per_missed_association: " << loss_per_missed_association - << "\n\t this: " << this - ); - if (force_assignment) - { - DLIB_ASSERT(is_forced_assignment_problem(samples, labels), - "\t structural_svm_assignment_problem::structural_svm_assignment_problem()" - << "\n\t invalid inputs were given to this function" - << "\n\t is_forced_assignment_problem(samples,labels): " << is_forced_assignment_problem(samples,labels) - << "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels) - << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels) - << "\n\t this: " << this - ); - } - else - { - DLIB_ASSERT(is_assignment_problem(samples, labels), - "\t structural_svm_assignment_problem::structural_svm_assignment_problem()" - << "\n\t invalid inputs were given to this function" - 
<< "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels) - << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels) - << "\n\t this: " << this - ); - } -#endif - - } - - private: - virtual long get_num_dimensions ( - ) const - { - return fe.num_features()+1; // +1 for the bias term - } - - virtual long get_num_samples ( - ) const - { - return samples.size(); - } - - template <typename psi_type> - typename enable_if<is_matrix<psi_type> >::type get_joint_feature_vector ( - const sample_type& sample, - const label_type& label, - psi_type& psi - ) const - { - typename feature_extractor::feature_vector_type feats; - psi.set_size(get_num_dimensions()); - psi = 0; - for (unsigned long i = 0; i < sample.first.size(); ++i) - { - if (label[i] != -1) - { - fe.get_features(sample.first[i], sample.second[label[i]], feats); - set_rowm(psi,range(0,feats.size()-1)) += feats; - psi(get_num_dimensions()-1) += 1; - } - } - } - - template <typename T> - void append_to_sparse_vect ( - T& psi, - const T& vect - ) const - { - std::copy(vect.begin(), vect.end(), std::back_inserter(psi)); - } - - template <typename psi_type> - typename disable_if<is_matrix<psi_type> >::type get_joint_feature_vector ( - const sample_type& sample, - const label_type& label, - psi_type& psi - ) const - { - psi.clear(); - feature_vector_type feats; - int num_assignments = 0; - for (unsigned long i = 0; i < sample.first.size(); ++i) - { - if (label[i] != -1) - { - fe.get_features(sample.first[i], sample.second[label[i]], feats); - append_to_sparse_vect(psi, feats); - ++num_assignments; - } - } - psi.push_back(std::make_pair(get_num_dimensions()-1,num_assignments)); - } - - virtual void get_truth_joint_feature_vector ( - long idx, - feature_vector_type& psi - ) const - { - get_joint_feature_vector(samples[idx], labels[idx], psi); - } - - virtual void separation_oracle ( - const long idx, - const matrix_type& current_solution, - double& loss, - feature_vector_type& psi - ) const - { - matrix<double> cost; - unsigned long size; - if (force_assignment) - { - unsigned long lhs_size = samples[idx].first.size(); - unsigned long rhs_size = samples[idx].second.size(); - size = std::max(lhs_size, rhs_size); - } - else - { - unsigned long rhs_size = samples[idx].second.size() + samples[idx].first.size(); - size = rhs_size; - } - cost.set_size(size, size); - - typename feature_extractor::feature_vector_type feats; - - // now fill out the cost assignment matrix - for (long r = 0; r < cost.nr(); ++r) - { - for (long c = 0; c < cost.nc(); ++c) - { - if (r < (long)samples[idx].first.size()) - { - if (c < (long)samples[idx].second.size()) - { - fe.get_features(samples[idx].first[r], samples[idx].second[c], feats); - const double bias = current_solution(current_solution.size()-1); - cost(r,c) = dot(colm(current_solution,0,current_solution.size()-1), feats) + bias; - - // add in the loss since this corresponds to an incorrect prediction. - if (c != labels[idx][r]) - { - cost(r,c) += loss_per_false_association; - } - } - else - { - if (labels[idx][r] == -1) - cost(r,c) = 0; - else - cost(r,c) = loss_per_missed_association; - } - - } - else - { - cost(r,c) = 0; - } - } - } - - std::vector<long> assignment; - - if (cost.size() != 0) - { - // max_cost_assignment() only works with integer matrices, so convert from - // double to integer. 
- const double scale = (std::numeric_limits<dlib::int64>::max()/1000)/max(abs(cost)); - matrix<dlib::int64> int_cost = matrix_cast<dlib::int64>(round(cost*scale)); - assignment = max_cost_assignment(int_cost); - assignment.resize(samples[idx].first.size()); - } - - loss = 0; - // adjust assignment so that non-assignments have a value of -1. Also compute loss. - for (unsigned long i = 0; i < assignment.size(); ++i) - { - if (assignment[i] >= (long)samples[idx].second.size()) - assignment[i] = -1; - - if (assignment[i] != labels[idx][i]) - { - if (assignment[i] == -1) - loss += loss_per_missed_association; - else - loss += loss_per_false_association; - } - } - - get_joint_feature_vector(samples[idx], assignment, psi); - } - - const std::vector<sample_type>& samples; - const std::vector<label_type>& labels; - const feature_extractor& fe; - bool force_assignment; - const double loss_per_false_association; - const double loss_per_missed_association; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_Hh_ - diff --git a/ml/dlib/dlib/svm/structural_svm_assignment_problem_abstract.h b/ml/dlib/dlib/svm/structural_svm_assignment_problem_abstract.h deleted file mode 100644 index c06190726..000000000 --- a/ml/dlib/dlib/svm/structural_svm_assignment_problem_abstract.h +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_ABSTRACT_Hh_ -#ifdef DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_ABSTRACT_Hh_ - - -#include "../matrix.h" -#include <vector> -#include "structural_svm_problem_threaded_abstract.h" -#include "assignment_function_abstract.h" - -// ---------------------------------------------------------------------------------------- - -namespace dlib -{ - - template < - typename feature_extractor - > - class structural_svm_assignment_problem : noncopyable, - public structural_svm_problem_threaded<matrix<double,0,1>, - typename feature_extractor::feature_vector_type > - { - /*! - REQUIREMENTS ON feature_extractor - It must be an object that implements an interface compatible with - the example_feature_extractor defined in dlib/svm/assignment_function_abstract.h. - - WHAT THIS OBJECT REPRESENTS - This object is a tool for learning the parameters needed to use an - assignment_function object. It learns the parameters by formulating the - problem as a structural SVM problem. - !*/ - - public: - typedef matrix<double,0,1> matrix_type; - typedef typename feature_extractor::feature_vector_type feature_vector_type; - typedef typename feature_extractor::lhs_element lhs_element; - typedef typename feature_extractor::rhs_element rhs_element; - typedef std::pair<std::vector<lhs_element>, std::vector<rhs_element> > sample_type; - typedef std::vector<long> label_type; - - structural_svm_assignment_problem( - const std::vector<sample_type>& samples, - const std::vector<label_type>& labels, - const feature_extractor& fe, - bool force_assignment, - unsigned long num_threads, - const double loss_per_false_association, - const double loss_per_missed_association - ); - /*! 
- requires - - loss_per_false_association > 0 - - loss_per_missed_association > 0 - - is_assignment_problem(samples,labels) == true - - if (force_assignment) then - - is_forced_assignment_problem(samples,labels) == true - ensures - - This object attempts to learn a mapping from the given samples to the - given labels. In particular, it attempts to learn to predict labels[i] - based on samples[i]. Or in other words, this object can be used to learn - a parameter vector and bias, w and b, such that an assignment_function declared as: - assignment_function<feature_extractor> assigner(w,b,fe,force_assignment) - results in an assigner object which attempts to compute the following mapping: - labels[i] == assigner(samples[i]) - - This object will use num_threads threads during the optimization - procedure. You should set this parameter equal to the number of - available processing cores on your machine. - - When solving the structural SVM problem, we will use - loss_per_false_association as the loss for incorrectly associating - objects that shouldn't be associated. - - When solving the structural SVM problem, we will use - loss_per_missed_association as the loss for failing to associate - objects that are supposed to be associated with each other. - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_ABSTRACT_Hh_ - - - diff --git a/ml/dlib/dlib/svm/structural_svm_distributed.h b/ml/dlib/dlib/svm/structural_svm_distributed.h deleted file mode 100644 index a9542c70f..000000000 --- a/ml/dlib/dlib/svm/structural_svm_distributed.h +++ /dev/null @@ -1,700 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license.
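A rough sketch of driving the assignment problem above directly with the oca solver (in dlib this is normally wrapped by structural_assignment_trainer); my_fe and the data are assumed placeholders, not code from the deleted sources:

    // Minimal sketch, assuming my_fe matches the example_feature_extractor
    // interface in assignment_function_abstract.h.
    typedef std::pair<std::vector<my_fe::lhs_element>,
                      std::vector<my_fe::rhs_element> > sample_type;
    std::vector<sample_type> samples;
    std::vector<std::vector<long> > labels;   // labels[i][j] = rhs index or -1
    // ... fill so that is_assignment_problem(samples, labels) holds ...
    my_fe fe;
    dlib::structural_svm_assignment_problem<my_fe> prob(
        samples, labels, fe, false /*force_assignment*/, 4 /*num_threads*/,
        1.0 /*loss_per_false_association*/, 1.0 /*loss_per_missed_association*/);
    prob.set_c(50);
    dlib::matrix<double,0,1> weights;
    dlib::oca solver;
    solver(prob, weights);  // weights = learned w, with the bias as last element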
-#ifndef DLIB_STRUCTURAL_SVM_DISTRIBUTeD_Hh_ -#define DLIB_STRUCTURAL_SVM_DISTRIBUTeD_Hh_ - -#include <memory> -#include <iostream> -#include <vector> - -#include "structural_svm_distributed_abstract.h" -#include "structural_svm_problem.h" -#include "../bridge.h" -#include "../misc_api.h" -#include "../statistics.h" -#include "../threads.h" -#include "../pipe.h" -#include "../type_safe_union.h" - - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - - template <typename matrix_type> - struct oracle_response - { - typedef typename matrix_type::type scalar_type; - - matrix_type subgradient; - scalar_type loss; - long num; - - friend void swap (oracle_response& a, oracle_response& b) - { - a.subgradient.swap(b.subgradient); - std::swap(a.loss, b.loss); - std::swap(a.num, b.num); - } - - friend void serialize (const oracle_response& item, std::ostream& out) - { - serialize(item.subgradient, out); - dlib::serialize(item.loss, out); - dlib::serialize(item.num, out); - } - - friend void deserialize (oracle_response& item, std::istream& in) - { - deserialize(item.subgradient, in); - dlib::deserialize(item.loss, in); - dlib::deserialize(item.num, in); - } - }; - - // ---------------------------------------------------------------------------------------- - - template <typename matrix_type> - struct oracle_request - { - typedef typename matrix_type::type scalar_type; - - matrix_type current_solution; - scalar_type saved_current_risk_gap; - bool skip_cache; - bool converged; - - friend void swap (oracle_request& a, oracle_request& b) - { - a.current_solution.swap(b.current_solution); - std::swap(a.saved_current_risk_gap, b.saved_current_risk_gap); - std::swap(a.skip_cache, b.skip_cache); - std::swap(a.converged, b.converged); - } - - friend void serialize (const oracle_request& item, std::ostream& out) - { - serialize(item.current_solution, out); - dlib::serialize(item.saved_current_risk_gap, out); - dlib::serialize(item.skip_cache, out); - dlib::serialize(item.converged, out); - } - - friend void deserialize (oracle_request& item, std::istream& in) - { - deserialize(item.current_solution, in); - dlib::deserialize(item.saved_current_risk_gap, in); - dlib::deserialize(item.skip_cache, in); - dlib::deserialize(item.converged, in); - } - }; - - } - -// ---------------------------------------------------------------------------------------- - - class svm_struct_processing_node : noncopyable - { - public: - - template < - typename T, - typename U - > - svm_struct_processing_node ( - const structural_svm_problem<T,U>& problem, - unsigned short port, - unsigned short num_threads - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(port != 0 && problem.get_num_samples() != 0 && - problem.get_num_dimensions() != 0, - "\t svm_struct_processing_node()" - << "\n\t Invalid arguments were given to this function" - << "\n\t port: " << port - << "\n\t problem.get_num_samples(): " << problem.get_num_samples() - << "\n\t problem.get_num_dimensions(): " << problem.get_num_dimensions() - << "\n\t this: " << this - ); - - the_problem.reset(new node_type<T,U>(problem, port, num_threads)); - } - - private: - - struct base - { - virtual ~base(){} - }; - - template < - typename matrix_type, - typename feature_vector_type - > - class node_type : public base, threaded_object - { - public: - typedef typename matrix_type::type scalar_type; - - node_type( - const structural_svm_problem<matrix_type,feature_vector_type>& prob, - 
unsigned short port, - unsigned long num_threads - ) : in(3),out(3), problem(prob), tp(num_threads) - { - b.reconfigure(listen_on_port(port), receive(in), transmit(out)); - - start(); - } - - ~node_type() - { - in.disable(); - out.disable(); - wait(); - } - - private: - - void thread() - { - using namespace impl; - tsu_in msg; - tsu_out temp; - - timestamper ts; - running_stats<double> with_buffer_time; - running_stats<double> without_buffer_time; - unsigned long num_iterations_executed = 0; - - while (in.dequeue(msg)) - { - // initialize the cache and compute psi_true. - if (cache.size() == 0) - { - cache.resize(problem.get_num_samples()); - for (unsigned long i = 0; i < cache.size(); ++i) - cache[i].init(&problem,i); - - psi_true.set_size(problem.get_num_dimensions(),1); - psi_true = 0; - - const unsigned long num = problem.get_num_samples(); - feature_vector_type ftemp; - for (unsigned long i = 0; i < num; ++i) - { - cache[i].get_truth_joint_feature_vector_cached(ftemp); - - subtract_from(psi_true, ftemp); - } - } - - - if (msg.template contains<bridge_status>() && - msg.template get<bridge_status>().is_connected) - { - temp = problem.get_num_dimensions(); - out.enqueue(temp); - - } - else if (msg.template contains<oracle_request<matrix_type> >()) - { - ++num_iterations_executed; - - const oracle_request<matrix_type>& req = msg.template get<oracle_request<matrix_type> >(); - - oracle_response<matrix_type>& data = temp.template get<oracle_response<matrix_type> >(); - - data.subgradient = psi_true; - data.loss = 0; - - data.num = problem.get_num_samples(); - - const uint64 start_time = ts.get_timestamp(); - - // pick fastest buffering strategy - bool buffer_subgradients_locally = with_buffer_time.mean() < without_buffer_time.mean(); - - // every 50 iterations we should try to flip the buffering scheme to see if - // doing it the other way might be better. - if ((num_iterations_executed%50) == 0) - { - buffer_subgradients_locally = !buffer_subgradients_locally; - } - - binder b(*this, req, data, buffer_subgradients_locally); - parallel_for_blocked(tp, 0, data.num, b, &binder::call_oracle); - - const uint64 stop_time = ts.get_timestamp(); - if (buffer_subgradients_locally) - with_buffer_time.add(stop_time-start_time); - else - without_buffer_time.add(stop_time-start_time); - - out.enqueue(temp); - } - } - } - - struct binder - { - binder ( - const node_type& self_, - const impl::oracle_request<matrix_type>& req_, - impl::oracle_response<matrix_type>& data_, - bool buffer_subgradients_locally_ - ) : self(self_), req(req_), data(data_), - buffer_subgradients_locally(buffer_subgradients_locally_) {} - - void call_oracle ( - long begin, - long end - ) - { - // If we are only going to call the separation oracle once then don't - // run the slightly more complex for loop version of this code. Or if - // we just don't want to run the complex buffering one. The code later - // on decides if we should do the buffering based on how long it takes - // to execute. We do this because, when the subgradient is really high - // dimensional it can take a lot of time to add them together. So we - // might want to avoid doing that. 
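                // Illustrative note (assumed, not from the original source):
                // the first branch below locks the shared accumulator once per
                // sample, while the second accumulates into a thread-local
                // faccum and locks only once per [begin,end) range.  Buffering
                // costs extra vector additions, which is why thread() times
                // both variants with running_stats and keeps the faster one,
                // re-testing every 50th iteration.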
- if (end-begin <= 1 || !buffer_subgradients_locally) - { - scalar_type loss; - feature_vector_type ftemp; - for (long i = begin; i < end; ++i) - { - self.cache[i].separation_oracle_cached(req.converged, - req.skip_cache, - req.saved_current_risk_gap, - req.current_solution, - loss, - ftemp); - - auto_mutex lock(self.accum_mutex); - data.loss += loss; - add_to(data.subgradient, ftemp); - } - } - else - { - scalar_type loss = 0; - matrix_type faccum(data.subgradient.size(),1); - faccum = 0; - - feature_vector_type ftemp; - - for (long i = begin; i < end; ++i) - { - scalar_type loss_temp; - self.cache[i].separation_oracle_cached(req.converged, - req.skip_cache, - req.saved_current_risk_gap, - req.current_solution, - loss_temp, - ftemp); - loss += loss_temp; - add_to(faccum, ftemp); - } - - auto_mutex lock(self.accum_mutex); - data.loss += loss; - add_to(data.subgradient, faccum); - } - } - - const node_type& self; - const impl::oracle_request<matrix_type>& req; - impl::oracle_response<matrix_type>& data; - bool buffer_subgradients_locally; - }; - - - - typedef type_safe_union<impl::oracle_request<matrix_type>, bridge_status> tsu_in; - typedef type_safe_union<impl::oracle_response<matrix_type> , long> tsu_out; - - pipe<tsu_in> in; - pipe<tsu_out> out; - bridge b; - - mutable matrix_type psi_true; - const structural_svm_problem<matrix_type,feature_vector_type>& problem; - mutable std::vector<cache_element_structural_svm<structural_svm_problem<matrix_type,feature_vector_type> > > cache; - - mutable thread_pool tp; - mutex accum_mutex; - }; - - - std::unique_ptr<base> the_problem; - }; - -// ---------------------------------------------------------------------------------------- - - class svm_struct_controller_node : noncopyable - { - public: - - svm_struct_controller_node ( - ) : - eps(0.001), - max_iterations(10000), - cache_based_eps(std::numeric_limits<double>::infinity()), - verbose(false), - C(1) - {} - - double get_cache_based_epsilon ( - ) const - { - return cache_based_eps; - } - - void set_cache_based_epsilon ( - double eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void svm_struct_controller_node::set_cache_based_epsilon()" - << "\n\t eps_ must be greater than 0" - << "\n\t eps_: " << eps_ - << "\n\t this: " << this - ); - - cache_based_eps = eps_; - } - - void set_epsilon ( - double eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void svm_struct_controller_node::set_epsilon()" - << "\n\t eps_ must be greater than 0" - << "\n\t eps_: " << eps_ - << "\n\t this: " << this - ); - - eps = eps_; - } - - double get_epsilon ( - ) const { return eps; } - - unsigned long get_max_iterations ( - ) const { return max_iterations; } - - void set_max_iterations ( - unsigned long max_iter - ) - { - max_iterations = max_iter; - } - - void be_verbose ( - ) - { - verbose = true; - } - - void be_quiet( - ) - { - verbose = false; - } - - void add_nuclear_norm_regularizer ( - long first_dimension, - long rows, - long cols, - double regularization_strength - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 <= first_dimension && - 0 <= rows && 0 <= cols && - 0 < regularization_strength, - "\t void svm_struct_controller_node::add_nuclear_norm_regularizer()" - << "\n\t Invalid arguments were given to this function." 
- << "\n\t first_dimension: " << first_dimension - << "\n\t rows: " << rows - << "\n\t cols: " << cols - << "\n\t regularization_strength: " << regularization_strength - << "\n\t this: " << this - ); - - impl::nuclear_norm_regularizer temp; - temp.first_dimension = first_dimension; - temp.nr = rows; - temp.nc = cols; - temp.regularization_strength = regularization_strength; - nuclear_norm_regularizers.push_back(temp); - } - - unsigned long num_nuclear_norm_regularizers ( - ) const { return nuclear_norm_regularizers.size(); } - - void clear_nuclear_norm_regularizers ( - ) { nuclear_norm_regularizers.clear(); } - - - double get_c ( - ) const { return C; } - - void set_c ( - double C_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C_ > 0, - "\t void svm_struct_controller_node::set_c()" - << "\n\t C_ must be greater than 0" - << "\n\t C_: " << C_ - << "\n\t this: " << this - ); - - C = C_; - } - - void add_processing_node ( - const network_address& addr - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(addr.port != 0, - "\t void svm_struct_controller_node::add_processing_node()" - << "\n\t Invalid inputs were given to this function" - << "\n\t addr.host_address: " << addr.host_address - << "\n\t addr.port: " << addr.port - << "\n\t this: " << this - ); - - // check if this address is already registered - for (unsigned long i = 0; i < nodes.size(); ++i) - { - if (nodes[i] == addr) - { - return; - } - } - - nodes.push_back(addr); - } - - void add_processing_node ( - const std::string& ip_or_hostname, - unsigned short port - ) - { - add_processing_node(network_address(ip_or_hostname,port)); - } - - unsigned long get_num_processing_nodes ( - ) const - { - return nodes.size(); - } - - void remove_processing_nodes ( - ) - { - nodes.clear(); - } - - template <typename matrix_type> - double operator() ( - const oca& solver, - matrix_type& w - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(get_num_processing_nodes() != 0, - "\t double svm_struct_controller_node::operator()" - << "\n\t You must add some processing nodes before calling this function." 
- << "\n\t this: " << this - ); - - problem_type<matrix_type> problem(nodes); - problem.set_cache_based_epsilon(cache_based_eps); - problem.set_epsilon(eps); - problem.set_max_iterations(max_iterations); - if (verbose) - problem.be_verbose(); - problem.set_c(C); - for (unsigned long i = 0; i < nuclear_norm_regularizers.size(); ++i) - { - problem.add_nuclear_norm_regularizer( - nuclear_norm_regularizers[i].first_dimension, - nuclear_norm_regularizers[i].nr, - nuclear_norm_regularizers[i].nc, - nuclear_norm_regularizers[i].regularization_strength); - } - - return solver(problem, w); - } - - class invalid_problem : public error - { - public: - invalid_problem( - const std::string& a - ): error(a) {} - }; - - - private: - - template <typename matrix_type_> - class problem_type : public structural_svm_problem<matrix_type_> - { - public: - typedef typename matrix_type_::type scalar_type; - typedef matrix_type_ matrix_type; - - problem_type ( - const std::vector<network_address>& nodes_ - ) : - nodes(nodes_), - in(3), - num_dims(0) - { - - // initialize all the transmit pipes - out_pipes.resize(nodes.size()); - for (unsigned long i = 0; i < out_pipes.size(); ++i) - { - out_pipes[i].reset(new pipe<tsu_out>(3)); - } - - // make bridges that connect to all our remote processing nodes - bridges.resize(nodes.size()); - for (unsigned long i = 0; i< bridges.size(); ++i) - { - bridges[i].reset(new bridge(connect_to(nodes[i]), - receive(in), transmit(*out_pipes[i]))); - } - - - - // The remote processing nodes are supposed to all send the problem dimensionality - // upon connection. So get that and make sure everyone agrees on what it's supposed to be. - tsu_in temp; - unsigned long responses = 0; - bool seen_dim = false; - while (responses < nodes.size()) - { - in.dequeue(temp); - if (temp.template contains<long>()) - { - ++responses; - // if this new dimension doesn't match what we have seen previously - if (seen_dim && num_dims != temp.template get<long>()) - { - throw invalid_problem("remote hosts disagree on the number of dimensions!"); - } - seen_dim = true; - num_dims = temp.template get<long>(); - } - } - } - - // These functions are just here because the structural_svm_problem requires - // them, but since we are overloading get_risk() they are never called so they - // don't matter. 
- virtual long get_num_samples () const {return 0;} - virtual void get_truth_joint_feature_vector ( long , matrix_type& ) const {} - virtual void separation_oracle ( const long , const matrix_type& , scalar_type& , matrix_type& ) const {} - - virtual long get_num_dimensions ( - ) const - { - return num_dims; - } - - virtual void get_risk ( - matrix_type& w, - scalar_type& risk, - matrix_type& subgradient - ) const - { - using namespace impl; - subgradient.set_size(w.size(),1); - subgradient = 0; - - // send out all the oracle requests - tsu_out temp_out; - for (unsigned long i = 0; i < out_pipes.size(); ++i) - { - temp_out.template get<oracle_request<matrix_type> >().current_solution = w; - temp_out.template get<oracle_request<matrix_type> >().saved_current_risk_gap = this->saved_current_risk_gap; - temp_out.template get<oracle_request<matrix_type> >().skip_cache = this->skip_cache; - temp_out.template get<oracle_request<matrix_type> >().converged = this->converged; - out_pipes[i]->enqueue(temp_out); - } - - // collect all the oracle responses - long num = 0; - scalar_type total_loss = 0; - tsu_in temp_in; - unsigned long responses = 0; - while (responses < out_pipes.size()) - { - in.dequeue(temp_in); - if (temp_in.template contains<oracle_response<matrix_type> >()) - { - ++responses; - const oracle_response<matrix_type>& data = temp_in.template get<oracle_response<matrix_type> >(); - subgradient += data.subgradient; - total_loss += data.loss; - num += data.num; - } - } - - subgradient /= num; - total_loss /= num; - risk = total_loss + dot(subgradient,w); - - if (this->nuclear_norm_regularizers.size() != 0) - { - matrix_type grad; - double obj; - this->compute_nuclear_norm_parts(w, grad, obj); - risk += obj; - subgradient += grad; - } - } - - std::vector<network_address> nodes; - - typedef type_safe_union<impl::oracle_request<matrix_type> > tsu_out; - typedef type_safe_union<impl::oracle_response<matrix_type>, long> tsu_in; - - std::vector<std::shared_ptr<pipe<tsu_out> > > out_pipes; - mutable pipe<tsu_in> in; - std::vector<std::shared_ptr<bridge> > bridges; - long num_dims; - }; - - std::vector<network_address> nodes; - double eps; - unsigned long max_iterations; - double cache_based_eps; - bool verbose; - double C; - std::vector<impl::nuclear_norm_regularizer> nuclear_norm_regularizers; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SVM_DISTRIBUTeD_Hh_ - diff --git a/ml/dlib/dlib/svm/structural_svm_distributed_abstract.h b/ml/dlib/dlib/svm/structural_svm_distributed_abstract.h deleted file mode 100644 index 175a643c8..000000000 --- a/ml/dlib/dlib/svm/structural_svm_distributed_abstract.h +++ /dev/null @@ -1,357 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_ -#ifdef DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_ - - -#include "structural_svm_problem_abstract.h" -#include "../optimization/optimization_oca_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class svm_struct_processing_node : noncopyable - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is a tool for distributing the work involved in solving - a dlib::structural_svm_problem across many computers. It is used in - conjunction with the svm_struct_controller_node defined below. 
- !*/ - - public: - - template < - typename T, - typename U - > - svm_struct_processing_node ( - const structural_svm_problem<T,U>& problem, - unsigned short port, - unsigned short num_threads - ); - /*! - requires - - port != 0 - - problem.get_num_samples() != 0 - - problem.get_num_dimensions() != 0 - ensures - - This object will listen on the given port for a TCP connection from a - svm_struct_controller_node. Once connected, the controller node will - be able to access the given problem. - - Will use num_threads threads at a time to make concurrent calls to the - problem.separation_oracle() routine. You should set this parameter equal - to the number of available processing cores. - - Note that the following parameters within the given problem are ignored: - - problem.get_c() - - problem.get_epsilon() - - problem.get_cache_based_epsilon() - - problem.num_nuclear_norm_regularizers() - - whether the problem is verbose or not - Instead, they are defined by the svm_struct_controller_node. Note, however, - that the problem.get_max_cache_size() parameter is meaningful and controls - the size of the separation oracle cache within a svm_struct_processing_node. - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - class svm_struct_controller_node : noncopyable - { - /*! - INITIAL VALUE - - get_num_processing_nodes() == 0 - - get_epsilon() == 0.001 - - get_max_iterations() == 10000 - - get_c() == 1 - - This object will not be verbose - - WHAT THIS OBJECT REPRESENTS - This object is a tool for distributing the work involved in solving a - dlib::structural_svm_problem across many computers. The best way to understand - its use is via example: - - First, suppose you have defined a structural_svm_problem object by inheriting from - it and defining the appropriate virtual functions. You could solve it by passing - an instance to the oca optimizer. However, if your separation oracle takes a long - time to evaluate then the optimization will take a long time to solve. To speed - this up we can distribute the calls to the separation oracle across many computers. - - To make this concrete, let's imagine you want to distribute the work across three - computers. You can accomplish this by creating four programs. One containing a - svm_struct_controller_node and three containing svm_struct_processing_nodes. - - The programs might look like this: - - Controller program: - int main() - { - svm_struct_controller_node cont; - cont.set_c(100); - // Tell cont where the processing nodes are on your network. - cont.add_processing_node("192.168.1.10:12345"); - cont.add_processing_node("192.168.1.11:12345"); - cont.add_processing_node("192.168.1.12:12345"); - matrix<double> w; - oca solver; - cont(solver, w); // Run the optimization. - // After this finishes w will contain the solution vector. - } - - Processing programs (they are all the same, except that each loads a different subset - of the training data): - int main() - { - // Put one third of your data into this problem object. How you do this depends on your problem. - your_structural_svm_problem problem; - svm_struct_processing_node node(problem, 12345, number_of_cores_on_this_computer); - cout << "hit enter to terminate this program" << endl; - cin.get(); - } - - !*/ - - public: - - svm_struct_controller_node ( - ); - /*! - ensures - - this object is properly initialized - !*/ - - void set_epsilon ( - double eps - ); - /*!
- requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - double get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Smaller values may result in a more accurate solution but take longer - to execute. Specifically, the algorithm stops when the average sample - risk (i.e. R(w) as defined by the dlib::structural_svm_problem object) is - within epsilon of its optimal value. - - Also note that sample risk is an upper bound on a sample's loss. So - you can think of this epsilon value as saying "solve the optimization - problem until the average loss per sample is within epsilon of its - optimal value". - !*/ - - double get_cache_based_epsilon ( - ) const; - /*! - ensures - - if (get_max_cache_size() != 0) then - - The solver will not stop when the average sample risk is within - get_epsilon() of its optimal value. Instead, it will keep running - but will run the optimizer completely on the cache until the average - sample risk is within #get_cache_based_epsilon() of its optimal - value. This means that it will perform this additional refinement in - the solution accuracy without making any additional calls to the - separation_oracle(). This is useful when using a nuclear norm - regularization term because it allows you to quickly solve the - optimization problem to a high precision, which in the case of a - nuclear norm regularized problem means that many of the learned - matrices will be low rank or very close to low rank due to the - nuclear norm regularizer. This may not happen without solving the - problem to a high accuracy or their ranks may be difficult to - determine, so the extra accuracy given by the cache based refinement - is very useful. Finally, note that we include the nuclear norm term - as part of the "risk" for the purposes of determining when to stop. - - else - - The value of #get_cache_based_epsilon() has no effect. - !*/ - - void set_cache_based_epsilon ( - double eps - ); - /*! - requires - - eps > 0 - ensures - - #get_cache_based_epsilon() == eps - !*/ - - void set_max_iterations ( - unsigned long max_iter - ); - /*! - ensures - - #get_max_iterations() == max_iter - !*/ - - unsigned long get_max_iterations ( - ); - /*! - ensures - - returns the maximum number of iterations the SVM optimizer is allowed to - run before it is required to stop and return a result. - !*/ - - void add_nuclear_norm_regularizer ( - long first_dimension, - long rows, - long cols, - double regularization_strength - ); - /*! - requires - - 0 <= first_dimension < number of dimensions in problem - - 0 <= rows - - 0 <= cols - - first_dimension+rows*cols <= number of dimensions in problem - - 0 < regularization_strength - ensures - - Adds a nuclear norm regularization term to the optimization problem - solved by this object. That is, instead of solving: - Minimize: h(w) == 0.5*dot(w,w) + C*R(w) - this object will solve: - Minimize: h(w) == 0.5*dot(w,w) + C*R(w) + regularization_strength*nuclear_norm_of(part of w) - where "part of w" is the part of w indicated by the arguments to this - function. In particular, the part of w included in the nuclear norm is - exactly the matrix reshape(rowm(w, range(first_dimension, first_dimension+rows*cols-1)), rows, cols). - Therefore, if you think of the w vector as being the concatenation of a - bunch of matrices then you can use multiple calls to add_nuclear_norm_regularizer() - to add nuclear norm regularization terms to any of the matrices packed into w.
- - #num_nuclear_norm_regularizers() == num_nuclear_norm_regularizers() + 1 - !*/ - - unsigned long num_nuclear_norm_regularizers ( - ) const; - /*! - ensures - - returns the number of nuclear norm regularizers that are currently a part - of this optimization problem. That is, returns the number of times - add_nuclear_norm_regularizer() has been called since the last call to - clear_nuclear_norm_regularizers() or object construction, whichever is - most recent. - !*/ - - void clear_nuclear_norm_regularizers ( - ); - /*! - ensures - - #num_nuclear_norm_regularizers() == 0 - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out so that a - user can observe the progress of the algorithm. - !*/ - - void be_quiet( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - double get_c ( - ) const; - /*! - ensures - - returns the SVM regularization parameter. It is the parameter that - determines the trade-off between trying to fit the training data - exactly and allowing more errors but hopefully improving the - generalization of the resulting classifier. Larger values encourage - exact fitting while smaller values of C may encourage better - generalization. - !*/ - - void set_c ( - double C - ); - /*! - requires - - C > 0 - ensures - - #get_c() == C - !*/ - - void add_processing_node ( - const network_address& addr - ); - /*! - requires - - addr.port != 0 - ensures - - if (this address hasn't already been added) then - - #get_num_processing_nodes() == get_num_processing_nodes() + 1 - - When operator() is invoked to solve the structural svm problem this - object will connect to the svm_struct_processing_node located at the - given network address and will include it in the distributed - optimization. - !*/ - - void add_processing_node ( - const std::string& ip_or_hostname, - unsigned short port - ); - /*! - requires - - port != 0 - ensures - - invokes: add_processing_node(network_address(ip_or_hostname, port)) - !*/ - - unsigned long get_num_processing_nodes ( - ) const; - /*! - ensures - - returns the number of remote processing nodes that have been - registered with this object. - !*/ - - void remove_processing_nodes ( - ); - /*! - ensures - - #get_num_processing_nodes() == 0 - !*/ - - class invalid_problem : public error {}; - - template <typename matrix_type> - double operator() ( - const oca& solver, - matrix_type& w - ) const; - /*! - requires - - get_num_processing_nodes() != 0 - - matrix_type == a dlib::matrix capable of storing column vectors - ensures - - connects to the processing nodes and begins optimizing the structural - svm problem using the given oca solver. - - stores the solution in #w - - returns the objective value at the solution #w - throws - - invalid_problem - This exception is thrown if the svm_struct_processing_nodes disagree - on the dimensionality of the problem. That is, if they disagree on - the value of structural_svm_problem::get_num_dimensions(). - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/svm/structural_svm_graph_labeling_problem.h b/ml/dlib/dlib/svm/structural_svm_graph_labeling_problem.h deleted file mode 100644 index c677861c9..000000000 --- a/ml/dlib/dlib/svm/structural_svm_graph_labeling_problem.h +++ /dev/null @@ -1,542 +0,0 @@ -// Copyright (C) 2012 Davis E.
King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_STRUCTURAL_SVM_GRAPH_LAbELING_PROBLEM_Hh_ -#define DLIB_STRUCTURAL_SVM_GRAPH_LAbELING_PROBLEM_Hh_ - - -#include "structural_svm_graph_labeling_problem_abstract.h" -#include "../graph_cuts.h" -#include "../matrix.h" -#include "../array.h" -#include <vector> -#include <iterator> -#include "structural_svm_problem_threaded.h" -#include "../graph.h" -#include "sparse_vector.h" -#include <sstream> - -// ---------------------------------------------------------------------------------------- - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename graph_type - > - bool is_graph_labeling_problem ( - const dlib::array<graph_type>& samples, - const std::vector<std::vector<bool> >& labels, - std::string& reason_for_failure - ) - { - typedef typename graph_type::type node_vector_type; - typedef typename graph_type::edge_type edge_vector_type; - // The graph must use all dense vectors or all sparse vectors. It can't mix the two types together. - COMPILE_TIME_ASSERT( (is_matrix<node_vector_type>::value && is_matrix<edge_vector_type>::value) || - (!is_matrix<node_vector_type>::value && !is_matrix<edge_vector_type>::value)); - - - std::ostringstream sout; - reason_for_failure.clear(); - - if (!is_learning_problem(samples, labels)) - { - reason_for_failure = "is_learning_problem(samples, labels) returned false."; - return false; - } - - const bool ismat = is_matrix<typename graph_type::type>::value; - - // these are -1 until assigned with a value - long node_dims = -1; - long edge_dims = -1; - - for (unsigned long i = 0; i < samples.size(); ++i) - { - if (samples[i].number_of_nodes() != labels[i].size()) - { - sout << "samples["<<i<<"].number_of_nodes() doesn't match labels["<<i<<"].size()."; - reason_for_failure = sout.str(); - return false; - } - if (graph_contains_length_one_cycle(samples[i])) - { - sout << "graph_contains_length_one_cycle(samples["<<i<<"]) returned true."; - reason_for_failure = sout.str(); - return false; - } - - for (unsigned long j = 0; j < samples[i].number_of_nodes(); ++j) - { - if (ismat && samples[i].node(j).data.size() == 0) - { - sout << "A graph contains an empty vector at node: samples["<<i<<"].node("<<j<<").data."; - reason_for_failure = sout.str(); - return false; - } - - if (ismat && node_dims == -1) - node_dims = samples[i].node(j).data.size(); - // all nodes must have vectors of the same size. - if (ismat && (long)samples[i].node(j).data.size() != node_dims) - { - sout << "Not all node vectors in samples["<<i<<"] are the same dimension."; - reason_for_failure = sout.str(); - return false; - } - - for (unsigned long n = 0; n < samples[i].node(j).number_of_neighbors(); ++n) - { - if (ismat && samples[i].node(j).edge(n).size() == 0) - { - sout << "A graph contains an empty vector at edge: samples["<<i<<"].node("<<j<<").edge("<<n<<")."; - reason_for_failure = sout.str(); - return false; - } - if (min(samples[i].node(j).edge(n)) < 0) - { - sout << "A graph contains negative values on an edge vector at: samples["<<i<<"].node("<<j<<").edge("<<n<<")."; - reason_for_failure = sout.str(); - return false; - } - - if (ismat && edge_dims == -1) - edge_dims = samples[i].node(j).edge(n).size(); - // all edges must have vectors of the same size. 
- if (ismat && (long)samples[i].node(j).edge(n).size() != edge_dims) - { - sout << "Not all edge vectors in samples["<<i<<"] are the same dimension."; - reason_for_failure = sout.str(); - return false; - } - } - } - } - - return true; - } - - template < - typename graph_type - > - bool is_graph_labeling_problem ( - const dlib::array<graph_type>& samples, - const std::vector<std::vector<bool> >& labels - ) - { - std::string reason_for_failure; - return is_graph_labeling_problem(samples, labels, reason_for_failure); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U - > - bool sizes_match ( - const std::vector<std::vector<T> >& lhs, - const std::vector<std::vector<U> >& rhs - ) - { - if (lhs.size() != rhs.size()) - return false; - - for (unsigned long i = 0; i < lhs.size(); ++i) - { - if (lhs[i].size() != rhs[i].size()) - return false; - } - - return true; - } - -// ---------------------------------------------------------------------------------------- - - inline bool all_values_are_nonnegative ( - const std::vector<std::vector<double> >& x - ) - { - for (unsigned long i = 0; i < x.size(); ++i) - { - for (unsigned long j = 0; j < x[i].size(); ++j) - { - if (x[i][j] < 0) - return false; - } - } - return true; - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template < - typename T, - typename enable = void - > - struct fvect - { - // In this case type should be some sparse vector type - typedef typename T::type type; - }; - - template < typename T > - struct fvect<T, typename enable_if<is_matrix<typename T::type> >::type> - { - // The point of this stuff is to create the proper matrix - // type to represent the concatenation of an edge vector - // with a node vector. - typedef typename T::type node_mat; - typedef typename T::edge_type edge_mat; - const static long NRd = node_mat::NR; - const static long NRe = edge_mat::NR; - const static long NR = ((NRd!=0) && (NRe!=0)) ?
(NRd+NRe) : 0; - typedef typename node_mat::value_type value_type; - - typedef matrix<value_type,NR,1, typename node_mat::mem_manager_type, typename node_mat::layout_type> type; - }; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename graph_type - > - class structural_svm_graph_labeling_problem : noncopyable, - public structural_svm_problem_threaded<matrix<double,0,1>, - typename dlib::impl::fvect<graph_type>::type > - { - public: - typedef matrix<double,0,1> matrix_type; - typedef typename dlib::impl::fvect<graph_type>::type feature_vector_type; - - typedef graph_type sample_type; - - typedef std::vector<bool> label_type; - - structural_svm_graph_labeling_problem( - const dlib::array<sample_type>& samples_, - const std::vector<label_type>& labels_, - const std::vector<std::vector<double> >& losses_, - unsigned long num_threads = 2 - ) : - structural_svm_problem_threaded<matrix_type,feature_vector_type>(num_threads), - samples(samples_), - labels(labels_), - losses(losses_) - { - // make sure requires clause is not broken -#ifdef ENABLE_ASSERTS - std::string reason_for_failure; - DLIB_ASSERT(is_graph_labeling_problem(samples, labels, reason_for_failure) == true , - "\t structural_svm_graph_labeling_problem::structural_svm_graph_labeling_problem()" - << "\n\t Invalid inputs were given to this function." - << "\n\t reason_for_failure: " << reason_for_failure - << "\n\t samples.size(): " << samples.size() - << "\n\t labels.size(): " << labels.size() - << "\n\t this: " << this ); - DLIB_ASSERT((losses.size() == 0 || sizes_match(labels, losses) == true) && - all_values_are_nonnegative(losses) == true, - "\t structural_svm_graph_labeling_problem::structural_svm_graph_labeling_problem()" - << "\n\t Invalid inputs were given to this function." - << "\n\t labels.size(): " << labels.size() - << "\n\t losses.size(): " << losses.size() - << "\n\t sizes_match(labels,losses): " << sizes_match(labels,losses) - << "\n\t all_values_are_nonnegative(losses): " << all_values_are_nonnegative(losses) - << "\n\t this: " << this ); -#endif - - loss_pos = 1.0; - loss_neg = 1.0; - - // figure out how many dimensions are in the node and edge vectors. - node_dims = 0; - edge_dims = 0; - for (unsigned long i = 0; i < samples.size(); ++i) - { - for (unsigned long j = 0; j < samples[i].number_of_nodes(); ++j) - { - node_dims = std::max(node_dims,(long)max_index_plus_one(samples[i].node(j).data)); - for (unsigned long n = 0; n < samples[i].node(j).number_of_neighbors(); ++n) - { - edge_dims = std::max(edge_dims, (long)max_index_plus_one(samples[i].node(j).edge(n))); - } - } - } - } - - const std::vector<std::vector<double> >& get_losses ( - ) const { return losses; } - - long get_num_edge_weights ( - ) const - { - return edge_dims; - } - - void set_loss_on_positive_class ( - double loss - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(loss >= 0 && get_losses().size() == 0, - "\t void structural_svm_graph_labeling_problem::set_loss_on_positive_class()" - << "\n\t Invalid inputs were given to this function." - << "\n\t loss: " << loss - << "\n\t this: " << this ); - - loss_pos = loss; - } - - void set_loss_on_negative_class ( - double loss - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(loss >= 0 && get_losses().size() == 0, - "\t void structural_svm_graph_labeling_problem::set_loss_on_negative_class()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t loss: " << loss - << "\n\t this: " << this ); - - loss_neg = loss; - } - - double get_loss_on_negative_class ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(get_losses().size() == 0, - "\t double structural_svm_graph_labeling_problem::get_loss_on_negative_class()" - << "\n\t Invalid inputs were given to this function." - << "\n\t this: " << this ); - - return loss_neg; - } - - double get_loss_on_positive_class ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(get_losses().size() == 0, - "\t double structural_svm_graph_labeling_problem::get_loss_on_positive_class()" - << "\n\t Invalid inputs were given to this function." - << "\n\t this: " << this ); - - return loss_pos; - } - - - private: - virtual long get_num_dimensions ( - ) const - { - // The psi/w vector will begin with all the edge dims and then follow with the node dims. - return edge_dims + node_dims; - } - - virtual long get_num_samples ( - ) const - { - return samples.size(); - } - - template <typename psi_type> - typename enable_if<is_matrix<psi_type> >::type get_joint_feature_vector ( - const sample_type& sample, - const label_type& label, - psi_type& psi - ) const - { - psi.set_size(get_num_dimensions()); - psi = 0; - for (unsigned long i = 0; i < sample.number_of_nodes(); ++i) - { - // accumulate the node vectors - if (label[i] == true) - set_rowm(psi, range(edge_dims, psi.size()-1)) += sample.node(i).data; - - for (unsigned long n = 0; n < sample.node(i).number_of_neighbors(); ++n) - { - const unsigned long j = sample.node(i).neighbor(n).index(); - - // Don't double count edges. Also only include the vector if - // the labels disagree. - if (i < j && label[i] != label[j]) - { - set_rowm(psi, range(0, edge_dims-1)) -= sample.node(i).edge(n); - } - } - } - } - - template <typename T> - void add_to_sparse_vect ( - T& psi, - const T& vect, - unsigned long offset - ) const - { - for (typename T::const_iterator i = vect.begin(); i != vect.end(); ++i) - { - psi.insert(psi.end(), std::make_pair(i->first+offset, i->second)); - } - } - - template <typename T> - void subtract_from_sparse_vect ( - T& psi, - const T& vect - ) const - { - for (typename T::const_iterator i = vect.begin(); i != vect.end(); ++i) - { - psi.insert(psi.end(), std::make_pair(i->first, -i->second)); - } - } - - template <typename psi_type> - typename disable_if<is_matrix<psi_type> >::type get_joint_feature_vector ( - const sample_type& sample, - const label_type& label, - psi_type& psi - ) const - { - psi.clear(); - for (unsigned long i = 0; i < sample.number_of_nodes(); ++i) - { - // accumulate the node vectors - if (label[i] == true) - add_to_sparse_vect(psi, sample.node(i).data, edge_dims); - - for (unsigned long n = 0; n < sample.node(i).number_of_neighbors(); ++n) - { - const unsigned long j = sample.node(i).neighbor(n).index(); - - // Don't double count edges. Also only include the vector if - // the labels disagree. - if (i < j && label[i] != label[j]) - { - subtract_from_sparse_vect(psi, sample.node(i).edge(n)); - } - } - } - } - - virtual void get_truth_joint_feature_vector ( - long idx, - feature_vector_type& psi - ) const - { - get_joint_feature_vector(samples[idx], labels[idx], psi); - } - - virtual void separation_oracle ( - const long idx, - const matrix_type& current_solution, - double& loss, - feature_vector_type& psi - ) const - { - const sample_type& samp = samples[idx]; - - // setup the potts graph based on samples[idx] and current_solution. 
- graph<double,double>::kernel_1a g; - copy_graph_structure(samp, g); - for (unsigned long i = 0; i < g.number_of_nodes(); ++i) - { - g.node(i).data = dot(rowm(current_solution,range(edge_dims,current_solution.size()-1)), - samp.node(i).data); - - // Include a loss augmentation so that we will get the proper loss augmented - // max when we use find_max_factor_graph_potts() below. - if (labels[idx][i]) - g.node(i).data -= get_loss_for_sample(idx,i,!labels[idx][i]); - else - g.node(i).data += get_loss_for_sample(idx,i,!labels[idx][i]); - - for (unsigned long n = 0; n < g.node(i).number_of_neighbors(); ++n) - { - const unsigned long j = g.node(i).neighbor(n).index(); - // Don't compute an edge weight more than once. - if (i < j) - { - g.node(i).edge(n) = dot(rowm(current_solution,range(0,edge_dims-1)), - samp.node(i).edge(n)); - } - } - - } - - std::vector<node_label> labeling; - find_max_factor_graph_potts(g, labeling); - - - std::vector<bool> bool_labeling; - bool_labeling.reserve(labeling.size()); - // figure out the loss - loss = 0; - for (unsigned long i = 0; i < labeling.size(); ++i) - { - const bool predicted_label = (labeling[i]!= 0); - bool_labeling.push_back(predicted_label); - loss += get_loss_for_sample(idx, i, predicted_label); - } - - // compute psi - get_joint_feature_vector(samp, bool_labeling, psi); - } - - double get_loss_for_sample ( - long sample_idx, - long node_idx, - bool predicted_label - ) const - /*! - requires - - 0 <= sample_idx < labels.size() - - 0 <= node_idx < labels[sample_idx].size() - ensures - - returns the loss incurred for predicting that the node - samples[sample_idx].node(node_idx) has a label of predicted_label. - !*/ - { - const bool true_label = labels[sample_idx][node_idx]; - if (true_label != predicted_label) - { - if (losses.size() != 0) - return losses[sample_idx][node_idx]; - else if (true_label == true) - return loss_pos; - else - return loss_neg; - } - else - { - // no loss for making the correct prediction. - return 0; - } - } - - const dlib::array<sample_type>& samples; - const std::vector<label_type>& labels; - const std::vector<std::vector<double> >& losses; - - long node_dims; - long edge_dims; - double loss_pos; - double loss_neg; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SVM_GRAPH_LAbELING_PROBLEM_Hh_ - - diff --git a/ml/dlib/dlib/svm/structural_svm_graph_labeling_problem_abstract.h b/ml/dlib/dlib/svm/structural_svm_graph_labeling_problem_abstract.h deleted file mode 100644 index ab99ed8f4..000000000 --- a/ml/dlib/dlib/svm/structural_svm_graph_labeling_problem_abstract.h +++ /dev/null @@ -1,249 +0,0 @@ -// Copyright (C) 2012 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
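A brief aside between these two headers: the separation_oracle() above performs its loss-augmented inference with find_max_factor_graph_potts(). Below is a minimal self-contained sketch of that primitive, using hypothetical node scores and a hypothetical edge weight (a positive node score favors the true label, and the non-negative edge term rewards agreement between neighbors):

    #include <dlib/graph.h>
    #include <dlib/graph_cuts.h>
    #include <vector>

    int main()
    {
        using namespace dlib;
        graph<double,double>::kernel_1a g;
        g.set_number_of_nodes(2);
        g.add_edge(0,1);
        g.node(0).data = 1.0;    // node 0 prefers the true label
        g.node(1).data = -0.5;   // node 1 weakly prefers the false label
        edge(g,0,1) = 2.0;       // a strong agreement term pulls node 1 to true as well

        std::vector<node_label> labeling;
        find_max_factor_graph_potts(g, labeling);
        // labeling[i] != 0 means node i was assigned the true label.
    }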
-#undef DLIB_STRUCTURAL_SVM_GRAPH_LAbELING_PROBLEM_ABSTRACT_Hh_ -#ifdef DLIB_STRUCTURAL_SVM_GRAPH_LAbELING_PROBLEM_ABSTRACT_Hh_ - -#include "../array/array_kernel_abstract.h" -#include "../graph/graph_kernel_abstract.h" -#include "../matrix/matrix_abstract.h" -#include "sparse_vector_abstract.h" -#include "structural_svm_problem_threaded_abstract.h" -#include <vector> - -// ---------------------------------------------------------------------------------------- - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename graph_type - > - bool is_graph_labeling_problem ( - const dlib::array<graph_type>& samples, - const std::vector<std::vector<bool> >& labels - ); - /*! - requires - - graph_type is an implementation of dlib/graph/graph_kernel_abstract.h - - graph_type::type and graph_type::edge_type are either both dlib::matrix types - capable of containing column vectors or both some kind of sparse vector type - as defined in dlib/svm/sparse_vector_abstract.h. - ensures - - Note that a graph labeling problem is a task to learn a binary classifier which - predicts the correct label for each node in the provided graphs. Additionally, - we have information in the form of edges between nodes where edges are present - when we believe the linked nodes are likely to have the same label. Therefore, - part of a graph labeling problem is to learn to score each edge in terms of how - strongly the edge should enforce labeling consistency between its two nodes. - Thus, to be a valid graph labeling problem, samples should contain example graphs - of connected nodes while labels should indicate the desired label of each node. - The precise requirements for a valid graph labeling problem are listed below. - - This function returns true if all of the following are true and false otherwise: - - is_learning_problem(samples, labels) == true - - All the vectors stored on the edges of each graph in samples - contain only values which are >= 0. - - for all valid i: - - graph_contains_length_one_cycle(samples[i]) == false - - samples[i].number_of_nodes() == labels[i].size() - (i.e. Every graph node gets its own label) - - if (graph_type::edge_type is a dlib::matrix) then - - All the nodes must contain vectors with the same number of dimensions. - - All the edges must contain vectors with the same number of dimensions. - (However, edge vectors may differ in dimension from node vectors.) - - All vectors have non-zero size. That is, they have more than 0 dimensions. - !*/ - - template < - typename graph_type - > - bool is_graph_labeling_problem ( - const dlib::array<graph_type>& samples, - const std::vector<std::vector<bool> >& labels, - std::string& reason_for_failure - ); - /*! - This function is identical to the above version of is_graph_labeling_problem() - except that if it returns false it will populate reason_for_failure with a message - describing why the graph is not a valid learning problem. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U - > - bool sizes_match ( - const std::vector<std::vector<T> >& lhs, - const std::vector<std::vector<U> >& rhs - ); - /*! - ensures - - returns true if the sizes of lhs and rhs, as well as their constituent vectors - all match. 
In particular, we return true if all of the following conditions are - met and false otherwise: - - lhs.size() == rhs.size() - - for all valid i: - - lhs[i].size() == rhs[i].size() - !*/ - -// ---------------------------------------------------------------------------------------- - - bool all_values_are_nonnegative ( - const std::vector<std::vector<double> >& x - ); - /*! - ensures - - returns true if all the double values contained in x are >= 0. - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename graph_type - > - class structural_svm_graph_labeling_problem : noncopyable, - public structural_svm_problem_threaded<matrix<double,0,1>, - typename graph_type::type > - { - /*! - REQUIREMENTS ON graph_type - - graph_type is an implementation of dlib/graph/graph_kernel_abstract.h - - graph_type::type and graph_type::edge_type must be either matrix objects - capable of representing column vectors or some kind of sparse vector - type as defined in dlib/svm/sparse_vector_abstract.h. - - WHAT THIS OBJECT REPRESENTS - This object is a tool for learning the weight vectors needed to use - a graph_labeler object. It learns the parameter vectors by formulating - the problem as a structural SVM problem. - !*/ - - public: - typedef matrix<double,0,1> matrix_type; - typedef typename graph_type::type feature_vector_type; - typedef graph_type sample_type; - typedef std::vector<bool> label_type; - - structural_svm_graph_labeling_problem( - const dlib::array<sample_type>& samples, - const std::vector<label_type>& labels, - const std::vector<std::vector<double> >& losses, - unsigned long num_threads - ); - /*! - requires - - is_graph_labeling_problem(samples,labels) == true - - if (losses.size() != 0) then - - sizes_match(labels, losses) == true - - all_values_are_nonnegative(losses) == true - ensures - - This object attempts to learn a mapping from the given samples to the - given labels. In particular, it attempts to learn to predict labels[i] - based on samples[i]. Or in other words, this object can be used to learn - parameter vectors, E and W, such that a graph_labeler declared as: - graph_labeler<feature_vector_type> labeler(E,W) - results in a labeler object which attempts to compute the following mapping: - labels[i] == labeler(samples[i]) - - When you use this object with the oca optimizer you get back just one - big parameter vector as the solution. Therefore, note that this single - big vector is the concatenation of E and W. The first get_num_edge_weights() - elements of this vector correspond to E and the rest is W. - - This object will use num_threads threads during the optimization - procedure. You should set this parameter equal to the number of - available processing cores on your machine. - - if (losses.size() == 0) then - - #get_loss_on_positive_class() == 1.0 - - #get_loss_on_negative_class() == 1.0 - - #get_losses().size() == 0 - - The losses argument is effectively ignored if its size is zero. - - else - - #get_losses() == losses - - Each node in the training data has its own loss value defined by - the corresponding entry of losses. In particular, this means that - the node with label labels[i][j] incurs a loss of losses[i][j] if - it is incorrectly labeled. - - The get_loss_on_positive_class() and get_loss_on_negative_class() - parameters are ignored. Only get_losses() is used in this case. 
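To make the E/W packing just described concrete, here is a minimal usage sketch (assuming samples, labels, and losses have been prepared as above; the thread count of 4 and the C value are arbitrary illustrative choices):

    structural_svm_graph_labeling_problem<graph_type> problem(samples, labels, losses, 4);
    problem.set_c(10);
    oca solver;
    matrix<double,0,1> weights;
    // The first get_num_edge_weights() elements of the solution (the E part)
    // are constrained to be non-negative via oca's third argument:
    solver(problem, weights, problem.get_num_edge_weights());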
- !*/ - - const std::vector<std::vector<double> >& get_losses ( - ) const; - /*! - ensures - - returns the losses vector given to this object's constructor. - This vector defines the per sample loss values used. If the vector - is empty then the loss values defined by get_loss_on_positive_class() and - get_loss_on_negative_class() are used instead. - !*/ - - long get_num_edge_weights ( - ) const; - /*! - ensures - - returns the dimensionality of the edge weight vector. It is also - important to know that when using the oca solver with this object, - you must set it to generate non-negative weights for the edge weight - part of the total weight vector. You can do this by passing get_num_edge_weights() - to the third argument to oca::operator(). - !*/ - - void set_loss_on_positive_class ( - double loss - ); - /*! - requires - - loss >= 0 - - get_losses().size() == 0 - ensures - - #get_loss_on_positive_class() == loss - !*/ - - void set_loss_on_negative_class ( - double loss - ); - /*! - requires - - loss >= 0 - - get_losses().size() == 0 - ensures - - #get_loss_on_negative_class() == loss - !*/ - - double get_loss_on_positive_class ( - ) const; - /*! - requires - - get_losses().size() == 0 - ensures - - returns the loss incurred when a graph node which is supposed to have - a label of true gets misclassified. This value controls how much we care - about correctly classifying nodes which should be labeled as true. Larger - loss values indicate that we care more strongly than smaller values. - !*/ - - double get_loss_on_negative_class ( - ) const; - /*! - requires - - get_losses().size() == 0 - ensures - - returns the loss incurred when a graph node which is supposed to have - a label of false gets misclassified. This value controls how much we care - about correctly classifying nodes which should be labeled as false. Larger - loss values indicate that we care more strongly than smaller values. - !*/ - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SVM_GRAPH_LAbELING_PROBLEM_ABSTRACT_Hh_ - - - - diff --git a/ml/dlib/dlib/svm/structural_svm_object_detection_problem.h b/ml/dlib/dlib/svm/structural_svm_object_detection_problem.h deleted file mode 100644 index 1c54a42b1..000000000 --- a/ml/dlib/dlib/svm/structural_svm_object_detection_problem.h +++ /dev/null @@ -1,531 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_STRUCTURAL_SVM_ObJECT_DETECTION_PROBLEM_Hh_ -#define DLIB_STRUCTURAL_SVM_ObJECT_DETECTION_PROBLEM_Hh_ - -#include "structural_svm_object_detection_problem_abstract.h" -#include "../matrix.h" -#include "structural_svm_problem_threaded.h" -#include <sstream> -#include "../string.h" -#include "../array.h" -#include "../image_processing/full_object_detection.h" -#include "../image_processing/box_overlap_testing.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_scanner_type, - typename image_array_type - > - class structural_svm_object_detection_problem : public structural_svm_problem_threaded<matrix<double,0,1> >, - noncopyable - { - public: - - structural_svm_object_detection_problem( - const image_scanner_type& scanner, - const test_box_overlap& overlap_tester, - const bool auto_overlap_tester, - const image_array_type& images_, - const std::vector<std::vector<full_object_detection> >& truth_object_detections_, - const std::vector<std::vector<rectangle> >& ignore_, - const test_box_overlap& ignore_overlap_tester_, - unsigned long num_threads = 2 - ) : - structural_svm_problem_threaded<matrix<double,0,1> >(num_threads), - boxes_overlap(overlap_tester), - images(images_), - truth_object_detections(truth_object_detections_), - ignore(ignore_), - ignore_overlap_tester(ignore_overlap_tester_), - match_eps(0.5), - loss_per_false_alarm(1), - loss_per_missed_target(1) - { -#ifdef ENABLE_ASSERTS - // make sure requires clause is not broken - DLIB_ASSERT(is_learning_problem(images_, truth_object_detections_) && - ignore_.size() == images_.size() && - scanner.get_num_detection_templates() > 0, - "\t structural_svm_object_detection_problem::structural_svm_object_detection_problem()" - << "\n\t Invalid inputs were given to this function " - << "\n\t scanner.get_num_detection_templates(): " << scanner.get_num_detection_templates() - << "\n\t is_learning_problem(images_,truth_object_detections_): " << is_learning_problem(images_,truth_object_detections_) - << "\n\t ignore.size(): " << ignore.size() - << "\n\t images.size(): " << images.size() - << "\n\t this: " << this - ); - for (unsigned long i = 0; i < truth_object_detections.size(); ++i) - { - for (unsigned long j = 0; j < truth_object_detections[i].size(); ++j) - { - DLIB_ASSERT(truth_object_detections[i][j].num_parts() == scanner.get_num_movable_components_per_detection_template(), - "\t trained_function_type structural_object_detection_trainer::train()" - << "\n\t invalid inputs were given to this function" - << "\n\t truth_object_detections["<<i<<"]["<<j<<"].num_parts(): " << - truth_object_detections[i][j].num_parts() - << "\n\t scanner.get_num_movable_components_per_detection_template(): " << - scanner.get_num_movable_components_per_detection_template() - << "\n\t all_parts_in_rect(truth_object_detections["<<i<<"]["<<j<<"]): " << all_parts_in_rect(truth_object_detections[i][j]) - ); - } - } -#endif - // The purpose of the max_num_dets member variable is to give us a reasonable - // upper limit on the number of detections we can expect from a single image. - // This is used in the separation_oracle to put a hard limit on the number of - // detections we will consider. We do this purely for computational reasons - // since otherwise we can end up wasting large amounts of time on certain - // pathological cases during optimization which ultimately do not influence the - // result. 
Therefore, we force the separation oracle to only consider the - // max_num_dets strongest detections. - max_num_dets = 0; - for (unsigned long i = 0; i < truth_object_detections.size(); ++i) - { - if (truth_object_detections[i].size() > max_num_dets) - max_num_dets = truth_object_detections[i].size(); - } - max_num_dets = max_num_dets*3 + 10; - - initialize_scanners(scanner, num_threads); - - if (auto_overlap_tester) - { - auto_configure_overlap_tester(); - } - } - - test_box_overlap get_overlap_tester ( - ) const - { - return boxes_overlap; - } - - void set_match_eps ( - double eps - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < eps && eps < 1, - "\t void structural_svm_object_detection_problem::set_match_eps(eps)" - << "\n\t Invalid inputs were given to this function " - << "\n\t eps: " << eps - << "\n\t this: " << this - ); - - match_eps = eps; - } - - double get_match_eps ( - ) const - { - return match_eps; - } - - double get_loss_per_missed_target ( - ) const - { - return loss_per_missed_target; - } - - void set_loss_per_missed_target ( - double loss - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(loss > 0, - "\t void structural_svm_object_detection_problem::set_loss_per_missed_target(loss)" - << "\n\t Invalid inputs were given to this function " - << "\n\t loss: " << loss - << "\n\t this: " << this - ); - - loss_per_missed_target = loss; - } - - double get_loss_per_false_alarm ( - ) const - { - return loss_per_false_alarm; - } - - void set_loss_per_false_alarm ( - double loss - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(loss > 0, - "\t void structural_svm_object_detection_problem::set_loss_per_false_alarm(loss)" - << "\n\t Invalid inputs were given to this function " - << "\n\t loss: " << loss - << "\n\t this: " << this - ); - - loss_per_false_alarm = loss; - } - - private: - - void auto_configure_overlap_tester( - ) - { - std::vector<std::vector<rectangle> > mapped_rects(truth_object_detections.size()); - for (unsigned long i = 0; i < truth_object_detections.size(); ++i) - { - mapped_rects[i].resize(truth_object_detections[i].size()); - for (unsigned long j = 0; j < truth_object_detections[i].size(); ++j) - { - mapped_rects[i][j] = scanners[i].get_best_matching_rect(truth_object_detections[i][j].get_rect()); - } - } - - boxes_overlap = find_tight_overlap_tester(mapped_rects); - } - - - virtual long get_num_dimensions ( - ) const - { - return scanners[0].get_num_dimensions() + - 1;// for threshold - } - - virtual long get_num_samples ( - ) const - { - return images.size(); - } - - virtual void get_truth_joint_feature_vector ( - long idx, - feature_vector_type& psi - ) const - { - const image_scanner_type& scanner = scanners[idx]; - - psi.set_size(get_num_dimensions()); - std::vector<rectangle> mapped_rects; - - psi = 0; - for (unsigned long i = 0; i < truth_object_detections[idx].size(); ++i) - { - mapped_rects.push_back(scanner.get_best_matching_rect(truth_object_detections[idx][i].get_rect())); - scanner.get_feature_vector(truth_object_detections[idx][i], psi); - } - psi(scanner.get_num_dimensions()) = -1.0*truth_object_detections[idx].size(); - - // check if any of the boxes overlap. 
If they do then it is impossible for - // us to learn to correctly classify this sample - for (unsigned long i = 0; i < mapped_rects.size(); ++i) - { - for (unsigned long j = i+1; j < mapped_rects.size(); ++j) - { - if (boxes_overlap(mapped_rects[i], mapped_rects[j])) - { - const double area_overlap = mapped_rects[i].intersect(mapped_rects[j]).area(); - const double match_amount = area_overlap/(double)( mapped_rects[i]+mapped_rects[j]).area(); - const double overlap_amount = area_overlap/std::min(mapped_rects[i].area(),mapped_rects[j].area()); - - using namespace std; - ostringstream sout; - sout << "An impossible set of object labels was detected. This is happening because "; - sout << "the truth labels for an image contain rectangles which overlap according to the "; - sout << "test_box_overlap object supplied for non-max suppression. To resolve this, you "; - sout << "either need to relax the test_box_overlap object so it doesn't mark these rectangles as "; - sout << "overlapping or adjust the truth rectangles in your training dataset. "; - - // make sure the above string fits nicely into a command prompt window. - string temp = sout.str(); - sout.str(""); sout << wrap_string(temp,0,0) << endl << endl; - - - sout << "image index: "<< idx << endl; - sout << "The offending rectangles are:\n"; - sout << "rect1: "<< mapped_rects[i] << endl; - sout << "rect2: "<< mapped_rects[j] << endl; - sout << "match amount: " << match_amount << endl; - sout << "overlap amount: " << overlap_amount << endl; - throw dlib::impossible_labeling_error(sout.str()); - } - } - } - - // make sure the mapped rectangles are within match_eps of the - // truth rectangles. - for (unsigned long i = 0; i < mapped_rects.size(); ++i) - { - const double area = (truth_object_detections[idx][i].get_rect().intersect(mapped_rects[i])).area(); - const double total_area = (truth_object_detections[idx][i].get_rect() + mapped_rects[i]).area(); - if (area/total_area <= match_eps) - { - using namespace std; - ostringstream sout; - sout << "An impossible set of object labels was detected. This is happening because "; - sout << "none of the object locations checked by the supplied image scanner is a close "; - sout << "enough match to one of the truth boxes in your training dataset. To resolve this "; - sout << "you need to either lower the match_eps, adjust the settings of the image scanner "; - sout << "so that it is capable of hitting this truth box, or adjust the offending truth rectangle so it "; - sout << "can be matched by the current image scanner. Also, if you "; - sout << "are using the scan_fhog_pyramid object then you could try using a finer image pyramid. "; - sout << "Additionally, the scan_fhog_pyramid scans a fixed aspect ratio box across the image when it "; - sout << "searches for objects. So if you are getting this error and you are using the scan_fhog_pyramid, "; - sout << "it's very likely the problem is that your training dataset contains truth rectangles of widely "; - sout << "varying aspect ratios. The solution is to make sure your training boxes all have about the same aspect ratio. "; - - - // make sure the above string fits nicely into a command prompt window. 
- string temp = sout.str(); - sout.str(""); sout << wrap_string(temp,0,0) << endl << endl; - - sout << "image index "<< idx << endl; - sout << "match_eps: "<< match_eps << endl; - sout << "best possible match: "<< area/total_area << endl; - sout << "truth rect: "<< truth_object_detections[idx][i].get_rect() << endl; - sout << "truth rect width/height: "<< truth_object_detections[idx][i].get_rect().width()/(double)truth_object_detections[idx][i].get_rect().height() << endl; - sout << "truth rect area: "<< truth_object_detections[idx][i].get_rect().area() << endl; - sout << "nearest detection template rect: "<< mapped_rects[i] << endl; - sout << "nearest detection template rect width/height: "<< mapped_rects[i].width()/(double)mapped_rects[i].height() << endl; - sout << "nearest detection template rect area: "<< mapped_rects[i].area() << endl; - throw dlib::impossible_labeling_error(sout.str()); - } - - } - } - - virtual void separation_oracle ( - const long idx, - const matrix_type& current_solution, - scalar_type& loss, - feature_vector_type& psi - ) const - { - const image_scanner_type& scanner = scanners[idx]; - - std::vector<std::pair<double, rectangle> > dets; - const double thresh = current_solution(scanner.get_num_dimensions()); - - - scanner.detect(current_solution, dets, thresh-loss_per_false_alarm); - - - // The loss will measure the number of incorrect detections. A detection is - // incorrect if it doesn't hit a truth rectangle or if it is a duplicate detection - // on a truth rectangle. - loss = truth_object_detections[idx].size()*loss_per_missed_target; - - // Measure the loss augmented score for the detections which hit a truth rect. - std::vector<double> truth_score_hits(truth_object_detections[idx].size(), 0); - - // keep track of which truth boxes we have hit so far. - std::vector<bool> hit_truth_table(truth_object_detections[idx].size(), false); - - std::vector<rectangle> final_dets; - // The point of this loop is to fill out the truth_score_hits array. - for (unsigned long i = 0; i < dets.size() && final_dets.size() < max_num_dets; ++i) - { - if (overlaps_any_box(boxes_overlap, final_dets, dets[i].second)) - continue; - - const std::pair<double,unsigned int> truth = find_best_match(truth_object_detections[idx], dets[i].second); - - final_dets.push_back(dets[i].second); - - const double truth_match = truth.first; - // if hit truth rect - if (truth_match > match_eps) - { - // if this is the first time we have seen a detect which hit truth_object_detections[idx][truth.second] - const double score = dets[i].first - thresh; - if (hit_truth_table[truth.second] == false) - { - hit_truth_table[truth.second] = true; - truth_score_hits[truth.second] += score; - } - else - { - truth_score_hits[truth.second] += score + loss_per_false_alarm; - } - } - } - - hit_truth_table.assign(hit_truth_table.size(), false); - - final_dets.clear(); -#ifdef ENABLE_ASSERTS - double total_score = 0; -#endif - // Now figure out which detections jointly maximize the loss and detection score sum. We - // need to take into account the fact that allowing a true detection in the output, while - // initially reducing the loss, may allow us to increase the loss later with many duplicate - // detections. 
- for (unsigned long i = 0; i < dets.size() && final_dets.size() < max_num_dets; ++i) - { - if (overlaps_any_box(boxes_overlap, final_dets, dets[i].second)) - continue; - - const std::pair<double,unsigned int> truth = find_best_match(truth_object_detections[idx], dets[i].second); - - const double truth_match = truth.first; - if (truth_match > match_eps) - { - if (truth_score_hits[truth.second] > loss_per_missed_target) - { - if (!hit_truth_table[truth.second]) - { - hit_truth_table[truth.second] = true; - final_dets.push_back(dets[i].second); -#ifdef ENABLE_ASSERTS - total_score += dets[i].first; -#endif - loss -= loss_per_missed_target; - } - else - { - final_dets.push_back(dets[i].second); -#ifdef ENABLE_ASSERTS - total_score += dets[i].first; -#endif - loss += loss_per_false_alarm; - } - } - } - else if (!overlaps_ignore_box(idx,dets[i].second)) - { - // didn't hit anything - final_dets.push_back(dets[i].second); -#ifdef ENABLE_ASSERTS - total_score += dets[i].first; -#endif - loss += loss_per_false_alarm; - } - } - - psi.set_size(get_num_dimensions()); - psi = 0; - for (unsigned long i = 0; i < final_dets.size(); ++i) - scanner.get_feature_vector(scanner.get_full_object_detection(final_dets[i], current_solution), psi); - -#ifdef ENABLE_ASSERTS - const double psi_score = dot(psi, current_solution); - DLIB_CASSERT(std::abs(psi_score-total_score) <= 1e-4 * std::max(1.0,std::max(std::abs(psi_score),std::abs(total_score))), - "\t The get_feature_vector() and detect() methods of image_scanner_type are not in sync." - << "\n\t The relative error is too large to be attributed to rounding error." - << "\n\t error: " << std::abs(psi_score-total_score) - << "\n\t psi_score: " << psi_score - << "\n\t total_score: " << total_score - ); -#endif - - psi(scanner.get_num_dimensions()) = -1.0*final_dets.size(); - } - - - bool overlaps_ignore_box ( - const long idx, - const dlib::rectangle& rect - ) const - { - for (unsigned long i = 0; i < ignore[idx].size(); ++i) - { - if (ignore_overlap_tester(ignore[idx][i], rect)) - return true; - } - return false; - } - - std::pair<double,unsigned int> find_best_match( - const std::vector<full_object_detection>& boxes, - const rectangle rect - ) const - /*! - ensures - - determines which rectangle in boxes matches rect the most and - returns the amount of this match. Specifically, the match is - a number O with the following properties: - - 0 <= O <= 1 - - Let R be the maximum matching rectangle in boxes, then - O == (R.intersect(rect)).area() / (R + rect).area() - - O == 0 if there is no match with any rectangle. 
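A worked instance of this measure, added as an illustrative aside: if rect is a 10x10 box at (0,0) and the closest box in boxes is a 10x10 box at (5,0), the intersection covers 5*10 = 50 pixels while the smallest rectangle containing both covers 15*10 = 150, so O = 50/150, about 0.33. Under the default match_eps of 0.5 such a detection would not count as hitting that truth box.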
- !*/ - { - double match = 0; - unsigned int best_idx = 0; - for (unsigned long i = 0; i < boxes.size(); ++i) - { - - const unsigned long area = rect.intersect(boxes[i].get_rect()).area(); - if (area != 0) - { - const double new_match = area / static_cast<double>((rect + boxes[i].get_rect()).area()); - if (new_match > match) - { - match = new_match; - best_idx = i; - } - } - } - - return std::make_pair(match,best_idx); - } - - struct init_scanners_helper - { - init_scanners_helper ( - array<image_scanner_type>& scanners_, - const image_array_type& images_ - ) : - scanners(scanners_), - images(images_) - {} - - array<image_scanner_type>& scanners; - const image_array_type& images; - - void operator() (long i ) const - { - scanners[i].load(images[i]); - } - }; - - void initialize_scanners ( - const image_scanner_type& scanner, - unsigned long num_threads - ) - { - scanners.set_max_size(images.size()); - scanners.set_size(images.size()); - - for (unsigned long i = 0; i < scanners.size(); ++i) - scanners[i].copy_configuration(scanner); - - // now load the images into all the scanners - parallel_for(num_threads, 0, scanners.size(), init_scanners_helper(scanners, images)); - } - - - test_box_overlap boxes_overlap; - - mutable array<image_scanner_type> scanners; - - const image_array_type& images; - const std::vector<std::vector<full_object_detection> >& truth_object_detections; - const std::vector<std::vector<rectangle> >& ignore; - const test_box_overlap ignore_overlap_tester; - - unsigned long max_num_dets; - double match_eps; - double loss_per_false_alarm; - double loss_per_missed_target; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SVM_ObJECT_DETECTION_PROBLEM_Hh_ - - diff --git a/ml/dlib/dlib/svm/structural_svm_object_detection_problem_abstract.h b/ml/dlib/dlib/svm/structural_svm_object_detection_problem_abstract.h deleted file mode 100644 index d73c5920d..000000000 --- a/ml/dlib/dlib/svm/structural_svm_object_detection_problem_abstract.h +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_STRUCTURAL_SVM_ObJECT_DETECTION_PROBLEM_ABSTRACT_Hh_ -#ifdef DLIB_STRUCTURAL_SVM_ObJECT_DETECTION_PROBLEM_ABSTRACT_Hh_ - -#include "../matrix.h" -#include "structural_svm_problem_threaded_abstract.h" -#include <sstream> -#include "../image_processing/full_object_detection_abstract.h" -#include "../image_processing/box_overlap_testing.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_scanner_type, - typename image_array_type - > - class structural_svm_object_detection_problem : public structural_svm_problem_threaded<matrix<double,0,1> >, - noncopyable - { - /*! - REQUIREMENTS ON image_scanner_type - image_scanner_type must be an implementation of - dlib/image_processing/scan_fhog_pyramid_abstract.h or - dlib/image_processing/scan_image_custom_abstract.h or - dlib/image_processing/scan_image_pyramid_abstract.h or - dlib/image_processing/scan_image_boxes_abstract.h - - REQUIREMENTS ON image_array_type - image_array_type must be an implementation of dlib/array/array_kernel_abstract.h - and it must contain objects which can be accepted by image_scanner_type::load(). 
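One common instantiation that satisfies both requirements, shown purely as an illustrative sketch, pairs the FHOG pyramid scanner with an array of 8-bit grayscale images:

    typedef dlib::scan_fhog_pyramid<dlib::pyramid_down<6> > image_scanner_type;
    typedef dlib::array<dlib::array2d<unsigned char> > image_array_type;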
- - WHAT THIS OBJECT REPRESENTS - This object is a tool for learning the parameter vector needed to use a - scan_image_pyramid, scan_fhog_pyramid, scan_image_custom, or - scan_image_boxes object. - - It learns the parameter vector by formulating the problem as a structural - SVM problem. The exact details of the method are described in the paper - Max-Margin Object Detection by Davis E. King (http://arxiv.org/abs/1502.00046). - - - !*/ - - public: - - structural_svm_object_detection_problem( - const image_scanner_type& scanner, - const test_box_overlap& overlap_tester, - const bool auto_overlap_tester, - const image_array_type& images, - const std::vector<std::vector<full_object_detection> >& truth_object_detections, - const std::vector<std::vector<rectangle> >& ignore, - const test_box_overlap& ignore_overlap_tester, - unsigned long num_threads = 2 - ); - /*! - requires - - is_learning_problem(images, truth_object_detections) - - ignore.size() == images.size() - - scanner.get_num_detection_templates() > 0 - - scanner.load(images[0]) must be a valid expression. - - for all valid i, j: - - truth_object_detections[i][j].num_parts() == scanner.get_num_movable_components_per_detection_template() - - all_parts_in_rect(truth_object_detections[i][j]) == true - ensures - - This object attempts to learn a mapping from the given images to the - object locations given in truth_object_detections. In particular, it - attempts to learn to predict truth_object_detections[i] based on - images[i]. Or in other words, this object can be used to learn a - parameter vector, w, such that an object_detector declared as: - object_detector<image_scanner_type> detector(scanner,get_overlap_tester(),w) - results in a detector object which attempts to compute the locations of - all the objects in truth_object_detections. So if you called - detector(images[i]) you would hopefully get a list of rectangles back - that had truth_object_detections[i].size() elements and contained exactly - the rectangles indicated by truth_object_detections[i]. - - if (auto_overlap_tester == true) then - - #get_overlap_tester() == a test_box_overlap object that is configured - using the find_tight_overlap_tester() routine and the contents of - truth_object_detections. - - else - - #get_overlap_tester() == overlap_tester - - #get_match_eps() == 0.5 - - This object will use num_threads threads during the optimization - procedure. You should set this parameter equal to the number of - available processing cores on your machine. - - #get_loss_per_missed_target() == 1 - - #get_loss_per_false_alarm() == 1 - - for all valid i: - - Within images[i] any detections that match against a rectangle in - ignore[i], according to ignore_overlap_tester, are ignored. That is, - the optimizer doesn't care if the detector outputs a detection that - matches any of the ignore rectangles or if it fails to output a - detection for an ignore rectangle. Therefore, if there are objects - in your dataset that you are unsure you want to detect or otherwise - don't care if the detector gets or doesn't then you can mark them - with ignore rectangles and the optimizer will simply ignore them. - !*/ - - test_box_overlap get_overlap_tester ( - ) const; - /*! - ensures - - returns the overlap tester used by this object. - !*/ - - void set_match_eps ( - double eps - ); - /*! - requires - - 0 < eps < 1 - ensures - - #get_match_eps() == eps - !*/ - - double get_match_eps ( - ) const; - /*! 
- ensures - - returns the amount of alignment necessary for a detection to be considered - as matching with a ground truth rectangle. The precise formula for determining - if two rectangles match each other is the following, rectangles A and B match - if and only if: - A.intersect(B).area()/(A+B).area() > get_match_eps() - !*/ - - double get_loss_per_missed_target ( - ) const; - /*! - ensures - - returns the amount of loss experienced for failing to detect one of the - targets. - !*/ - - void set_loss_per_missed_target ( - double loss - ); - /*! - requires - - loss > 0 - ensures - - #get_loss_per_missed_target() == loss - !*/ - - double get_loss_per_false_alarm ( - ) const; - /*! - ensures - - returns the amount of loss experienced for emitting a false alarm detection. - Or in other words, the loss for generating a detection that doesn't correspond - to one of the truth rectangles. - !*/ - - void set_loss_per_false_alarm ( - double loss - ); - /*! - requires - - loss > 0 - ensures - - #get_loss_per_false_alarm() == loss - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SVM_ObJECT_DETECTION_PROBLEM_ABSTRACT_Hh_ - - - diff --git a/ml/dlib/dlib/svm/structural_svm_problem.h b/ml/dlib/dlib/svm/structural_svm_problem.h deleted file mode 100644 index 3a73457b9..000000000 --- a/ml/dlib/dlib/svm/structural_svm_problem.h +++ /dev/null @@ -1,649 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_STRUCTURAL_SVM_PRObLEM_Hh_ -#define DLIB_STRUCTURAL_SVM_PRObLEM_Hh_ - -#include "structural_svm_problem_abstract.h" -#include "../algs.h" -#include <vector> -#include "../optimization/optimization_oca.h" -#include "../matrix.h" -#include "sparse_vector.h" -#include <iostream> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - struct nuclear_norm_regularizer - { - long first_dimension; - long nr; - long nc; - double regularization_strength; - }; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename structural_svm_problem - > - class cache_element_structural_svm - { - public: - - cache_element_structural_svm ( - ) : prob(0), sample_idx(0), last_true_risk_computed(std::numeric_limits<double>::infinity()) {} - - typedef typename structural_svm_problem::scalar_type scalar_type; - typedef typename structural_svm_problem::matrix_type matrix_type; - typedef typename structural_svm_problem::feature_vector_type feature_vector_type; - - void init ( - const structural_svm_problem* prob_, - const long idx - ) - /*! - ensures - - This object will be a cache for the idx-th sample in the given - structural_svm_problem. 
- !*/ - { - prob = prob_; - sample_idx = idx; - - loss.clear(); - psi.clear(); - lru_count.clear(); - - if (prob->get_max_cache_size() != 0) - { - prob->get_truth_joint_feature_vector(idx, true_psi); - compact_sparse_vector(true_psi); - } - } - - void get_truth_joint_feature_vector_cached ( - feature_vector_type& psi - ) const - { - if (prob->get_max_cache_size() != 0) - psi = true_psi; - else - prob->get_truth_joint_feature_vector(sample_idx, psi); - - if (is_matrix<feature_vector_type>::value) - { - DLIB_CASSERT((long)psi.size() == prob->get_num_dimensions(), - "The dimensionality of your PSI vector doesn't match get_num_dimensions()"); - } - } - - void separation_oracle_cached ( - const bool use_only_cache, - const bool skip_cache, - const scalar_type& saved_current_risk_gap, - const matrix_type& current_solution, - scalar_type& out_loss, - feature_vector_type& out_psi - ) const - { - const bool cache_enabled = prob->get_max_cache_size() != 0; - - // Don't waste time computing this if the cache isn't going to be used. - const scalar_type dot_true_psi = cache_enabled ? dot(true_psi, current_solution) : 0; - - scalar_type best_risk = -std::numeric_limits<scalar_type>::infinity(); - unsigned long best_idx = 0; - long max_lru_count = 0; - if (cache_enabled) - { - // figure out which element in the cache is the best (i.e. has the biggest risk) - for (unsigned long i = 0; i < loss.size(); ++i) - { - const scalar_type risk = loss[i] + dot(psi[i], current_solution) - dot_true_psi; - if (risk > best_risk) - { - best_risk = risk; - out_loss = loss[i]; - best_idx = i; - } - if (lru_count[i] > max_lru_count) - max_lru_count = lru_count[i]; - } - - if (!skip_cache) - { - // Check if the best psi vector in the cache is still good enough to use as - // a proxy for the true separation oracle. If the risk value has dropped - // by enough to get into the stopping condition then the best psi isn't - // good enough. - if ((best_risk + saved_current_risk_gap > last_true_risk_computed && - best_risk >= 0) || use_only_cache) - { - out_psi = psi[best_idx]; - lru_count[best_idx] = max_lru_count + 1; - return; - } - } - } - - - prob->separation_oracle(sample_idx, current_solution, out_loss, out_psi); - if (is_matrix<feature_vector_type>::value) - { - DLIB_CASSERT((long)out_psi.size() == prob->get_num_dimensions(), - "The dimensionality of your PSI vector doesn't match get_num_dimensions()"); - } - - if (!cache_enabled) - return; - - compact_sparse_vector(out_psi); - - last_true_risk_computed = out_loss + dot(out_psi, current_solution) - dot_true_psi; - - // If the separation oracle is only solved approximately then the result might - // not be as good as just selecting true_psi as the output. So here we check - // if that is the case. - if (last_true_risk_computed < 0 && best_risk < 0) - { - out_psi = true_psi; - out_loss = 0; - } - // Alternatively, an approximate separation oracle might not do as well as just - // selecting from the cache. So if that is the case then just take the best - // element from the cache.
- else if (last_true_risk_computed < best_risk) - { - out_psi = psi[best_idx]; - out_loss = loss[best_idx]; - lru_count[best_idx] = max_lru_count + 1; - } - // if the cache is full - else if (loss.size() >= prob->get_max_cache_size()) - { - // find least recently used cache entry for idx-th sample - const long i = index_of_min(mat(lru_count)); - - // save our new data in the cache - loss[i] = out_loss; - psi[i] = out_psi; - - const long max_use = max(mat(lru_count)); - // Make sure this new cache entry has the best lru count since we have used - // it most recently. - lru_count[i] = max_use + 1; - } - else - { - // In this case we just append the new psi into the cache. - - loss.push_back(out_loss); - psi.push_back(out_psi); - long max_use = 1; - if (lru_count.size() != 0) - max_use = max(mat(lru_count)) + 1; - lru_count.push_back(max_use); - } - } - - private: - // Do nothing if T isn't actually a sparse vector - template <typename T> void compact_sparse_vector( T& ) const { } - - template < - typename T, - typename U, - typename alloc - > - void compact_sparse_vector ( - std::vector<std::pair<T,U>,alloc>& vect - ) const - { - // If the sparse vector has more entries than dimensions then it must have some - // duplicate elements. So compact them using make_sparse_vector_inplace(). - if (vect.size() > (unsigned long)prob->get_num_dimensions()) - { - make_sparse_vector_inplace(vect); - // make sure the vector doesn't use more RAM than is necessary - std::vector<std::pair<T,U>,alloc>(vect).swap(vect); - } - } - - const structural_svm_problem* prob; - - long sample_idx; - - mutable feature_vector_type true_psi; - mutable std::vector<scalar_type> loss; - mutable std::vector<feature_vector_type> psi; - mutable std::vector<long> lru_count; - mutable double last_true_risk_computed; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type_, - typename feature_vector_type_ = matrix_type_ - > - class structural_svm_problem : public oca_problem<matrix_type_> - { - public: - /*! - CONVENTION - - C == get_c() - - eps == get_epsilon() - - max_iterations == get_max_iterations() - - if (skip_cache) then - - we won't use the oracle cache when we need to evaluate the separation - oracle. Instead, we will directly call the user supplied separation_oracle(). - - - get_max_cache_size() == max_cache_size - - - if (cache.size() != 0) then - - cache.size() == get_num_samples() - - for all i: cache[i] == the cached results of calls to separation_oracle() - for the i-th sample.
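An aside on how this interface is used: a client derives from this class and implements the four pure virtual functions declared below. The following hypothetical sketch encodes an ordinary binary SVM as a structural SVM by defining PSI(x,y) = y*x/2 for y in {-1,+1}, so the separation oracle can maximize loss(y) + dot(w, PSI(x,y)) by comparing just two candidates (assumes <dlib/svm.h> and <vector> are included):

    class toy_binary_problem : public dlib::structural_svm_problem<dlib::matrix<double,0,1> >
    {
    public:
        toy_binary_problem (
            const std::vector<dlib::matrix<double,0,1> >& samples_,
            const std::vector<double>& labels_   // each entry is +1 or -1
        ) : samples(samples_), labels(labels_) {}

        virtual long get_num_dimensions () const { return samples[0].size(); }
        virtual long get_num_samples () const { return samples.size(); }

        virtual void get_truth_joint_feature_vector (
            long idx,
            feature_vector_type& psi
        ) const
        {
            psi = labels[idx]*samples[idx]/2;   // PSI(x, correct label)
        }

        virtual void separation_oracle (
            const long idx,
            const matrix_type& w,
            scalar_type& loss,
            feature_vector_type& psi
        ) const
        {
            // Score both labelings and report the most violating one.
            const double score = dlib::dot(w, samples[idx]);
            const double risk_pos = (labels[idx] < 0 ? 1 : 0) + score/2;   // y == +1
            const double risk_neg = (labels[idx] > 0 ? 1 : 0) - score/2;   // y == -1
            const double y = (risk_pos >= risk_neg) ? +1 : -1;
            loss = (y != labels[idx]) ? 1 : 0;
            psi = y*samples[idx]/2;
        }

    private:
        const std::vector<dlib::matrix<double,0,1> >& samples;
        const std::vector<double>& labels;
    };

Such a problem can then be handed to the oca solver declared elsewhere in dlib, e.g. dlib::oca()(problem, w, 0).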
- !*/ - - typedef matrix_type_ matrix_type; - typedef typename matrix_type::type scalar_type; - typedef feature_vector_type_ feature_vector_type; - - structural_svm_problem ( - ) : - saved_current_risk_gap(0), - eps(0.001), - max_iterations(10000), - verbose(false), - skip_cache(true), - count_below_eps(0), - max_cache_size(5), - converged(false), - nuclear_norm_part(0), - cache_based_eps(std::numeric_limits<scalar_type>::infinity()), - C(1) - {} - - scalar_type get_cache_based_epsilon ( - ) const - { - return cache_based_eps; - } - - void set_cache_based_epsilon ( - scalar_type eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void structural_svm_problem::set_cache_based_epsilon()" - << "\n\t eps_ must be greater than 0" - << "\n\t eps_: " << eps_ - << "\n\t this: " << this - ); - - cache_based_eps = eps_; - } - - void set_epsilon ( - scalar_type eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void structural_svm_problem::set_epsilon()" - << "\n\t eps_ must be greater than 0" - << "\n\t eps_: " << eps_ - << "\n\t this: " << this - ); - - eps = eps_; - } - - const scalar_type get_epsilon ( - ) const { return eps; } - - unsigned long get_max_iterations ( - ) const { return max_iterations; } - - void set_max_iterations ( - unsigned long max_iter - ) - { - max_iterations = max_iter; - } - - void set_max_cache_size ( - unsigned long max_size - ) - { - max_cache_size = max_size; - } - - unsigned long get_max_cache_size ( - ) const { return max_cache_size; } - - void be_verbose ( - ) - { - verbose = true; - } - - void be_quiet( - ) - { - verbose = false; - } - - scalar_type get_c ( - ) const { return C; } - - void set_c ( - scalar_type C_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C_ > 0, - "\t void structural_svm_problem::set_c()" - << "\n\t C_ must be greater than 0" - << "\n\t C_: " << C_ - << "\n\t this: " << this - ); - - C = C_; - } - - void add_nuclear_norm_regularizer ( - long first_dimension, - long rows, - long cols, - double regularization_strength - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 <= first_dimension && first_dimension < get_num_dimensions() && - 0 <= rows && 0 <= cols && rows*cols+first_dimension <= get_num_dimensions() && - 0 < regularization_strength, - "\t void structural_svm_problem::add_nuclear_norm_regularizer()" - << "\n\t Invalid arguments were given to this function." 
- << "\n\t first_dimension: " << first_dimension - << "\n\t rows: " << rows - << "\n\t cols: " << cols - << "\n\t get_num_dimensions(): " << get_num_dimensions() - << "\n\t regularization_strength: " << regularization_strength - << "\n\t this: " << this - ); - - impl::nuclear_norm_regularizer temp; - temp.first_dimension = first_dimension; - temp.nr = rows; - temp.nc = cols; - temp.regularization_strength = regularization_strength; - nuclear_norm_regularizers.push_back(temp); - } - - unsigned long num_nuclear_norm_regularizers ( - ) const { return nuclear_norm_regularizers.size(); } - - void clear_nuclear_norm_regularizers ( - ) { nuclear_norm_regularizers.clear(); } - - virtual long get_num_dimensions ( - ) const = 0; - - virtual long get_num_samples ( - ) const = 0; - - virtual void get_truth_joint_feature_vector ( - long idx, - feature_vector_type& psi - ) const = 0; - - virtual void separation_oracle ( - const long idx, - const matrix_type& current_solution, - scalar_type& loss, - feature_vector_type& psi - ) const = 0; - - private: - - virtual bool risk_has_lower_bound ( - scalar_type& lower_bound - ) const - { - lower_bound = 0; - return true; - } - - virtual bool optimization_status ( - scalar_type current_objective_value, - scalar_type current_error_gap, - scalar_type current_risk_value, - scalar_type current_risk_gap, - unsigned long num_cutting_planes, - unsigned long num_iterations - ) const - { - if (verbose) - { - using namespace std; - if (nuclear_norm_regularizers.size() != 0) - { - cout << "objective: " << current_objective_value << endl; - cout << "objective gap: " << current_error_gap << endl; - cout << "risk: " << current_risk_value-nuclear_norm_part << endl; - cout << "risk+nuclear norm: " << current_risk_value << endl; - cout << "risk+nuclear norm gap: " << current_risk_gap << endl; - cout << "num planes: " << num_cutting_planes << endl; - cout << "iter: " << num_iterations << endl; - } - else - { - cout << "objective: " << current_objective_value << endl; - cout << "objective gap: " << current_error_gap << endl; - cout << "risk: " << current_risk_value << endl; - cout << "risk gap: " << current_risk_gap << endl; - cout << "num planes: " << num_cutting_planes << endl; - cout << "iter: " << num_iterations << endl; - } - cout << endl; - } - - if (num_iterations >= max_iterations) - return true; - - saved_current_risk_gap = current_risk_gap; - - if (converged) - { - return (current_risk_gap < std::max(cache_based_eps,cache_based_eps*current_risk_value)) || - (current_risk_gap == 0); - } - - if (current_risk_gap < eps) - { - // Only stop when we see that the risk gap is small enough on a non-cached - // iteration. But even then, if we are supposed to do the cache based - // refinement then we just mark that we have "converged" to avoid further - // calls to the separation oracle and run all subsequent iterations off the - // cache. - if (skip_cache || max_cache_size == 0) - { - converged = true; - skip_cache = false; - return (current_risk_gap < std::max(cache_based_eps,cache_based_eps*current_risk_value)) || - (current_risk_gap == 0); - } - - ++count_below_eps; - - // Only disable the cache if we have seen a few consecutive iterations that - // look to have converged. - if (count_below_eps > 1) - { - // Instead of stopping we shouldn't use the cache on the next iteration. This way - // we can be sure to have the best solution rather than assuming the cache is up-to-date - // enough. 
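// (Net effect of the logic above and below: the risk gap must stay below eps
// for a couple of consecutive iterations before the cache is bypassed for one
// fresh pass through the real separation oracle; only if the gap is still
// small on that non-cached pass is the problem marked converged, after which
// iterations run purely off the cache until the cache-based epsilon is met.)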
- skip_cache = true; - count_below_eps = 0; - } - } - else - { - count_below_eps = 0; - skip_cache = false; - } - - return false; - } - - virtual void get_risk ( - matrix_type& w, - scalar_type& risk, - matrix_type& subgradient - ) const - { - feature_vector_type ftemp; - const unsigned long num = get_num_samples(); - - // initialize the cache and compute psi_true. - if (cache.size() == 0) - { - cache.resize(get_num_samples()); - for (unsigned long i = 0; i < cache.size(); ++i) - cache[i].init(this,i); - - psi_true.set_size(w.size(),1); - psi_true = 0; - - for (unsigned long i = 0; i < num; ++i) - { - cache[i].get_truth_joint_feature_vector_cached(ftemp); - - subtract_from(psi_true, ftemp); - } - } - - subgradient = psi_true; - scalar_type total_loss = 0; - call_separation_oracle_on_all_samples(w,subgradient,total_loss); - - subgradient /= num; - total_loss /= num; - risk = total_loss + dot(subgradient,w); - - if (nuclear_norm_regularizers.size() != 0) - { - matrix_type grad; - scalar_type obj; - compute_nuclear_norm_parts(w, grad, obj); - risk += obj; - subgradient += grad; - } - } - - virtual void call_separation_oracle_on_all_samples ( - const matrix_type& w, - matrix_type& subgradient, - scalar_type& total_loss - ) const - { - feature_vector_type ftemp; - const unsigned long num = get_num_samples(); - for (unsigned long i = 0; i < num; ++i) - { - scalar_type loss; - separation_oracle_cached(i, w, loss, ftemp); - total_loss += loss; - add_to(subgradient, ftemp); - } - } - - protected: - - void compute_nuclear_norm_parts( - const matrix_type& m, - matrix_type& grad, - scalar_type& obj - ) const - { - obj = 0; - grad.set_size(m.size(), 1); - grad = 0; - - matrix<double> u,v,w,f; - nuclear_norm_part = 0; - for (unsigned long i = 0; i < nuclear_norm_regularizers.size(); ++i) - { - const long nr = nuclear_norm_regularizers[i].nr; - const long nc = nuclear_norm_regularizers[i].nc; - const long size = nr*nc; - const long idx = nuclear_norm_regularizers[i].first_dimension; - const double strength = nuclear_norm_regularizers[i].regularization_strength; - - f = matrix_cast<double>(reshape(rowm(m, range(idx, idx+size-1)), nr, nc)); - svd3(f, u,w,v); - - - const double norm = sum(w); - obj += strength*norm; - nuclear_norm_part += strength*norm/C; - - f = u*trans(v); - - set_rowm(grad, range(idx, idx+size-1)) = matrix_cast<double>(strength*reshape_to_column_vector(f)); - } - - obj /= C; - grad /= C; - } - - void separation_oracle_cached ( - const long idx, - const matrix_type& current_solution, - scalar_type& loss, - feature_vector_type& psi - ) const - { - cache[idx].separation_oracle_cached(converged, - skip_cache, - saved_current_risk_gap, - current_solution, - loss, - psi); - } - - std::vector<impl::nuclear_norm_regularizer> nuclear_norm_regularizers; - - mutable scalar_type saved_current_risk_gap; - mutable matrix_type psi_true; - scalar_type eps; - unsigned long max_iterations; - mutable bool verbose; - - - mutable std::vector<cache_element_structural_svm<structural_svm_problem> > cache; - mutable bool skip_cache; - mutable int count_below_eps; - unsigned long max_cache_size; - mutable bool converged; - mutable double nuclear_norm_part; - scalar_type cache_based_eps; - - scalar_type C; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SVM_PRObLEM_Hh_ - diff --git a/ml/dlib/dlib/svm/structural_svm_problem_abstract.h b/ml/dlib/dlib/svm/structural_svm_problem_abstract.h deleted file mode 100644 index 
20b3d73a7..000000000 --- a/ml/dlib/dlib/svm/structural_svm_problem_abstract.h +++ /dev/null @@ -1,348 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_STRUCTURAL_SVM_PRObLEM_ABSTRACT_Hh_ -#ifdef DLIB_STRUCTURAL_SVM_PRObLEM_ABSTRACT_Hh_ - -#include "../optimization/optimization_oca_abstract.h" -#include "sparse_vector_abstract.h" -#include "../matrix.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type_, - typename feature_vector_type_ = matrix_type_ - > - class structural_svm_problem : public oca_problem<matrix_type_> - { - public: - /*! - REQUIREMENTS ON matrix_type_ - - matrix_type_ == a dlib::matrix capable of storing column vectors - - REQUIREMENTS ON feature_vector_type_ - - feature_vector_type_ == a dlib::matrix capable of storing column vectors - or an unsorted sparse vector type as defined in dlib/svm/sparse_vector_abstract.h. - - INITIAL VALUE - - get_epsilon() == 0.001 - - get_max_iterations() == 10000 - - get_max_cache_size() == 5 - - get_c() == 1 - - get_cache_based_epsilon() == std::numeric_limits<scalar_type>::infinity() - (I.e. the cache based epsilon feature is disabled) - - num_nuclear_norm_regularizers() == 0 - - This object will not be verbose - - WHAT THIS OBJECT REPRESENTS - This object is a tool for solving the optimization problem associated with - a structural support vector machine. A structural SVM is a supervised - machine learning method for learning to predict complex outputs. This is - contrasted with a binary classifier which makes only simple yes/no - predictions. A structural SVM, on the other hand, can learn to predict - complex outputs such as entire parse trees or DNA sequence alignments. To - do this, it learns a function F(x,y) which measures how well a particular - data sample x matches a label y. When used for prediction, the best label - for a new x is given by the y which maximizes F(x,y). - - To use this object you inherit from it, provide implementations of its four - pure virtual functions, and then pass your object to the oca optimizer. - Also, you should only pass an instance of this object to the oca optimizer - once. That is, the act of using a structural_svm_problem instance with the - oca solver "uses" the structural_svm_problem instance. If you want to - solve the same problem multiple times then you must use a fresh instance of - your structural_svm_problem. - - - To define the optimization problem precisely, we first introduce some notation: - - let PSI(x,y) == the joint feature vector for input x and a label y. - - let F(x,y|w) == dot(w,PSI(x,y)). - - let LOSS(idx,y) == the loss incurred for predicting that the idx-th training - sample has a label of y. Note that LOSS() should always be >= 0 and should - become exactly 0 when y is the correct label for the idx-th sample. - - let x_i == the i-th training sample. - - let y_i == the correct label for the i-th training sample. - - The number of data samples is N. 
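In symbols, the prediction rule described above is (restating the notation just introduced):

    \hat{y}(x) \;=\; \operatorname*{argmax}_y \, F(x,y \mid w) \;=\; \operatorname*{argmax}_y \, \langle w, \Psi(x,y) \rangle

where \Psi stands for PSI.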
- - Then the optimization problem solved using this object is the following: - Minimize: h(w) == 0.5*dot(w,w) + C*R(w) - - Where R(w) == sum from i=1 to N: 1/N * sample_risk(i,w) - and sample_risk(i,w) == max over all Y: LOSS(i,Y) + F(x_i,Y|w) - F(x_i,y_i|w) - and C > 0 - - - - For an introduction to structured support vector machines you should consult - the following paper: - Predicting Structured Objects with Support Vector Machines by - Thorsten Joachims, Thomas Hofmann, Yisong Yue, and Chun-nam Yu - - For a more detailed discussion of the particular algorithm implemented by this - object see the following paper: - T. Joachims, T. Finley, Chun-Nam Yu, Cutting-Plane Training of Structural SVMs, - Machine Learning, 77(1):27-59, 2009. - - Note that this object is essentially a tool for solving the 1-Slack structural - SVM with margin-rescaling. Specifically, see Algorithm 3 in the above referenced - paper. - !*/ - - typedef matrix_type_ matrix_type; - typedef typename matrix_type::type scalar_type; - typedef feature_vector_type_ feature_vector_type; - - structural_svm_problem ( - ); - /*! - ensures - - this object is properly initialized - !*/ - - void set_epsilon ( - scalar_type eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - const scalar_type get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Smaller values may result in a more accurate solution but take longer - to execute. Specifically, the algorithm stops when the average sample - risk (i.e. R(w) as defined above) is within epsilon of its optimal value. - - Also note that sample risk is an upper bound on a sample's loss. So - you can think of this epsilon value as saying "solve the optimization - problem until the average loss per sample is within epsilon of its - optimal value". - !*/ - - scalar_type get_cache_based_epsilon ( - ) const; - /*! - ensures - - if (get_max_cache_size() != 0) then - - The solver will not stop when the average sample risk is within - get_epsilon() of its optimal value. Instead, it will keep running - but will run the optimizer completely on the cache until the average - sample risk is within #get_cache_based_epsilon() of its optimal - value. This means that it will perform this additional refinement in - the solution accuracy without making any additional calls to the - separation_oracle(). This is useful when using a nuclear norm - regularization term because it allows you to quickly solve the - optimization problem to a high precision, which in the case of a - nuclear norm regularized problem means that many of the learned - matrices will be low rank or very close to low rank due to the - nuclear norm regularizer. This may not happen without solving the - problem to a high accuracy, or their ranks may be difficult to - determine, so the extra accuracy given by the cache based refinement - is very useful. Finally, note that we include the nuclear norm term - as part of the "risk" for the purposes of determining when to stop. - - else - - The value of #get_cache_based_epsilon() has no effect. - !*/ - - void set_cache_based_epsilon ( - scalar_type eps - ); - /*! - requires - - eps > 0 - ensures - - #get_cache_based_epsilon() == eps - !*/ - - void set_max_iterations ( - unsigned long max_iter - ); - /*! - ensures - - #get_max_iterations() == max_iter - !*/ - - unsigned long get_max_iterations ( - ); - /*!
- ensures - - returns the maximum number of iterations the SVM optimizer is allowed to - run before it is required to stop and return a result. - !*/ - - void set_max_cache_size ( - unsigned long max_size - ); - /*! - ensures - - #get_max_cache_size() == max_size - !*/ - - unsigned long get_max_cache_size ( - ) const; - /*! - ensures - - Returns the number of joint feature vectors per training sample kept in - the separation oracle cache. This cache is used to avoid unnecessary - calls to the user supplied separation_oracle() function. Note that a - value of 0 means that caching is not used at all. This is appropriate - if the separation oracle is cheap to evaluate. - !*/ - - void add_nuclear_norm_regularizer ( - long first_dimension, - long rows, - long cols, - double regularization_strength - ); - /*! - requires - - 0 <= first_dimension < get_num_dimensions() - - 0 <= rows - - 0 <= cols - - first_dimension+rows*cols <= get_num_dimensions() - - 0 < regularization_strength - ensures - - Adds a nuclear norm regularization term to the optimization problem - solved by this object. That is, instead of solving: - Minimize: h(w) == 0.5*dot(w,w) + C*R(w) - this object will solve: - Minimize: h(w) == 0.5*dot(w,w) + C*R(w) + regularization_strength*nuclear_norm_of(part of w) - where "part of w" is the part of w indicated by the arguments to this - function. In particular, the part of w included in the nuclear norm is - exactly the matrix reshape(rowm(w, range(first_dimension, first_dimension+rows*cols-1)), rows, cols). - Therefore, if you think of the w vector as being the concatenation of a - bunch of matrices then you can use multiple calls to add_nuclear_norm_regularizer() - to add nuclear norm regularization terms to any of the matrices packed into w. - - #num_nuclear_norm_regularizers() == num_nuclear_norm_regularizers() + 1 - !*/ - - unsigned long num_nuclear_norm_regularizers ( - ) const; - /*! - ensures - - returns the number of nuclear norm regularizers that are currently a part - of this optimization problem. That is, returns the number of times - add_nuclear_norm_regularizer() has been called since the last call to - clear_nuclear_norm_regularizers() or object construction, whichever is - most recent. - !*/ - - void clear_nuclear_norm_regularizers ( - ); - /*! - ensures - - #num_nuclear_norm_regularizers() == 0 - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out so that a - user can observe the progress of the algorithm. - !*/ - - void be_quiet( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - scalar_type get_c ( - ) const; - /*! - ensures - - returns the SVM regularization parameter. It is the parameter that - determines the trade off between trying to fit the training data - exactly or allowing more errors but hopefully improving the - generalization of the resulting classifier. Larger values encourage - exact fitting while smaller values of C may encourage better - generalization. - !*/ - - void set_c ( - scalar_type C - ); - /*! - requires - - C > 0 - ensures - - #get_c() == C - !*/ - - // -------------------------------- - // User supplied routines - // -------------------------------- - - virtual long get_num_dimensions ( - ) const = 0; - /*! - ensures - - returns the dimensionality of a joint feature vector - !*/ - - virtual long get_num_samples ( - ) const = 0; - /*! - ensures - - returns the number of training samples in this problem. 
- !*/ - - virtual void get_truth_joint_feature_vector ( - long idx, - feature_vector_type& psi - ) const = 0; - /*! - requires - - 0 <= idx < get_num_samples() - ensures - - #psi == PSI(x_idx, y_idx) - (i.e. the joint feature vector for the idx-th training sample and its true label.) - !*/ - - virtual void separation_oracle ( - const long idx, - const matrix_type& current_solution, - scalar_type& loss, - feature_vector_type& psi - ) const = 0; - /*! - requires - - 0 <= idx < get_num_samples() - - current_solution.size() == get_num_dimensions() - ensures - - runs the separation oracle on the idx-th sample. We define this as follows: - - let X == the idx-th training sample. - - let PSI(X,y) == the joint feature vector for input X and an arbitrary label y. - - let F(X,y) == dot(current_solution,PSI(X,y)). - - let LOSS(idx,y) == the loss incurred for predicting that the idx-th sample - has a label of y. Note that LOSS() should always be >= 0 and should - become exactly 0 when y is the correct label for the idx-th sample. - - Then the separation oracle finds a Y such that: - Y = argmax over all y: LOSS(idx,y) + F(X,y) - (i.e. It finds the label which maximizes the above expression.) - - Finally, we can define the outputs of this function as: - - #loss == LOSS(idx,Y) - - #psi == PSI(X,Y) - !*/ - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SVM_PRObLEM_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/svm/structural_svm_problem_threaded.h b/ml/dlib/dlib/svm/structural_svm_problem_threaded.h deleted file mode 100644 index e981ba8d9..000000000 --- a/ml/dlib/dlib/svm/structural_svm_problem_threaded.h +++ /dev/null @@ -1,157 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_STRUCTURAL_SVM_PRObLEM_THREADED_Hh_ -#define DLIB_STRUCTURAL_SVM_PRObLEM_THREADED_Hh_ - -#include "structural_svm_problem_threaded_abstract.h" -#include "../algs.h" -#include <vector> -#include "structural_svm_problem.h" -#include "../matrix.h" -#include "sparse_vector.h" -#include <iostream> -#include "../threads.h" -#include "../misc_api.h" -#include "../statistics.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type_, - typename feature_vector_type_ = matrix_type_ - > - class structural_svm_problem_threaded : public structural_svm_problem<matrix_type_,feature_vector_type_> - { - public: - - typedef matrix_type_ matrix_type; - typedef typename matrix_type::type scalar_type; - typedef feature_vector_type_ feature_vector_type; - - explicit structural_svm_problem_threaded ( - unsigned long num_threads - ) : - tp(num_threads), - num_iterations_executed(0) - {} - - unsigned long get_num_threads ( - ) const { return tp.num_threads_in_pool(); } - - private: - - struct binder - { - binder ( - const structural_svm_problem_threaded& self_, - const matrix_type& w_, - matrix_type& subgradient_, - scalar_type& total_loss_, - bool buffer_subgradients_locally_ - ) : self(self_), w(w_), subgradient(subgradient_), total_loss(total_loss_), - buffer_subgradients_locally(buffer_subgradients_locally_){} - - void call_oracle ( - long begin, - long end - ) - { - // If we are only going to call the separation oracle once then don't run - // the slightly more complex for loop version of this code. Or if we just - // don't want to run the complex buffering one. 
The code later on decides - // if we should do the buffering based on how long it takes to execute. We - // do this because, when the subgradient is really high dimensional it can - // take a lot of time to add them together. So we might want to avoid - // doing that. - if (end-begin <= 1 || !buffer_subgradients_locally) - { - scalar_type loss; - feature_vector_type ftemp; - for (long i = begin; i < end; ++i) - { - self.separation_oracle_cached(i, w, loss, ftemp); - - auto_mutex lock(self.accum_mutex); - total_loss += loss; - add_to(subgradient, ftemp); - } - } - else - { - scalar_type loss = 0; - matrix_type faccum(subgradient.size(),1); - faccum = 0; - - feature_vector_type ftemp; - - for (long i = begin; i < end; ++i) - { - scalar_type loss_temp; - self.separation_oracle_cached(i, w, loss_temp, ftemp); - loss += loss_temp; - add_to(faccum, ftemp); - } - - auto_mutex lock(self.accum_mutex); - total_loss += loss; - add_to(subgradient, faccum); - } - } - - const structural_svm_problem_threaded& self; - const matrix_type& w; - matrix_type& subgradient; - scalar_type& total_loss; - bool buffer_subgradients_locally; - }; - - - virtual void call_separation_oracle_on_all_samples ( - const matrix_type& w, - matrix_type& subgradient, - scalar_type& total_loss - ) const - { - ++num_iterations_executed; - - const uint64 start_time = ts.get_timestamp(); - - bool buffer_subgradients_locally = with_buffer_time.mean() < without_buffer_time.mean(); - - // every 50 iterations we should try to flip the buffering scheme to see if - // doing it the other way might be better. - if ((num_iterations_executed%50) == 0) - { - buffer_subgradients_locally = !buffer_subgradients_locally; - } - - binder b(*this, w, subgradient, total_loss, buffer_subgradients_locally); - parallel_for_blocked(tp, 0, this->get_num_samples(), b, &binder::call_oracle); - - const uint64 stop_time = ts.get_timestamp(); - - if (buffer_subgradients_locally) - with_buffer_time.add(stop_time-start_time); - else - without_buffer_time.add(stop_time-start_time); - - } - - mutable thread_pool tp; - mutable mutex accum_mutex; - mutable timestamper ts; - mutable running_stats<double> with_buffer_time; - mutable running_stats<double> without_buffer_time; - mutable unsigned long num_iterations_executed; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SVM_PRObLEM_THREADED_Hh_ - - diff --git a/ml/dlib/dlib/svm/structural_svm_problem_threaded_abstract.h b/ml/dlib/dlib/svm/structural_svm_problem_threaded_abstract.h deleted file mode 100644 index 3cfc6a6eb..000000000 --- a/ml/dlib/dlib/svm/structural_svm_problem_threaded_abstract.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_STRUCTURAL_SVM_PRObLEM_THREADED_ABSTRACT_Hh_ -#ifdef DLIB_STRUCTURAL_SVM_PRObLEM_THREADED_ABSTRACT_Hh_ - -#include "structural_svm_problem_abstract.h" -#include "../matrix.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type_, - typename feature_vector_type_ = matrix_type_ - > - class structural_svm_problem_threaded : public structural_svm_problem<matrix_type_,feature_vector_type_> - { - public: - /*! 
- WHAT THIS OBJECT REPRESENTS - This object is identical to the structural_svm_problem object defined in - dlib/svm/structural_svm_problem_abstract.h except that its constructor - takes a number which defines how many threads will be used to make concurrent - calls to the separation_oracle() routine. - - So this object lets you take advantage of a multi-core system. You should - set the num_threads parameter equal to the number of available cores. Note - that the separation_oracle() function which you provide must be thread safe - if you are to use this version of the structural_svm_problem. In - particular, it must be safe to call separation_oracle() concurrently from - different threads. However, it is guaranteed that different threads will - never make concurrent calls to separation_oracle() using the same idx value - (i.e. the first argument). A minimal subclass sketch is given below. - !*/ - - typedef matrix_type_ matrix_type; - typedef typename matrix_type::type scalar_type; - typedef feature_vector_type_ feature_vector_type; - - structural_svm_problem_threaded ( - unsigned long num_threads - ); - /*! - ensures - - this object is properly initialized - - #get_num_threads() == num_threads - !*/ - - unsigned long get_num_threads ( - ) const; - /*! - ensures - - Returns the number of threads which will be used to make concurrent - calls to the separation_oracle() function. - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SVM_PRObLEM_THREADED_ABSTRACT_Hh_ - - - diff --git a/ml/dlib/dlib/svm/structural_svm_sequence_labeling_problem.h b/ml/dlib/dlib/svm/structural_svm_sequence_labeling_problem.h deleted file mode 100644 index 68dff66f5..000000000 --- a/ml/dlib/dlib/svm/structural_svm_sequence_labeling_problem.h +++ /dev/null @@ -1,281 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license.
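A minimal sketch of how the structural_svm_problem_threaded interface documented above is meant to be subclassed. The toy_problem class and its toy numbers are hypothetical, not part of dlib, and a real separation_oracle() would search over candidate labels rather than just returning the truth:

    #include <dlib/svm.h>  // assuming dlib's headers are on the include path

    // Hypothetical toy problem: a 3-dimensional w, 2 samples, 4 oracle threads.
    // separation_oracle() must be thread safe; dlib guarantees distinct idx
    // values across concurrent calls, so reading const members is fine as long
    // as no mutable shared state is touched.
    class toy_problem
        : public dlib::structural_svm_problem_threaded<dlib::matrix<double,0,1> >
    {
    public:
        toy_problem() : dlib::structural_svm_problem_threaded<dlib::matrix<double,0,1> >(4) {}

        long get_num_dimensions () const { return 3; }
        long get_num_samples () const { return 2; }

        void get_truth_joint_feature_vector (long idx, feature_vector_type& psi) const
        {
            psi.set_size(3);
            psi = 0;
            psi(idx) = 1;  // stand-in for PSI(x_idx, y_idx)
        }

        void separation_oracle (const long idx, const matrix_type& current_solution,
                                scalar_type& loss, feature_vector_type& psi) const
        {
            // A real implementation maximizes LOSS(idx,y) + dot(current_solution, PSI(x_idx,y))
            // over all labels y; returning the truth with zero loss keeps the sketch short.
            loss = 0;
            get_truth_joint_feature_vector(idx, psi);
        }
    };

An instance would then be handed to a dlib::oca solver in the usual way; the four overridden members are exactly the pure virtuals declared in structural_svm_problem_abstract.h.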
-#ifndef DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_Hh_ -#define DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_Hh_ - - -#include "structural_svm_sequence_labeling_problem_abstract.h" -#include "../matrix.h" -#include "sequence_labeler.h" -#include <vector> -#include "structural_svm_problem_threaded.h" - -// ---------------------------------------------------------------------------------------- - -namespace dlib -{ - - namespace fe_helpers - { - - // ---------------------------------------------------------------------------------------- - - struct get_feats_functor - { - get_feats_functor(std::vector<std::pair<unsigned long, double> >& feats_) : feats(feats_) {} - - inline void operator() ( - unsigned long feat_index, - double feat_value - ) - { - feats.push_back(std::make_pair(feat_index, feat_value)); - } - - inline void operator() ( - unsigned long feat_index - ) - { - feats.push_back(std::make_pair(feat_index, 1)); - } - - std::vector<std::pair<unsigned long, double> >& feats; - }; - - // ---------------------------------------------------------------------------------------- - - template <typename feature_extractor, typename sequence_type, typename EXP2> - void get_feature_vector( - std::vector<std::pair<unsigned long, double> >& feats, - const feature_extractor& fe, - const sequence_type& sequence, - const matrix_exp<EXP2>& candidate_labeling, - unsigned long position - ) - { - get_feats_functor funct(feats); - fe.get_features(funct, sequence,candidate_labeling, position); - } - - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class structural_svm_sequence_labeling_problem : noncopyable, - public structural_svm_problem_threaded<matrix<double,0,1>, std::vector<std::pair<unsigned long,double> > > - { - public: - typedef matrix<double,0,1> matrix_type; - typedef std::vector<std::pair<unsigned long, double> > feature_vector_type; - - typedef typename feature_extractor::sequence_type sequence_type; - - structural_svm_sequence_labeling_problem( - const std::vector<sequence_type>& samples_, - const std::vector<std::vector<unsigned long> >& labels_, - const feature_extractor& fe_, - unsigned long num_threads = 2 - ) : - structural_svm_problem_threaded<matrix_type,feature_vector_type>(num_threads), - samples(samples_), - labels(labels_), - fe(fe_) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_sequence_labeling_problem(samples,labels) == true && - contains_invalid_labeling(fe, samples, labels) == false, - "\t structural_svm_sequence_labeling_problem::structural_svm_sequence_labeling_problem()" - << "\n\t invalid inputs were given to this function" - << "\n\t samples.size(): " << samples.size() - << "\n\t is_sequence_labeling_problem(samples,labels): " << is_sequence_labeling_problem(samples,labels) - << "\n\t contains_invalid_labeling(fe,samples,labels): " << contains_invalid_labeling(fe,samples,labels) - << "\n\t this: " << this - ); - -#ifdef ENABLE_ASSERTS - for (unsigned long i = 0; i < labels.size(); ++i) - { - for (unsigned long j = 0; j < labels[i].size(); ++j) - { - // make sure requires clause is not broken - DLIB_ASSERT(labels[i][j] < fe.num_labels(), - "\t structural_svm_sequence_labeling_problem::structural_svm_sequence_labeling_problem()" - << "\n\t The given labels in labels are invalid." 
- << "\n\t labels[i][j]: " << labels[i][j] - << "\n\t fe.num_labels(): " << fe.num_labels() - << "\n\t i: " << i - << "\n\t j: " << j - << "\n\t this: " << this - ); - } - } -#endif - - loss_values.assign(num_labels(), 1); - - } - - unsigned long num_labels ( - ) const { return fe.num_labels(); } - - double get_loss ( - unsigned long label - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(label < num_labels(), - "\t void structural_svm_sequence_labeling_problem::get_loss()" - << "\n\t invalid inputs were given to this function" - << "\n\t label: " << label - << "\n\t num_labels(): " << num_labels() - << "\n\t this: " << this - ); - - return loss_values[label]; - } - - void set_loss ( - unsigned long label, - double value - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(label < num_labels() && value >= 0, - "\t void structural_svm_sequence_labeling_problem::set_loss()" - << "\n\t invalid inputs were given to this function" - << "\n\t label: " << label - << "\n\t num_labels(): " << num_labels() - << "\n\t value: " << value - << "\n\t this: " << this - ); - - loss_values[label] = value; - } - - private: - virtual long get_num_dimensions ( - ) const - { - return fe.num_features(); - } - - virtual long get_num_samples ( - ) const - { - return samples.size(); - } - - void get_joint_feature_vector ( - const sequence_type& sample, - const std::vector<unsigned long>& label, - feature_vector_type& psi - ) const - { - psi.clear(); - - const int order = fe.order(); - - matrix<unsigned long,0,1> candidate_labeling; - for (unsigned long i = 0; i < sample.size(); ++i) - { - candidate_labeling = rowm(mat(label), range(i, std::max((int)i-order,0))); - - fe_helpers::get_feature_vector(psi,fe,sample,candidate_labeling, i); - } - } - - virtual void get_truth_joint_feature_vector ( - long idx, - feature_vector_type& psi - ) const - { - get_joint_feature_vector(samples[idx], labels[idx], psi); - } - - class map_prob - { - public: - unsigned long order() const { return fe.order(); } - unsigned long num_states() const { return fe.num_labels(); } - - map_prob( - const sequence_type& sequence_, - const std::vector<unsigned long>& label_, - const feature_extractor& fe_, - const matrix<double,0,1>& weights_, - const std::vector<double>& loss_values_ - ) : - sequence(sequence_), - label(label_), - fe(fe_), - weights(weights_), - loss_values(loss_values_) - { - } - - unsigned long number_of_nodes( - ) const - { - return sequence.size(); - } - - template < - typename EXP - > - double factor_value ( - unsigned long node_id, - const matrix_exp<EXP>& node_states - ) const - { - if (dlib::impl::call_reject_labeling_if_exists(fe, sequence, node_states, node_id)) - return -std::numeric_limits<double>::infinity(); - - double loss = 0; - if (node_states(0) != label[node_id]) - loss = loss_values[label[node_id]]; - - return fe_helpers::dot(weights, fe, sequence, node_states, node_id) + loss; - } - - const sequence_type& sequence; - const std::vector<unsigned long>& label; - const feature_extractor& fe; - const matrix<double,0,1>& weights; - const std::vector<double>& loss_values; - }; - - virtual void separation_oracle ( - const long idx, - const matrix_type& current_solution, - scalar_type& loss, - feature_vector_type& psi - ) const - { - std::vector<unsigned long> y; - find_max_factor_graph_viterbi(map_prob(samples[idx],labels[idx],fe,current_solution,loss_values), y); - - loss = 0; - for (unsigned long i = 0; i < y.size(); ++i) - { - if (y[i] != labels[idx][i]) - loss += 
loss_values[labels[idx][i]]; - } - - get_joint_feature_vector(samples[idx], y, psi); - } - - const std::vector<sequence_type>& samples; - const std::vector<std::vector<unsigned long> >& labels; - const feature_extractor& fe; - std::vector<double> loss_values; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_Hh_ - diff --git a/ml/dlib/dlib/svm/structural_svm_sequence_labeling_problem_abstract.h b/ml/dlib/dlib/svm/structural_svm_sequence_labeling_problem_abstract.h deleted file mode 100644 index b46a55350..000000000 --- a/ml/dlib/dlib/svm/structural_svm_sequence_labeling_problem_abstract.h +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_ABSTRACT_Hh_ -#ifdef DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_ABSTRACT_Hh_ - - -#include "../matrix.h" -#include <vector> -#include "structural_svm_problem_threaded_abstract.h" -#include "sequence_labeler_abstract.h" - -// ---------------------------------------------------------------------------------------- - -namespace dlib -{ - - template < - typename feature_extractor - > - class structural_svm_sequence_labeling_problem : noncopyable, - public structural_svm_problem_threaded<matrix<double,0,1>, - std::vector<std::pair<unsigned long,double> > > - { - /*! - REQUIREMENTS ON feature_extractor - It must be an object that implements an interface compatible with - the example_feature_extractor defined in dlib/svm/sequence_labeler_abstract.h. - - WHAT THIS OBJECT REPRESENTS - This object is a tool for learning the weight vector needed to use - a sequence_labeler object. - - It learns the parameter vector by formulating the problem as a structural - SVM problem. The general approach is discussed in the paper: - Hidden Markov Support Vector Machines by - Y. Altun, I. Tsochantaridis, T. Hofmann - While the particular optimization strategy used is the method from: - T. Joachims, T. Finley, Chun-Nam Yu, Cutting-Plane Training of - Structural SVMs, Machine Learning, 77(1):27-59, 2009. - !*/ - - public: - typedef typename feature_extractor::sequence_type sequence_type; - - structural_svm_sequence_labeling_problem( - const std::vector<sequence_type>& samples, - const std::vector<std::vector<unsigned long> >& labels, - const feature_extractor& fe, - unsigned long num_threads = 2 - ); - /*! - requires - - is_sequence_labeling_problem(samples, labels) == true - - contains_invalid_labeling(fe, samples, labels) == false - - for all valid i and j: labels[i][j] < fe.num_labels() - ensures - - This object attempts to learn a mapping from the given samples to the - given labels. In particular, it attempts to learn to predict labels[i] - based on samples[i]. Or in other words, this object can be used to learn - a parameter vector, w, such that a sequence_labeler declared as: - sequence_labeler<feature_extractor> labeler(w,fe) - results in a labeler object which attempts to compute the following mapping: - labels[i] == labeler(samples[i]) - - This object will use num_threads threads during the optimization - procedure. You should set this parameter equal to the number of - available processing cores on your machine. - - #num_labels() == fe.num_labels() - - for all valid i: #get_loss(i) == 1 - !*/ - - unsigned long num_labels ( - ) const; - /*! 
- ensures - - returns the number of possible labels in this learning problem - !*/ - - double get_loss ( - unsigned long label - ) const; - /*! - requires - - label < num_labels() - ensures - - returns the loss incurred when a sequence element with the given - label is misclassified. This value controls how much we care about - correctly classifying this type of label. Larger loss values indicate - that we care more strongly than smaller values. - !*/ - - void set_loss ( - unsigned long label, - double value - ); - /*! - requires - - label < num_labels() - - value >= 0 - ensures - - #get_loss(label) == value - !*/ - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/svm/structural_track_association_trainer.h b/ml/dlib/dlib/svm/structural_track_association_trainer.h deleted file mode 100644 index 87fb829b2..000000000 --- a/ml/dlib/dlib/svm/structural_track_association_trainer.h +++ /dev/null @@ -1,404 +0,0 @@ -// Copyright (C) 2014 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_STRUCTURAL_TRACK_ASSOCIATION_TRAnER_Hh_ -#define DLIB_STRUCTURAL_TRACK_ASSOCIATION_TRAnER_Hh_ - -#include "structural_track_association_trainer_abstract.h" -#include "../algs.h" -#include "svm.h" -#include <utility> -#include "track_association_function.h" -#include "structural_assignment_trainer.h" -#include <map> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template < - typename detection_type, - typename label_type - > - std::vector<detection_type> get_unlabeled_dets ( - const std::vector<labeled_detection<detection_type,label_type> >& dets - ) - { - std::vector<detection_type> temp; - temp.reserve(dets.size()); - for (unsigned long i = 0; i < dets.size(); ++i) - temp.push_back(dets[i].det); - return temp; - } - - } - -// ---------------------------------------------------------------------------------------- - - class structural_track_association_trainer - { - public: - - structural_track_association_trainer ( - ) - { - set_defaults(); - } - - void set_num_threads ( - unsigned long num - ) - { - num_threads = num; - } - - unsigned long get_num_threads ( - ) const - { - return num_threads; - } - - void set_epsilon ( - double eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void structural_track_association_trainer::set_epsilon()" - << "\n\t eps_ must be greater than 0" - << "\n\t eps_: " << eps_ - << "\n\t this: " << this - ); - - eps = eps_; - } - - double get_epsilon ( - ) const { return eps; } - - void set_max_cache_size ( - unsigned long max_size - ) - { - max_cache_size = max_size; - } - - unsigned long get_max_cache_size ( - ) const - { - return max_cache_size; - } - - void set_loss_per_false_association ( - double loss - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(loss > 0, - "\t void structural_track_association_trainer::set_loss_per_false_association(loss)" - << "\n\t Invalid inputs were given to this function " - << "\n\t loss: " << loss - << "\n\t this: " << this - ); - - loss_per_false_association = loss; - } - - double get_loss_per_false_association ( - ) const - { - return loss_per_false_association; - } - - void set_loss_per_track_break ( - double loss - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(loss > 
0, - "\t void structural_track_association_trainer::set_loss_per_track_break(loss)" - << "\n\t Invalid inputs were given to this function " - << "\n\t loss: " << loss - << "\n\t this: " << this - ); - - loss_per_track_break = loss; - } - - double get_loss_per_track_break ( - ) const - { - return loss_per_track_break; - } - - void be_verbose ( - ) - { - verbose = true; - } - - void be_quiet ( - ) - { - verbose = false; - } - - void set_oca ( - const oca& item - ) - { - solver = item; - } - - const oca get_oca ( - ) const - { - return solver; - } - - void set_c ( - double C_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C_ > 0, - "\t void structural_track_association_trainer::set_c()" - << "\n\t C_ must be greater than 0" - << "\n\t C_: " << C_ - << "\n\t this: " << this - ); - - C = C_; - } - - double get_c ( - ) const - { - return C; - } - - bool learns_nonnegative_weights ( - ) const { return learn_nonnegative_weights; } - - void set_learns_nonnegative_weights ( - bool value - ) - { - learn_nonnegative_weights = value; - } - - template < - typename detection_type, - typename label_type - > - const track_association_function<detection_type> train ( - const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(is_track_association_problem(samples), - "\t track_association_function structural_track_association_trainer::train()" - << "\n\t invalid inputs were given to this function" - << "\n\t is_track_association_problem(samples): " << is_track_association_problem(samples) - ); - - typedef typename detection_type::track_type track_type; - - const unsigned long num_dims = find_num_dims(samples); - - feature_extractor_track_association<detection_type> fe(num_dims, learn_nonnegative_weights?num_dims:0); - structural_assignment_trainer<feature_extractor_track_association<detection_type> > trainer(fe); - - - if (verbose) - trainer.be_verbose(); - - trainer.set_c(C); - trainer.set_epsilon(eps); - trainer.set_max_cache_size(max_cache_size); - trainer.set_num_threads(num_threads); - trainer.set_oca(solver); - trainer.set_loss_per_missed_association(loss_per_track_break); - trainer.set_loss_per_false_association(loss_per_false_association); - - std::vector<std::pair<std::vector<detection_type>, std::vector<track_type> > > assignment_samples; - std::vector<std::vector<long> > labels; - for (unsigned long i = 0; i < samples.size(); ++i) - convert_dets_to_association_sets(samples[i], assignment_samples, labels); - - - return track_association_function<detection_type>(trainer.train(assignment_samples, labels)); - } - - template < - typename detection_type, - typename label_type - > - const track_association_function<detection_type> train ( - const std::vector<std::vector<labeled_detection<detection_type,label_type> > >& sample - ) const - { - std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > > samples; - samples.push_back(sample); - return train(samples); - } - - private: - - template < - typename detection_type, - typename label_type - > - static unsigned long find_num_dims ( - const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples - ) - { - typedef typename detection_type::track_type track_type; - // find a detection_type object so we can call get_similarity_features() and - // find out how big the feature vectors are. 
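// (Some time steps may contain no detections at all, so we scan for the first
// labeled detection and size the feature vector from it; if every time step is
// empty the DLIB_CASSERT below fires.)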
- - // for all detection histories - for (unsigned long i = 0; i < samples.size(); ++i) - { - // for all time instances in the detection history - for (unsigned j = 0; j < samples[i].size(); ++j) - { - if (samples[i][j].size() > 0) - { - track_type new_track; - new_track.update_track(samples[i][j][0].det); - typename track_type::feature_vector_type feats; - new_track.get_similarity_features(samples[i][j][0].det, feats); - return feats.size(); - } - } - } - - DLIB_CASSERT(false, - "No detection objects were given in the call to dlib::structural_track_association_trainer::train()"); - } - - template < - typename detections_at_single_time_step, - typename detection_type, - typename track_type - > - static void convert_dets_to_association_sets ( - const std::vector<detections_at_single_time_step>& det_history, - std::vector<std::pair<std::vector<detection_type>, std::vector<track_type> > >& data, - std::vector<std::vector<long> >& labels - ) - { - if (det_history.size() < 1) - return; - - typedef typename detections_at_single_time_step::value_type::label_type label_type; - std::vector<track_type> tracks; - // track_labels maps from detection labels to the index in tracks. So track - // with detection label X is at tracks[track_labels[X]]. - std::map<label_type,unsigned long> track_labels; - add_dets_to_tracks(tracks, track_labels, det_history[0]); - - using namespace impl; - for (unsigned long i = 1; i < det_history.size(); ++i) - { - data.push_back(std::make_pair(get_unlabeled_dets(det_history[i]), tracks)); - labels.push_back(get_association_labels(det_history[i], track_labels)); - add_dets_to_tracks(tracks, track_labels, det_history[i]); - } - } - - template < - typename labeled_detection, - typename label_type - > - static std::vector<long> get_association_labels( - const std::vector<labeled_detection>& dets, - const std::map<label_type,unsigned long>& track_labels - ) - { - std::vector<long> assoc(dets.size(),-1); - // find out which detections associate to what tracks - for (unsigned long i = 0; i < dets.size(); ++i) - { - typename std::map<label_type,unsigned long>::const_iterator j; - j = track_labels.find(dets[i].label); - // If this detection matches one of the tracks then record which track it - // matched with. - if (j != track_labels.end()) - assoc[i] = j->second; - } - return assoc; - } - - template < - typename track_type, - typename label_type, - typename labeled_detection - > - static void add_dets_to_tracks ( - std::vector<track_type>& tracks, - std::map<label_type,unsigned long>& track_labels, - const std::vector<labeled_detection>& dets - ) - { - std::vector<bool> updated_track(tracks.size(), false); - - // first assign the dets to the tracks - for (unsigned long i = 0; i < dets.size(); ++i) - { - const label_type& label = dets[i].label; - if (track_labels.count(label)) - { - const unsigned long track_idx = track_labels[label]; - tracks[track_idx].update_track(dets[i].det); - updated_track[track_idx] = true; - } - else - { - // this detection creates a new track - track_type new_track; - new_track.update_track(dets[i].det); - tracks.push_back(new_track); - track_labels[label] = tracks.size()-1; - } - - } - - // Now propagate all the tracks that didn't get any detections. 
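// (Propagating a track advances its internal state, e.g. its motion model,
// even though no detection was associated with it at this time step.)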
- for (unsigned long i = 0; i < updated_track.size(); ++i) - { - if (!updated_track[i]) - tracks[i].propagate_track(); - } - } - - double C; - oca solver; - double eps; - bool verbose; - unsigned long num_threads; - unsigned long max_cache_size; - bool learn_nonnegative_weights; - double loss_per_track_break; - double loss_per_false_association; - - void set_defaults () - { - C = 100; - verbose = false; - eps = 0.001; - num_threads = 2; - max_cache_size = 5; - learn_nonnegative_weights = false; - loss_per_track_break = 1; - loss_per_false_association = 1; - } - }; - -} - -#endif // DLIB_STRUCTURAL_TRACK_ASSOCIATION_TRAnER_Hh_ - diff --git a/ml/dlib/dlib/svm/structural_track_association_trainer_abstract.h b/ml/dlib/dlib/svm/structural_track_association_trainer_abstract.h deleted file mode 100644 index e78fadef7..000000000 --- a/ml/dlib/dlib/svm/structural_track_association_trainer_abstract.h +++ /dev/null @@ -1,268 +0,0 @@ -// Copyright (C) 2014 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_STRUCTURAL_TRACK_ASSOCIATION_TRAnER_ABSTRACT_Hh_ -#ifdef DLIB_STRUCTURAL_TRACK_ASSOCIATION_TRAnER_ABSTRACT_Hh_ - -#include "track_association_function_abstract.h" -#include "structural_assignment_trainer_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class structural_track_association_trainer - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is a tool for learning to solve a track association problem. That - is, it takes in a set of training data and outputs a track_association_function - you can use to do detection to track association. The training data takes the - form of a set or sets of "track histories". Each track history is a - std::vector where each element contains all the detections from a single time - step. Moreover, each detection has a label that uniquely identifies which - object (e.g. person or whatever) the detection really corresponds to. That is, - the labels indicate the correct detection to track associations. The goal of - this object is then to produce a track_association_function that can perform a - correct detection to track association at each time step. - !*/ - - public: - - structural_track_association_trainer ( - ); - /*! - ensures - - #get_c() == 100 - - this object isn't verbose - - #get_epsilon() == 0.001 - - #get_num_threads() == 2 - - #get_max_cache_size() == 5 - - #learns_nonnegative_weights() == false - - #get_loss_per_track_break() == 1 - - #get_loss_per_false_association() == 1 - !*/ - - void set_num_threads ( - unsigned long num - ); - /*! - ensures - - #get_num_threads() == num - !*/ - - unsigned long get_num_threads ( - ) const; - /*! - ensures - - returns the number of threads used during training. You should - usually set this equal to the number of processing cores on your - machine. - !*/ - - void set_epsilon ( - double eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - double get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Smaller values may result in a more accurate solution but take longer to - train. You can think of this epsilon value as saying "solve the - optimization problem until the average number of association mistakes per - time step is within epsilon of its optimal value". - !*/ - - void set_max_cache_size ( - unsigned long max_size - ); - /*! 
- ensures - - #get_max_cache_size() == max_size - !*/ - - unsigned long get_max_cache_size ( - ) const; - /*! - ensures - - During training, this object basically runs the track_association_function on - each training sample, over and over. To speed this up, it is possible to - cache the results of these invocations. This function returns the number - of cache elements per training sample kept in the cache. Note that a value - of 0 means caching is not used at all. - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out so that a user can - observe the progress of the algorithm. - !*/ - - void be_quiet ( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - void set_loss_per_false_association ( - double loss - ); - /*! - requires - - loss > 0 - ensures - - #get_loss_per_false_association() == loss - !*/ - - double get_loss_per_false_association ( - ) const; - /*! - ensures - - returns the amount of loss experienced for assigning a detection to the - wrong track. If you care more about avoiding false associations than - avoiding track breaks then you can increase this value. - !*/ - - void set_loss_per_track_break ( - double loss - ); - /*! - requires - - loss > 0 - ensures - - #get_loss_per_track_break() == loss - !*/ - - double get_loss_per_track_break ( - ) const; - /*! - ensures - - returns the amount of loss experienced for incorrectly assigning a - detection to a new track instead of assigning it to its existing track. - If you care more about avoiding track breaks than avoiding things like - track swaps then you can increase this value. - !*/ - - void set_oca ( - const oca& item - ); - /*! - ensures - - #get_oca() == item - !*/ - - const oca get_oca ( - ) const; - /*! - ensures - - Internally this object treats track association learning as a structural - SVM problem. This routine returns a copy of the optimizer used to solve - the structural SVM problem. - !*/ - - void set_c ( - double C - ); - /*! - requires - - C > 0 - ensures - - #get_c() == C - !*/ - - double get_c ( - ) const; - /*! - ensures - - returns the SVM regularization parameter. It is the parameter that - determines the trade-off between trying to fit the training data (i.e. - minimize the loss) or allowing more errors but hopefully improving the - generalization of the resulting track_association_function. Larger - values encourage exact fitting while smaller values of C may encourage - better generalization. - !*/ - - bool learns_nonnegative_weights ( - ) const; - /*! - ensures - - Ultimately, the output of training is a parameter vector that defines the - behavior of the track_association_function. If - learns_nonnegative_weights() == true then the resulting learned parameter - vector will always have non-negative entries. - !*/ - - void set_learns_nonnegative_weights ( - bool value - ); - /*! - ensures - - #learns_nonnegative_weights() == value - !*/ - - template < - typename detection_type, - typename label_type - > - const track_association_function<detection_type> train ( - const std::vector<std::vector<labeled_detection<detection_type,label_type> > >& sample - ) const; - /*! - requires - - is_track_association_problem(sample) == true - ensures - - This function attempts to learn to do track association from the given - training data. Note that we interpret sample as a single track history such - that sample[0] are all detections from the first time step, then sample[1] - are detections from the second time step, and so on. 
- - returns a function F such that: - - Executing F(tracks, detections) will try to correctly associate the - contents of detections to the contents of tracks and perform track - updating and creation. - - if (learns_nonnegative_weights() == true) then - - min(F.get_assignment_function().get_weights()) >= 0 - !*/ - - template < - typename detection_type, - typename label_type - > - const track_association_function<detection_type> train ( - const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples - ) const; - /*! - requires - - is_track_association_problem(samples) == true - ensures - - This function attempts to learn to do track association from the given - training data. In this case, we take a set of track histories as - training data instead of just one track history as with the above train() - method. - - returns a function F such that: - - Executing F(tracks, detections) will try to correctly associate the - contents of detections to the contents of tracks and perform track - updating and creation. - - if (learns_nonnegative_weights() == true) then - - min(F.get_assignment_function().get_weights()) >= 0 - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STRUCTURAL_TRACK_ASSOCIATION_TRAnER_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/svm/svm.h b/ml/dlib/dlib/svm/svm.h deleted file mode 100644 index e0587ef4a..000000000 --- a/ml/dlib/dlib/svm/svm.h +++ /dev/null @@ -1,1205 +0,0 @@ -// Copyright (C) 2007 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_SVm_ -#define DLIB_SVm_ - -#include "svm_abstract.h" -#include <cmath> -#include <limits> -#include <sstream> -#include "../matrix.h" -#include "../algs.h" -#include "../serialize.h" -#include "../rand.h" -#include "../std_allocator.h" -#include "function.h" -#include "kernel.h" -#include "../enable_if.h" -#include "../optimization.h" -#include "svm_nu_trainer.h" -#include <vector> -#include <set> - -namespace dlib -{ - -// 
---------------------------------------------------------------------------------------- - - template < - typename dec_funct_type, - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const matrix<double,1,2> test_binary_decision_function_impl ( - const dec_funct_type& dec_funct, - const in_sample_vector_type& x_test, - const in_scalar_vector_type& y_test - ) - { - - // make sure requires clause is not broken - DLIB_ASSERT( is_binary_classification_problem(x_test,y_test) == true, - "\tmatrix test_binary_decision_function()" - << "\n\t invalid inputs were given to this function" - << "\n\t is_binary_classification_problem(x_test,y_test): " - << ((is_binary_classification_problem(x_test,y_test))? "true":"false")); - - - // count the number of positive and negative examples - long num_pos = 0; - long num_neg = 0; - - - long num_pos_correct = 0; - long num_neg_correct = 0; - - - // now test this trained object - for (long i = 0; i < x_test.nr(); ++i) - { - // if this is a positive example - if (y_test(i) == +1.0) - { - ++num_pos; - if (dec_funct(x_test(i)) >= 0) - ++num_pos_correct; - } - else if (y_test(i) == -1.0) - { - ++num_neg; - if (dec_funct(x_test(i)) < 0) - ++num_neg_correct; - } - else - { - throw dlib::error("invalid input labels to the test_binary_decision_function() function"); - } - } - - - matrix<double, 1, 2> res; - res(0) = (double)num_pos_correct/(double)(num_pos); - res(1) = (double)num_neg_correct/(double)(num_neg); - return res; - } - - template < - typename dec_funct_type, - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const matrix<double,1,2> test_binary_decision_function ( - const dec_funct_type& dec_funct, - const in_sample_vector_type& x_test, - const in_scalar_vector_type& y_test - ) - { - return test_binary_decision_function_impl(dec_funct, - mat(x_test), - mat(y_test)); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename sequence_type - > - bool is_sequence_labeling_problem ( - const std::vector<sequence_type>& samples, - const std::vector<std::vector<unsigned long> >& labels - ) - { - if (is_learning_problem(samples, labels)) - { - for (unsigned long i = 0; i < samples.size(); ++i) - { - if (samples[i].size() != labels[i].size()) - return false; - } - return true; - } - - return false; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename sequence_type - > - bool is_sequence_segmentation_problem ( - const std::vector<sequence_type>& samples, - const std::vector<std::vector<std::pair<unsigned long,unsigned long> > >& segments - ) - { - if (is_learning_problem(samples, segments)) - { - for (unsigned long i = 0; i < samples.size(); ++i) - { - // Make sure the segments are inside samples[i] and don't overlap with each - // other. 
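- // For example (an illustration, not part of the original source): with
- // samples[i].size() == 5, the half-open segments [0,2) and [2,5) are valid
- // (touching is allowed), while [1,4) and [3,5) both claim position 3 and
- // would be rejected below.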
- std::vector<bool> hits(samples[i].size(), false); - for (unsigned long j = 0; j < segments[i].size(); ++j) - { - const unsigned long begin = segments[i][j].first; - const unsigned long end = segments[i][j].second; - // if the segment is outside the sequence - if (end > samples[i].size()) - return false; - - if (begin >= end) - return false; - - // check for overlap - for (unsigned long k = begin; k < end; ++k) - { - if (hits[k]) - return false; - hits[k] = true; - } - } - } - return true; - } - - return false; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename lhs_type, - typename rhs_type - > - bool is_assignment_problem ( - const std::vector<std::pair<std::vector<lhs_type>, std::vector<rhs_type> > >& samples, - const std::vector<std::vector<long> >& labels - ) - { - std::vector<bool> seen_label; - - if (is_learning_problem(samples, labels)) - { - for (unsigned long i = 0; i < samples.size(); ++i) - { - if (samples[i].first.size() != labels[i].size()) - return false; - - seen_label.assign(samples[i].second.size(), false); - - for (unsigned long j = 0; j < labels[i].size(); ++j) - { - if (!(-1 <= labels[i][j] && labels[i][j] < (long)samples[i].second.size())) - return false; - - if (labels[i][j] != -1) - { - // check label uniqueness - if (seen_label[labels[i][j]]) - return false; - - seen_label[labels[i][j]] = true; - } - } - } - return true; - } - - return false; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename lhs_type, - typename rhs_type - > - bool is_forced_assignment_problem ( - const std::vector<std::pair<std::vector<lhs_type>, std::vector<rhs_type> > >& samples, - const std::vector<std::vector<long> >& labels - ) - { - if (is_assignment_problem(samples, labels)) - { - for (unsigned long i = 0; i < samples.size(); ++i) - { - const unsigned long N = sum(mat(labels[i]) != -1); - if (std::min(samples[i].first.size(), samples[i].second.size()) != N) - return false; - } - return true; - } - - return false; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename detection_type_, - typename label_type_ = long - > - struct labeled_detection - { - typedef detection_type_ detection_type; - typedef label_type_ label_type; - detection_type det; - label_type label; - }; - - template < - typename detection_type_, - typename label_type_ - > - inline void serialize ( const labeled_detection<detection_type_,label_type_>& item, std::ostream& out) - { - serialize(item.det, out); - serialize(item.label, out); - } - - template < - typename detection_type_, - typename label_type_ - > - inline void deserialize (labeled_detection<detection_type_,label_type_>& item, std::istream& in) - { - deserialize(item.det, in); - deserialize(item.label, in); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename detection_type, - typename label_type - > - bool is_track_association_problem ( - const std::vector<std::vector<labeled_detection<detection_type,label_type> > >& samples - ) - { - if (samples.size() == 0) - return false; - - unsigned long num_nonzero_elements = 0; - for (unsigned long i = 0; i < samples.size(); ++i) - { - if (samples[i].size() > 0) - ++num_nonzero_elements; - } - if (num_nonzero_elements < 2) - return false; - - // now make sure the label_type values are unique within each time step. 
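- // For example (an illustration, not part of the original source): labels
- // {3, 7, 3} within one time step give vals == {3, 7}, so vals.size() (2)
- // differs from samples[i].size() (3) and the duplicate label is caught.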
- for (unsigned long i = 0; i < samples.size(); ++i) - { - std::set<label_type> vals; - for (unsigned long j = 0; j < samples[i].size(); ++j) - vals.insert(samples[i][j].label); - if (vals.size() != samples[i].size()) - return false; - } - - // passed all tests so it's good - return true; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename detection_type, - typename label_type - > - bool is_track_association_problem ( - const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples - ) - { - for (unsigned long i = 0; i < samples.size(); ++i) - { - if (!is_track_association_problem(samples[i])) - return false; - } - - // passed all tests so it's good - return true; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type, - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const matrix<double, 1, 2, typename trainer_type::mem_manager_type> - cross_validate_trainer_impl ( - const trainer_type& trainer, - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - const long folds - ) - { - typedef typename in_scalar_vector_type::value_type scalar_type; - typedef typename trainer_type::mem_manager_type mem_manager_type; - typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; - - // make sure requires clause is not broken - DLIB_ASSERT(is_binary_classification_problem(x,y) == true && - 1 < folds && folds <= std::min(sum(y>0),sum(y<0)), - "\tmatrix cross_validate_trainer()" - << "\n\t invalid inputs were given to this function" - << "\n\t std::min(sum(y>0),sum(y<0)): " << std::min(sum(y>0),sum(y<0)) - << "\n\t folds: " << folds - << "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))? 
"true":"false") - ); - - - // count the number of positive and negative examples - long num_pos = 0; - long num_neg = 0; - for (long r = 0; r < y.nr(); ++r) - { - if (y(r) == +1.0) - ++num_pos; - else - ++num_neg; - } - - // figure out how many positive and negative examples we will have in each fold - const long num_pos_test_samples = num_pos/folds; - const long num_pos_train_samples = num_pos - num_pos_test_samples; - const long num_neg_test_samples = num_neg/folds; - const long num_neg_train_samples = num_neg - num_neg_test_samples; - - - matrix<long,0,1> x_test, x_train; - scalar_vector_type y_test, y_train; - x_test.set_size (num_pos_test_samples + num_neg_test_samples); - y_test.set_size (num_pos_test_samples + num_neg_test_samples); - x_train.set_size(num_pos_train_samples + num_neg_train_samples); - y_train.set_size(num_pos_train_samples + num_neg_train_samples); - - long pos_idx = 0; - long neg_idx = 0; - - matrix<double, 1, 2, mem_manager_type> res; - set_all_elements(res,0); - - for (long i = 0; i < folds; ++i) - { - long cur = 0; - - // load up our positive test samples - while (cur < num_pos_test_samples) - { - if (y(pos_idx) == +1.0) - { - x_test(cur) = pos_idx; - y_test(cur) = +1.0; - ++cur; - } - pos_idx = (pos_idx+1)%x.nr(); - } - - // load up our negative test samples - while (cur < x_test.nr()) - { - if (y(neg_idx) == -1.0) - { - x_test(cur) = neg_idx; - y_test(cur) = -1.0; - ++cur; - } - neg_idx = (neg_idx+1)%x.nr(); - } - - // load the training data from the data following whatever we loaded - // as the testing data - long train_pos_idx = pos_idx; - long train_neg_idx = neg_idx; - cur = 0; - - // load up our positive train samples - while (cur < num_pos_train_samples) - { - if (y(train_pos_idx) == +1.0) - { - x_train(cur) = train_pos_idx; - y_train(cur) = +1.0; - ++cur; - } - train_pos_idx = (train_pos_idx+1)%x.nr(); - } - - // load up our negative train samples - while (cur < x_train.nr()) - { - if (y(train_neg_idx) == -1.0) - { - x_train(cur) = train_neg_idx; - y_train(cur) = -1.0; - ++cur; - } - train_neg_idx = (train_neg_idx+1)%x.nr(); - } - - try - { - // do the training and testing - res += test_binary_decision_function(trainer.train(rowm(x,x_train),y_train),rowm(x,x_test),y_test); - } - catch (invalid_nu_error&) - { - // Just ignore the error in this case since we are going to - // interpret an invalid nu value the same as generating a decision - // function that miss-classifies everything. 
- } - - } // for (long i = 0; i < folds; ++i) - - return res/(double)folds; - } - - template < - typename trainer_type, - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const matrix<double, 1, 2, typename trainer_type::mem_manager_type> - cross_validate_trainer ( - const trainer_type& trainer, - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - const long folds - ) - { - return cross_validate_trainer_impl(trainer, - mat(x), - mat(y), - folds); - } - -// ---------------------------------------------------------------------------------------- - - namespace prob_impl - { - template <typename vect_type> - struct objective - { - objective ( - const vect_type& f_, - const vect_type& t_ - ) : f(f_), t(t_) {} - - double operator() ( - const matrix<double,2,1>& x - ) const - { - const double A = x(0); - const double B = x(1); - - double res = 0; - for (unsigned long i = 0; i < f.size(); ++i) - { - const double val = A*f[i]+B; - // See the paper "A Note on Platt's Probabilistic Outputs for Support Vector Machines" - // for an explanation of why this code looks the way it does (rather than being the - // obvious formula). - if (val < 0) - res += (t[i] - 1)*val + std::log(1 + std::exp(val)); - else - res += t[i]*val + std::log(1 + std::exp(-val)); - } - - return res; - } - - const vect_type& f; - const vect_type& t; - }; - - template <typename vect_type> - struct der - { - der ( - const vect_type& f_, - const vect_type& t_ - ) : f(f_), t(t_) {} - - matrix<double,2,1> operator() ( - const matrix<double,2,1>& x - ) const - { - const double A = x(0); - const double B = x(1); - - double derA = 0; - double derB = 0; - - for (unsigned long i = 0; i < f.size(); ++i) - { - const double val = A*f[i]+B; - double p; - // compute p = 1/(1+exp(val)) - // but do so in a way that avoids numerical overflow. - if (val < 0) - p = 1.0/(1 + std::exp(val)); - else - p = std::exp(-val)/(1 + std::exp(-val)); - - derA += f[i]*(t[i] - p); - derB += (t[i] - p); - } - - matrix<double,2,1> res; - res = derA, derB; - return res; - } - - const vect_type& f; - const vect_type& t; - }; - - template <typename vect_type> - struct hessian - { - hessian ( - const vect_type& f_, - const vect_type& t_ - ) : f(f_), t(t_) {} - - matrix<double,2,2> operator() ( - const matrix<double,2,1>& x - ) const - { - const double A = x(0); - const double B = x(1); - - matrix<double,2,2> h; - h = 0; - - for (unsigned long i = 0; i < f.size(); ++i) - { - const double val = A*f[i]+B; - // compute pp = 1/(1+exp(val)) and - // compute pn = 1 - pp - // but do so in a way that avoids numerical overflow and catastrophic cancellation. 
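- // (pp equals the logistic sigmoid 1/(1+exp(val)) and pn equals 1-pp; their
- // product pp*pn is the sigmoid's derivative, which is exactly the weight
- // appearing in each entry of the Hessian of the Platt negative
- // log-likelihood defined above.)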
- double pp, pn; - if (val < 0) - { - const double temp = std::exp(val); - pp = 1.0/(1 + temp); - pn = temp*pp; - } - else - { - const double temp = std::exp(-val); - pn = 1.0/(1 + temp); - pp = temp*pn; - } - - h(0,0) += f[i]*f[i]*pp*pn; - const double temp2 = f[i]*pp*pn; - h(0,1) += temp2; - h(1,0) += temp2; - h(1,1) += pp*pn; - } - - return h; - } - - const vect_type& f; - const vect_type& t; - }; - } - -// ---------------------------------------------------------------------------------------- - - inline double platt_scale ( - const std::pair<double,double>& params, - const double score - ) - { - return 1/(1 + std::exp(params.first*score + params.second)); - } - -// ---------------------------------------------------------------------------------------- - - template <typename T, typename alloc> - std::pair<double,double> learn_platt_scaling ( - const std::vector<T,alloc>& scores, - const std::vector<T,alloc>& labels - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_binary_classification_problem(scores,labels) == true, - "\t std::pair<T,T> learn_platt_scaling()" - << "\n\t invalid inputs were given to this function" - << "\n\t scores.size(): " << scores.size() - << "\n\t labels.size(): " << labels.size() - << "\n\t is_binary_classification_problem(scores,labels): " << is_binary_classification_problem(scores,labels) - ); - - const T num_pos = sum(mat(labels)>0); - const T num_neg = sum(mat(labels)<0); - const T hi_target = (num_pos+1)/(num_pos+2); - const T lo_target = 1.0/(num_neg+2); - - std::vector<T,alloc> target; - for (unsigned long i = 0; i < labels.size(); ++i) - { - // if this was a positive example - if (labels[i] == +1.0) - { - target.push_back(hi_target); - } - else if (labels[i] == -1.0) - { - target.push_back(lo_target); - } - else - { - throw dlib::error("invalid input labels to the learn_platt_scaling() function."); - } - } - - // Now find the maximum likelihood parameters of the sigmoid. - - prob_impl::objective<std::vector<T,alloc> > obj(scores, target); - prob_impl::der<std::vector<T,alloc> > obj_der(scores, target); - prob_impl::hessian<std::vector<T,alloc> > obj_hessian(scores, target); - - matrix<double,2,1> val; - val = 0; - find_min(newton_search_strategy(obj_hessian), - objective_delta_stop_strategy(), - obj, - obj_der, - val, - 0); - - const double A = val(0); - const double B = val(1); - - return std::make_pair(A,B); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type, - typename sample_vector_type, - typename label_vector_type - > - const probabilistic_function<typename trainer_type::trained_function_type> - train_probabilistic_decision_function ( - const trainer_type& trainer, - const sample_vector_type& x, - const label_vector_type& y, - const long folds - ) - { - typedef typename sample_vector_type::value_type sample_type; - typedef typename label_vector_type::value_type scalar_type; - - /* - This function fits a sigmoid function to the output of the - svm trained by svm_nu_trainer or a similar trainer. The - technique used is the one described in the papers: - - Probabilistic Outputs for Support Vector Machines and - Comparisons to Regularized Likelihood Methods by - John C. Platt. March 26, 1999 - - A Note on Platt's Probabilistic Outputs for Support Vector Machines - by Hsuan-Tien Lin, Chih-Jen Lin, and Ruby C. 
Weng - */ - - // make sure requires clause is not broken - DLIB_ASSERT(is_binary_classification_problem(x,y) == true && - 1 < folds && folds <= (long)x.size(), - "\tprobabilistic_decision_function train_probabilistic_decision_function()" - << "\n\t invalid inputs were given to this function" - << "\n\t x.size(): " << x.size() - << "\n\t y.size(): " << y.size() - << "\n\t folds: " << folds - << "\n\t is_binary_classification_problem(x,y): " << is_binary_classification_problem(x,y) - ); - - // count the number of positive and negative examples - const long num_pos = (long)sum(mat(y) > 0); - const long num_neg = (long)sum(mat(y) < 0); - - // figure out how many positive and negative examples we will have in each fold - const long num_pos_test_samples = num_pos/folds; - const long num_pos_train_samples = num_pos - num_pos_test_samples; - const long num_neg_test_samples = num_neg/folds; - const long num_neg_train_samples = num_neg - num_neg_test_samples; - - typename trainer_type::trained_function_type d; - std::vector<sample_type> x_test, x_train; - std::vector<scalar_type> y_test, y_train; - x_test.resize (num_pos_test_samples + num_neg_test_samples); - y_test.resize (num_pos_test_samples + num_neg_test_samples); - x_train.resize(num_pos_train_samples + num_neg_train_samples); - y_train.resize(num_pos_train_samples + num_neg_train_samples); - - std::vector<scalar_type> out, out_label; - - long pos_idx = 0; - long neg_idx = 0; - - for (long i = 0; i < folds; ++i) - { - long cur = 0; - - // load up our positive test samples - while (cur < num_pos_test_samples) - { - if (y[pos_idx] == +1.0) - { - x_test[cur] = x[pos_idx]; - y_test[cur] = +1.0; - ++cur; - } - pos_idx = (pos_idx+1)%x.size(); - } - - // load up our negative test samples - while (cur < (long)x_test.size()) - { - if (y[neg_idx] == -1.0) - { - x_test[cur] = x[neg_idx]; - y_test[cur] = -1.0; - ++cur; - } - neg_idx = (neg_idx+1)%x.size(); - } - - // load the training data from the data following whatever we loaded - // as the testing data - long train_pos_idx = pos_idx; - long train_neg_idx = neg_idx; - cur = 0; - - // load up our positive train samples - while (cur < num_pos_train_samples) - { - if (y[train_pos_idx] == +1.0) - { - x_train[cur] = x[train_pos_idx]; - y_train[cur] = +1.0; - ++cur; - } - train_pos_idx = (train_pos_idx+1)%x.size(); - } - - // load up our negative train samples - while (cur < (long)x_train.size()) - { - if (y[train_neg_idx] == -1.0) - { - x_train[cur] = x[train_neg_idx]; - y_train[cur] = -1.0; - ++cur; - } - train_neg_idx = (train_neg_idx+1)%x.size(); - } - - // do the training - d = trainer.train (x_train,y_train); - - // now test this fold - for (unsigned long i = 0; i < x_test.size(); ++i) - { - out.push_back(d(x_test[i])); - out_label.push_back(y_test[i]); - } - - } // for (long i = 0; i < folds; ++i) - - std::pair<double,double> params = learn_platt_scaling(out, out_label); - - const double A = params.first; - const double B = params.second; - - return probabilistic_function<typename trainer_type::trained_function_type>( A, B, trainer.train(x,y) ); - } - -// ---------------------------------------------------------------------------------------- - - template <typename trainer_type> - struct trainer_adapter_probabilistic - { - typedef probabilistic_function<typename trainer_type::trained_function_type> trained_function_type; - - const trainer_type trainer; - const long folds; - - trainer_adapter_probabilistic ( - const trainer_type& trainer_, - const long folds_ - ) : trainer(trainer_),folds(folds_) {} - 
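- // Illustrative sketch (not from the original source): given some trainer,
- // e.g. svm_nu_trainer<radial_basis_kernel<sample_type> > trainer;
- // calling probabilistic(trainer, 3).train(samples, labels) yields the same
- // probabilistic_function as train_probabilistic_decision_function(trainer,
- // samples, labels, 3). The adapter exists so probability calibration can
- // be dropped into code expecting an ordinary trainer object.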
- template < - typename T, - typename U - > - const trained_function_type train ( - const T& samples, - const U& labels - ) const - { - return train_probabilistic_decision_function(trainer, samples, labels, folds); - } - - }; - - template < - typename trainer_type - > - trainer_adapter_probabilistic<trainer_type> probabilistic ( - const trainer_type& trainer, - const long folds - ) - { - return trainer_adapter_probabilistic<trainer_type>(trainer,folds); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U, - typename V, - typename rand_type - > - typename enable_if<is_matrix<T>,void>::type randomize_samples ( - T& t, - U& u, - V& v, - rand_type& r - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(t) && is_vector(u) && is_vector(v) && u.size() == t.size() && - u.size() == v.size(), - "\t randomize_samples(t,u,v)" - << "\n\t invalid inputs were given to this function" - << "\n\t t.size(): " << t.size() - << "\n\t u.size(): " << u.size() - << "\n\t v.size(): " << v.size() - << "\n\t is_vector(t): " << is_vector(t) - << "\n\t is_vector(u): " << is_vector(u) - << "\n\t is_vector(v): " << is_vector(v) - ); - - long n = t.size()-1; - while (n > 0) - { - // pick a random index to swap into t[n] - const unsigned long idx = r.get_random_32bit_number()%(n+1); - - // swap our randomly selected index into the n position - exchange(t(idx), t(n)); - exchange(u(idx), u(n)); - exchange(v(idx), v(n)); - - --n; - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U, - typename V, - typename rand_type - > - typename disable_if<is_matrix<T>,void>::type randomize_samples ( - T& t, - U& u, - V& v, - rand_type& r - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(u.size() == t.size() && u.size() == v.size(), - "\t randomize_samples(t,u,v)" - << "\n\t invalid inputs were given to this function" - << "\n\t t.size(): " << t.size() - << "\n\t u.size(): " << u.size() - << "\n\t v.size(): " << v.size() - ); - - long n = t.size()-1; - while (n > 0) - { - // pick a random index to swap into t[n] - const unsigned long idx = r.get_random_32bit_number()%(n+1); - - // swap our randomly selected index into the n position - exchange(t[idx], t[n]); - exchange(u[idx], u[n]); - exchange(v[idx], v[n]); - - --n; - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U, - typename V - > - typename disable_if<is_rand<V>,void>::type randomize_samples ( - T& t, - U& u, - V& v - ) - { - rand r; - randomize_samples(t,u,v,r); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U, - typename rand_type - > - typename enable_if_c<is_matrix<T>::value && is_rand<rand_type>::value,void>::type randomize_samples ( - T& t, - U& u, - rand_type& r - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(t) && is_vector(u) && u.size() == t.size(), - "\t randomize_samples(t,u)" - << "\n\t invalid inputs were given to this function" - << "\n\t t.size(): " << t.size() - << "\n\t u.size(): " << u.size() - << "\n\t is_vector(t): " << (is_vector(t)? 
"true" : "false") - << "\n\t is_vector(u): " << (is_vector(u)? "true" : "false") - ); - - long n = t.size()-1; - while (n > 0) - { - // pick a random index to swap into t[n] - const unsigned long idx = r.get_random_32bit_number()%(n+1); - - // swap our randomly selected index into the n position - exchange(t(idx), t(n)); - exchange(u(idx), u(n)); - - --n; - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U, - typename rand_type - > - typename disable_if_c<is_matrix<T>::value || !is_rand<rand_type>::value,void>::type randomize_samples ( - T& t, - U& u, - rand_type& r - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(u.size() == t.size(), - "\t randomize_samples(t,u)" - << "\n\t invalid inputs were given to this function" - << "\n\t t.size(): " << t.size() - << "\n\t u.size(): " << u.size() - ); - - long n = t.size()-1; - while (n > 0) - { - // pick a random index to swap into t[n] - const unsigned long idx = r.get_random_32bit_number()%(n+1); - - // swap our randomly selected index into the n position - exchange(t[idx], t[n]); - exchange(u[idx], u[n]); - - --n; - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U - > - typename disable_if<is_rand<U>,void>::type randomize_samples ( - T& t, - U& u - ) - { - rand r; - randomize_samples(t,u,r); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename rand_type - > - typename enable_if_c<is_matrix<T>::value && is_rand<rand_type>::value,void>::type randomize_samples ( - T& t, - rand_type& r - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(t), - "\t randomize_samples(t)" - << "\n\t invalid inputs were given to this function" - << "\n\t is_vector(t): " << (is_vector(t)? "true" : "false") - ); - - long n = t.size()-1; - while (n > 0) - { - // pick a random index to swap into t[n] - const unsigned long idx = r.get_random_32bit_number()%(n+1); - - // swap our randomly selected index into the n position - exchange(t(idx), t(n)); - - --n; - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename rand_type - > - typename disable_if_c<(is_matrix<T>::value==true)||(is_rand<rand_type>::value==false),void>::type randomize_samples ( - T& t, - rand_type& r - ) - { - long n = t.size()-1; - while (n > 0) - { - // pick a random index to swap into t[n] - const unsigned long idx = r.get_random_32bit_number()%(n+1); - - // swap our randomly selected index into the n position - exchange(t[idx], t[n]); - - --n; - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - void randomize_samples ( - T& t - ) - { - rand r; - randomize_samples(t,r); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_ - diff --git a/ml/dlib/dlib/svm/svm_abstract.h b/ml/dlib/dlib/svm/svm_abstract.h deleted file mode 100644 index ec92cf55b..000000000 --- a/ml/dlib/dlib/svm/svm_abstract.h +++ /dev/null @@ -1,604 +0,0 @@ -// Copyright (C) 2007 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#undef DLIB_SVm_ABSTRACT_ -#ifdef DLIB_SVm_ABSTRACT_ - -#include <cmath> -#include <limits> -#include <sstream> -#include "../matrix/matrix_abstract.h" -#include "../algs.h" -#include "../serialize.h" -#include "function_abstract.h" -#include "kernel_abstract.h" -#include "svm_nu_trainer_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U - > - bool is_learning_problem ( - const T& x, - const U& x_labels - ); - /*! - requires - - T == a matrix or something convertible to a matrix via mat() - - U == a matrix or something convertible to a matrix via mat() - ensures - - returns true if all of the following are true and false otherwise: - - is_col_vector(x) == true - - is_col_vector(x_labels) == true - - x.size() == x_labels.size() - - x.size() > 0 - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U - > - bool is_binary_classification_problem ( - const T& x, - const U& x_labels - ); - /*! - requires - - T == a matrix or something convertible to a matrix via mat() - - U == a matrix or something convertible to a matrix via mat() - ensures - - returns true if all of the following are true and false otherwise: - - is_learning_problem(x, x_labels) == true - - x.size() > 1 - - there exists at least one sample from both the +1 and -1 classes. - (i.e. all samples can't have the same label) - - for all valid i: - - x_labels(i) == -1 or +1 - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename sequence_type - > - bool is_sequence_labeling_problem ( - const std::vector<sequence_type>& samples, - const std::vector<std::vector<unsigned long> >& labels - ); - /*! - ensures - - returns true if all of the following are true and false otherwise: - - is_learning_problem(samples, labels) == true - - for all valid i: - - samples[i].size() == labels[i].size() - (i.e. The size of a label sequence needs to match the size of - its corresponding sample sequence) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename sequence_type - > - bool is_sequence_segmentation_problem ( - const std::vector<sequence_type>& samples, - const std::vector<std::vector<std::pair<unsigned long,unsigned long> > >& segments - ); - /*! - ensures - - Note that a sequence segmentation problem is a task where you are given a - sequence of objects (e.g. words in a sentence) and your task is to find - certain types of sub-sequences (e.g. proper names). - - returns true if all of the following are true and false otherwise: - - is_learning_problem(samples, segments) == true - - for all valid i and j: - - We interpret segments[i][j] as defining a half open range starting - with segments[i][j].first and ending just before segments[i][j].second. - - segments[i][j].first < segments[i][j].second - - segments[i][j].second <= samples[i].size() - (i.e. Each segment must be contained within its associated sequence) - - segments[i][j] does not overlap with any of the other ranges in - segments[i]. 
- !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename lhs_type, - typename rhs_type - > - bool is_assignment_problem ( - const std::vector<std::pair<std::vector<lhs_type>, std::vector<rhs_type> > >& samples, - const std::vector<std::vector<long> >& labels - ); - /*! - ensures - - Note that an assignment problem is the task of associating each element of samples[i].first - with an element of samples[i].second, or indicating that the element doesn't associate - with anything. Therefore, labels[i] should contain the association information for - samples[i]. - - This function returns true if all of the following are true and false otherwise: - - is_learning_problem(samples, labels) == true - - for all valid i: - - samples[i].first.size() == labels[i].size() - - for all valid j: - -1 <= labels[i][j] < samples[i].second.size() - (A value of -1 indicates that samples[i].first[j] isn't associated with anything. - All other values indicate the associating element of samples[i].second) - - All elements of labels[i] which are not equal to -1 are unique. That is, - multiple elements of samples[i].first can't associate to the same element - in samples[i].second. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename lhs_type, - typename rhs_type - > - bool is_forced_assignment_problem ( - const std::vector<std::pair<std::vector<lhs_type>, std::vector<rhs_type> > >& samples, - const std::vector<std::vector<long> >& labels - ); - /*! - ensures - - A regular assignment problem is allowed to indicate that all elements of - samples[i].first don't associate to anything. However, a forced assignment - problem is required to always associate an element of samples[i].first to - something in samples[i].second if there is an element of samples[i].second - that hasn't already been associated to something. - - This function returns true if all of the following are true and false otherwise: - - is_assignment_problem(samples, labels) == true - - for all valid i: - - let N denote the number of elements in labels[i] that are not equal to -1. - - min(samples[i].first.size(), samples[i].second.size()) == N - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename detection_type_, - typename label_type_ = long - > - struct labeled_detection - { - /*! - WHAT THIS OBJECT REPRESENTS - This is a simple object which, like std::pair, just holds two objects. It - serves the same purpose as std::pair except that it has informative names - describing its two members and is intended for use with track association - problems. - !*/ - - typedef detection_type_ detection_type; - typedef label_type_ label_type; - - detection_type det; - label_type label; - }; - - template < - typename detection_type_, - typename label_type_ - > - void serialize (const labeled_detection<detection_type_,label_type_>& item, std::ostream& out); - /*! - provides serialization support - !*/ - - template < - typename detection_type_, - typename label_type_ - > - void deserialize (labeled_detection<detection_type_,label_type_>& item, std::istream& in); - /*! 
- provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename detection_type, - typename label_type - > - bool is_track_association_problem ( - const std::vector<std::vector<labeled_detection<detection_type,label_type> > >& samples - ); - /*! - ensures - - In this tracking model you get a set of detections at each time step and are - expected to associate each detection with a track or have it spawn a new - track. Therefore, a track association problem is a machine learning problem - where you are given a dataset of example input detections and are expected to - learn to perform the proper detection to track association. - - This function checks if samples can form a valid dataset for this machine - learning problem and returns true if this is the case. This means we should - interpret samples in the following way: - - samples is a track history and for each valid i: - - samples[i] is a set of labeled detections from the i-th time step. - Each detection has been labeled with its "true object identity". - That is, all the detections throughout the history with the same - label_type value are detections from the same object and therefore - should be associated to the same track. - Putting this all together, samples is a valid track association learning - problem if and only if the following are all true: - - samples.size() > 0 - - There are at least two values, i and j such that: - - i != j - - samples[i].size() > 0 - - samples[j].size() > 0 - Or in other words, there need to be some detections in samples somewhere - or it is impossible to learn anything. - - for all valid i: - - for all valid j and k where j!=k: - - samples[i][j].label != samples[i][k].label - (i.e. the label_type values must be unique within each time step. - Or in other words, you can't have two detections on the same - object in a single time step.) - !*/ - - template < - typename detection_type, - typename label_type - > - bool is_track_association_problem ( - const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples - ); - /*! - ensures - - returns true if is_track_association_problem(samples[i]) == true for all - valid i and false otherwise. - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - double platt_scale ( - const std::pair<double,double>& params, - const double score - ); - /*! - ensures - - returns 1/(1 + std::exp(params.first*score + params.second)) - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename T, typename alloc> - std::pair<double,double> learn_platt_scaling ( - const std::vector<T,alloc>& scores, - const std::vector<T,alloc>& labels - ); - /*! - requires - - T should be either float, double, or long double - - is_binary_classification_problem(scores,labels) == true - ensures - - This function learns to map scalar values into well calibrated probabilities - using Platt scaling. In particular, it returns a params object such that, - for all valid i: - - platt_scale(params,scores[i]) == the scaled version of the scalar value - scores[i]. That is, the output is a number between 0 and 1. 
In - particular, platt_scale(params,scores[i]) is meant to represent the - probability that labels[i] == +1. - - This function is an implementation of the algorithm described in the following - papers: - Probabilistic Outputs for Support Vector Machines and Comparisons to - Regularized Likelihood Methods by John C. Platt. March 26, 1999 - - A Note on Platt's Probabilistic Outputs for Support Vector Machines - by Hsuan-Tien Lin, Chih-Jen Lin, and Ruby C. Weng - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type, - typename sample_vector_type, - typename label_vector_type - > - const probabilistic_function<typename trainer_type::trained_function_type> - train_probabilistic_decision_function ( - const trainer_type& trainer, - const sample_vector_type& x, - const label_vector_type& y, - const long folds - ); - /*! - requires - - 1 < folds <= x.size() - - is_binary_classification_problem(x,y) == true - - x and y must be std::vector objects or types with a compatible interface. - - trainer_type == some kind of batch trainer object (e.g. svm_nu_trainer) - ensures - - trains a classifier given the training samples in x and labels in y. - - returns a probabilistic_decision_function that represents the trained classifier. - - The parameters of the probability model are estimated by performing k-fold - cross validation. - - The number of folds used is given by the folds argument. - - This function is implemented using learn_platt_scaling() - throws - - any exceptions thrown by trainer.train() - - std::bad_alloc - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type - > - trainer_adapter_probabilistic<trainer_type> probabilistic ( - const trainer_type& trainer, - const long folds - ); - /*! - requires - - 1 < folds <= samples.size(), where samples is the sample vector later - given to TA.train() - - trainer_type == some kind of batch trainer object (e.g. svm_nu_trainer) - ensures - - returns a trainer adapter TA such that calling TA.train(samples, labels) - returns the same object as calling train_probabilistic_decision_function(trainer,samples,labels,folds). - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// Miscellaneous functions -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type, - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const matrix<double,1,2> cross_validate_trainer ( - const trainer_type& trainer, - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - const long folds - ); - /*! - requires - - is_binary_classification_problem(x,y) == true - - 1 < folds <= std::min(sum(y>0),sum(y<0)) - (i.e. there must be at least as many examples of each class as there are folds) - - trainer_type == some kind of binary classification trainer object (e.g. svm_nu_trainer) - ensures - - performs k-fold cross validation by using the given trainer to solve the - given binary classification problem for the given number of folds. - Each fold is tested using the output of the trainer and the average - classification accuracy from all folds is returned. 
- - The average accuracy is computed by running test_binary_decision_function() - on each fold and its output is averaged and returned. - - The number of folds used is given by the folds argument. - throws - - any exceptions thrown by trainer.train() - - std::bad_alloc - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename dec_funct_type, - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const matrix<double,1,2> test_binary_decision_function ( - const dec_funct_type& dec_funct, - const in_sample_vector_type& x_test, - const in_scalar_vector_type& y_test - ); - /*! - requires - - is_binary_classification_problem(x_test,y_test) == true - - dec_funct_type == some kind of decision function object (e.g. decision_function) - ensures - - Tests the given decision function by calling it on the x_test and y_test samples. - The output of dec_funct is interpreted as a prediction for the +1 class - if its output is >= 0 and as a prediction for the -1 class otherwise. - - The test accuracy is returned in a row vector, let us call it R. Both - quantities in R are numbers between 0 and 1 which represent the fraction - of examples correctly classified. R(0) is the fraction of +1 examples - correctly classified and R(1) is the fraction of -1 examples correctly - classified. - throws - - std::bad_alloc - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U - > - void randomize_samples ( - T& samples, - U& labels - ); - /*! - requires - - T == a matrix object or an object compatible with std::vector that contains - a swappable type. - - U == a matrix object or an object compatible with std::vector that contains - a swappable type. - - if samples or labels are matrix objects then is_vector(samples) == true and - is_vector(labels) == true - - samples.size() == labels.size() - ensures - - randomizes the order of the samples and labels but preserves - the pairing between each sample and its label - - A default initialized random number generator is used to perform the randomizing. - Note that this means that each call to this function does the same thing. - That is, the random number generator always uses the same seed. - - for all valid i: - - let r == the random index samples(i) was moved to. then: - - #labels(r) == labels(i) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U, - typename rand_type - > - void randomize_samples ( - T& samples, - U& labels, - rand_type& rnd - ); - /*! - requires - - T == a matrix object or an object compatible with std::vector that contains - a swappable type. - - U == a matrix object or an object compatible with std::vector that contains - a swappable type. - - if samples or labels are matrix objects then is_vector(samples) == true and - is_vector(labels) == true - - samples.size() == labels.size() - - rand_type == a type that implements the dlib/rand/rand_kernel_abstract.h interface - ensures - - randomizes the order of the samples and labels but preserves - the pairing between each sample and its label - - the given rnd random number generator object is used to do the randomizing - - for all valid i: - - let r == the random index samples(i) was moved to. 
then: - - #labels(r) == labels(i) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - void randomize_samples ( - T& samples - ); - /*! - requires - - T == a matrix object or an object compatible with std::vector that contains - a swappable type. - - if (samples is a matrix) then - - is_vector(samples) == true - ensures - - randomizes the order of the elements inside samples - - A default initialized random number generator is used to perform the randomizing. - Note that this means that each call to this function does the same thing. - That is, the random number generator always uses the same seed. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename rand_type - > - void randomize_samples ( - T& samples, - rand_type& rnd - ); - /*! - requires - - T == a matrix object or an object compatible with std::vector that contains - a swappable type. - - rand_type == a type that implements the dlib/rand/rand_kernel_abstract.h interface - - if (samples is a matrix) then - - is_vector(samples) == true - ensures - - randomizes the order of the elements inside samples - - the given rnd random number generator object is used to do the randomizing - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U, - typename V - > - void randomize_samples ( - T& samples, - U& labels, - V& auxiliary - ); - /*! - requires - - T == a matrix object or an object compatible with std::vector that contains - a swappable type. - - U == a matrix object or an object compatible with std::vector that contains - a swappable type. - - V == a matrix object or an object compatible with std::vector that contains - a swappable type. - - if (samples, labels, or auxiliary are matrix objects) then - - is_vector(samples) == true - - is_vector(labels) == true - - is_vector(auxiliary) == true - - samples.size() == labels.size() == auxiliary.size() - ensures - - randomizes the order of the samples, labels, and auxiliary but preserves the - pairing between each sample, its label, and its auxiliary value. - - A default initialized random number generator is used to perform the - randomizing. Note that this means that each call to this function does the - same thing. That is, the random number generator always uses the same seed. - - for all valid i: - - let r == the random index samples(i) was moved to. then: - - #labels(r) == labels(i) - - #auxiliary(r) == auxiliary(i) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U, - typename V, - typename rand_type - > - void randomize_samples ( - T& samples, - U& labels, - V& auxiliary, - rand_type& rnd - ); - /*! - requires - - T == a matrix object or an object compatible with std::vector that contains - a swappable type. - - U == a matrix object or an object compatible with std::vector that contains - a swappable type. - - V == a matrix object or an object compatible with std::vector that contains - a swappable type. 
- - if (samples, labels, or auxiliary are matrix objects) then - - is_vector(samples) == true - - is_vector(labels) == true - - is_vector(auxiliary) == true - - samples.size() == labels.size() == auxiliary.size() - - rand_type == a type that implements the dlib/rand/rand_kernel_abstract.h interface - ensures - - randomizes the order of the samples, labels, and auxiliary but preserves the - pairing between each sample, its label, and its auxiliary value. - - the given rnd random number generator object is used to do the randomizing - - for all valid i: - - let r == the random index samples(i) was moved to. then: - - #labels(r) == labels(i) - - #auxiliary(r) == auxiliary(i) - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_ABSTRACT_ - - diff --git a/ml/dlib/dlib/svm/svm_c_ekm_trainer.h b/ml/dlib/dlib/svm/svm_c_ekm_trainer.h deleted file mode 100644 index 735e0f22e..000000000 --- a/ml/dlib/dlib/svm/svm_c_ekm_trainer.h +++ /dev/null @@ -1,636 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_SVM_C_EKm_TRAINER_Hh_ -#define DLIB_SVM_C_EKm_TRAINER_Hh_ - -#include "../algs.h" -#include "function.h" -#include "kernel.h" -#include "empirical_kernel_map.h" -#include "svm_c_linear_trainer.h" -#include "svm_c_ekm_trainer_abstract.h" -#include "../statistics.h" -#include "../rand.h" -#include <vector> - -namespace dlib -{ - template < - typename K - > - class svm_c_ekm_trainer - { - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - svm_c_ekm_trainer ( - ) - { - verbose = false; - ekm_stale = true; - - initial_basis_size = 10; - basis_size_increment = 50; - max_basis_size = 300; - } - - explicit svm_c_ekm_trainer ( - const scalar_type& C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C > 0, - "\t svm_c_ekm_trainer::svm_c_ekm_trainer()" - << "\n\t C must be greater than 0" - << "\n\t C: " << C - << "\n\t this: " << this - ); - - - ocas.set_c(C); - verbose = false; - ekm_stale = true; - - initial_basis_size = 10; - basis_size_increment = 50; - max_basis_size = 300; - } - - void set_epsilon ( - scalar_type eps - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps > 0, - "\t void svm_c_ekm_trainer::set_epsilon()" - << "\n\t eps must be greater than 0" - << "\n\t eps: " << eps - << "\n\t this: " << this - ); - - ocas.set_epsilon(eps); - } - - const scalar_type get_epsilon ( - ) const - { - return ocas.get_epsilon(); - } - - void set_max_iterations ( - unsigned long max_iter - ) - { - ocas.set_max_iterations(max_iter); - } - - unsigned long get_max_iterations ( - ) - { - return ocas.get_max_iterations(); - } - - void be_verbose ( - ) - { - verbose = true; - ocas.be_quiet(); - } - - void be_very_verbose ( - ) - { - verbose = true; - ocas.be_verbose(); - } - - void be_quiet ( - ) - { - verbose = false; - ocas.be_quiet(); - } - - void set_oca ( - const oca& item - ) - { - ocas.set_oca(item); - } - - const oca get_oca ( - ) const - { - return ocas.get_oca(); - } - - const kernel_type get_kernel ( - ) const - { - return kern; - } - - void set_kernel ( - const kernel_type& k - ) - { - 
kern = k; - ekm_stale = true; - } - - template <typename T> - void set_basis ( - const T& basis_samples - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(basis_samples.size() > 0 && is_vector(mat(basis_samples)), - "\tvoid svm_c_ekm_trainer::set_basis(basis_samples)" - << "\n\t You have to give a non-empty set of basis_samples and it must be a vector" - << "\n\t basis_samples.size(): " << basis_samples.size() - << "\n\t is_vector(mat(basis_samples)): " << is_vector(mat(basis_samples)) - << "\n\t this: " << this - ); - - basis = mat(basis_samples); - ekm_stale = true; - } - - bool basis_loaded( - ) const - { - return (basis.size() != 0); - } - - void clear_basis ( - ) - { - basis.set_size(0); - ekm.clear(); - ekm_stale = true; - } - - unsigned long get_max_basis_size ( - ) const - { - return max_basis_size; - } - - void set_max_basis_size ( - unsigned long max_basis_size_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(max_basis_size_ > 0, - "\t void svm_c_ekm_trainer::set_max_basis_size()" - << "\n\t max_basis_size_ must be greater than 0" - << "\n\t max_basis_size_: " << max_basis_size_ - << "\n\t this: " << this - ); - - max_basis_size = max_basis_size_; - if (initial_basis_size > max_basis_size) - initial_basis_size = max_basis_size; - } - - unsigned long get_initial_basis_size ( - ) const - { - return initial_basis_size; - } - - void set_initial_basis_size ( - unsigned long initial_basis_size_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(initial_basis_size_ > 0, - "\t void svm_c_ekm_trainer::set_initial_basis_size()" - << "\n\t initial_basis_size_ must be greater than 0" - << "\n\t initial_basis_size_: " << initial_basis_size_ - << "\n\t this: " << this - ); - - initial_basis_size = initial_basis_size_; - - if (initial_basis_size > max_basis_size) - max_basis_size = initial_basis_size; - } - - unsigned long get_basis_size_increment ( - ) const - { - return basis_size_increment; - } - - void set_basis_size_increment ( - unsigned long basis_size_increment_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(basis_size_increment_ > 0, - "\t void svm_c_ekm_trainer::set_basis_size_increment()" - << "\n\t basis_size_increment_ must be greater than 0" - << "\n\t basis_size_increment_: " << basis_size_increment_ - << "\n\t this: " << this - ); - - basis_size_increment = basis_size_increment_; - } - - void set_c ( - scalar_type C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C > 0, - "\t void svm_c_ekm_trainer::set_c()" - << "\n\t C must be greater than 0" - << "\n\t C: " << C - << "\n\t this: " << this - ); - - ocas.set_c(C); - } - - const scalar_type get_c_class1 ( - ) const - { - return ocas.get_c_class1(); - } - - const scalar_type get_c_class2 ( - ) const - { - return ocas.get_c_class2(); - } - - void set_c_class1 ( - scalar_type C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C > 0, - "\t void svm_c_ekm_trainer::set_c_class1()" - << "\n\t C must be greater than 0" - << "\n\t C: " << C - << "\n\t this: " << this - ); - - ocas.set_c_class1(C); - } - - void set_c_class2 ( - scalar_type C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C > 0, - "\t void svm_c_ekm_trainer::set_c_class2()" - << "\n\t C must be greater than 0" - << "\n\t C: " << C - << "\n\t this: " << this - ); - - ocas.set_c_class2(C); - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const 
in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - scalar_type obj; - if (basis_loaded()) - return do_train_user_basis(mat(x),mat(y),obj); - else - return do_train_auto_basis(mat(x),mat(y),obj); - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - scalar_type& svm_objective - ) const - { - if (basis_loaded()) - return do_train_user_basis(mat(x),mat(y),svm_objective); - else - return do_train_auto_basis(mat(x),mat(y),svm_objective); - } - - - private: - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train_user_basis ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - scalar_type& svm_objective - ) const - /*! - requires - - basis_loaded() == true - ensures - - trains an SVM with the user supplied basis - !*/ - { - // make sure requires clause is not broken - DLIB_ASSERT(is_binary_classification_problem(x,y) == true, - "\t decision_function svm_c_ekm_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t x.nr(): " << x.nr() - << "\n\t y.nr(): " << y.nr() - << "\n\t x.nc(): " << x.nc() - << "\n\t y.nc(): " << y.nc() - << "\n\t is_binary_classification_problem(x,y): " << is_binary_classification_problem(x,y) - ); - - if (ekm_stale) - { - ekm.load(kern, basis); - ekm_stale = false; - } - - // project all the samples with the ekm - running_stats<scalar_type> rs; - std::vector<matrix<scalar_type,0,1, mem_manager_type> > proj_samples; - proj_samples.reserve(x.size()); - for (long i = 0; i < x.size(); ++i) - { - if (verbose) - { - scalar_type err; - proj_samples.push_back(ekm.project(x(i), err)); - rs.add(err); - } - else - { - proj_samples.push_back(ekm.project(x(i))); - } - } - - if (verbose) - { - std::cout << "\nMean EKM projection error: " << rs.mean() << std::endl; - std::cout << "Standard deviation of EKM projection error: " << rs.stddev() << std::endl; - } - - // now do the training - decision_function<linear_kernel<matrix<scalar_type,0,1, mem_manager_type> > > df; - df = ocas.train(proj_samples, y, svm_objective); - - if (verbose) - { - std::cout << "Final svm objective: " << svm_objective << std::endl; - } - - decision_function<kernel_type> final_df; - final_df = ekm.convert_to_decision_function(df.basis_vectors(0)); - final_df.b = df.b; - return final_df; - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train_auto_basis ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - scalar_type& svm_objective - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(is_binary_classification_problem(x,y) == true, - "\t decision_function svm_c_ekm_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t x.nr(): " << x.nr() - << "\n\t y.nr(): " << y.nr() - << "\n\t x.nc(): " << x.nc() - << "\n\t y.nc(): " << y.nc() - << "\n\t is_binary_classification_problem(x,y): " << is_binary_classification_problem(x,y) - ); - - - std::vector<matrix<scalar_type,0,1, mem_manager_type> > proj_samples(x.size()); - decision_function<linear_kernel<matrix<scalar_type,0,1, mem_manager_type> > > df; - - // we will use a linearly_independent_subset_finder to store our basis set. 
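- // (The finder only accepts a candidate sample if it is sufficiently
- // linearly independent, in kernel feature space, of the vectors it already
- // holds, which keeps the basis both small and well conditioned.)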
- linearly_independent_subset_finder<kernel_type> lisf(get_kernel(), max_basis_size); - - dlib::rand rnd; - - // first pick the initial basis set randomly - for (unsigned long i = 0; i < 10*initial_basis_size && lisf.size() < initial_basis_size; ++i) - { - lisf.add(x(rnd.get_random_32bit_number()%x.size())); - } - - ekm.load(lisf); - - // first project all samples into the span of the current basis - for (long i = 0; i < x.size(); ++i) - { - proj_samples[i] = ekm.project(x(i)); - } - - - svm_c_linear_trainer<linear_kernel<matrix<scalar_type,0,1,mem_manager_type> > > trainer(ocas); - - const scalar_type min_epsilon = trainer.get_epsilon(); - // while we are determining what the basis set will be we are going to use a very - // loose stopping condition. We will tighten it back up before producing the - // final decision_function. - trainer.set_epsilon(0.2); - - scalar_type prev_svm_objective = std::numeric_limits<scalar_type>::max(); - - empirical_kernel_map<kernel_type> prev_ekm; - - // This loop is where we try to generate a basis for SVM training. We will - // do this by repeatedly training the SVM and adding a few points which violate the - // margin to the basis in each iteration. - while (true) - { - // if the basis is already as big as it's going to get then just do the most - // accurate training right now. - if (lisf.size() == max_basis_size) - trainer.set_epsilon(min_epsilon); - - while (true) - { - // now do the training. - df = trainer.train(proj_samples, y, svm_objective); - - if (svm_objective < prev_svm_objective) - break; - - // If the training didn't reduce the objective more than last time then - // try lowering the epsilon and doing it again. - if (trainer.get_epsilon() > min_epsilon) - { - trainer.set_epsilon(std::max(trainer.get_epsilon()*0.5, min_epsilon)); - if (verbose) - std::cout << " *** Reducing epsilon to " << trainer.get_epsilon() << std::endl; - } - else - break; - } - - if (verbose) - { - std::cout << "svm objective: " << svm_objective << std::endl; - std::cout << "basis size: " << lisf.size() << std::endl; - } - - // if we failed to make progress on this iteration then we are done - if (svm_objective >= prev_svm_objective) - break; - - prev_svm_objective = svm_objective; - - // now add more elements to the basis - unsigned long count = 0; - for (unsigned long j = 0; - (j < 100*basis_size_increment) && (count < basis_size_increment) && (lisf.size() < max_basis_size); - ++j) - { - // pick a random sample - const unsigned long idx = rnd.get_random_32bit_number()%x.size(); - // If it is a margin violator then it is useful to add it into the basis set. - if (df(proj_samples[idx])*y(idx) < 1) - { - // Add the sample into the basis set if it is linearly independent of all the - // vectors already in the basis set. - if (lisf.add(x(idx))) - { - ++count; - } - } - } - // if we couldn't add any more basis vectors then stop - if (count == 0) - { - if (verbose) - std::cout << "Stopping, couldn't add more basis vectors." << std::endl; - break; - } - - - // Project all the samples into the span of our newly enlarged basis. We will do this - // using the special transformation in the EKM that lets us project from a smaller - // basis set to a larger one without needing to reevaluate kernel functions we have already - // computed.
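            // In symbols: get_transformation_to() produces prev_to_new and proj_part
            // such that, for a sample z whose old projection is p == prev_ekm.project(z),
            // the new projection is ekm.project(z) == prev_to_new*p + proj_part(z).
            // Only proj_part touches the kernel, and only against the newly added
            // basis vectors, which is what makes this cheaper than reprojecting
            // everything from scratch.  The loop below applies exactly this identity.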
- ekm.swap(prev_ekm); - ekm.load(lisf); - projection_function<kernel_type> proj_part; - matrix<double> prev_to_new; - prev_ekm.get_transformation_to(ekm, prev_to_new, proj_part); - - - matrix<scalar_type,0,1, mem_manager_type> temp; - for (long i = 0; i < x.size(); ++i) - { - // assign to temporary to avoid memory allocation that would result if we - // assigned this expression straight into proj_samples[i] - temp = prev_to_new*proj_samples[i] + proj_part(x(i)); - proj_samples[i] = temp; - - } - } - - // Reproject all the data samples using the final basis. We could just use what we - // already have but the recursive projection done above to compute the proj_samples - // might have accumulated a little numerical error. So let's just be safe. - running_stats<scalar_type> rs, rs_margin; - for (long i = 0; i < x.size(); ++i) - { - if (verbose) - { - scalar_type err; - proj_samples[i] = ekm.project(x(i),err); - rs.add(err); - // if this point is within the margin - if (df(proj_samples[i])*y(i) < 1) - rs_margin.add(err); - } - else - { - proj_samples[i] = ekm.project(x(i)); - } - } - - // do the final training - trainer.set_epsilon(min_epsilon); - df = trainer.train(proj_samples, y, svm_objective); - - - if (verbose) - { - std::cout << "\nMean EKM projection error: " << rs.mean() << std::endl; - std::cout << "Standard deviation of EKM projection error: " << rs.stddev() << std::endl; - std::cout << "Mean EKM projection error for margin violators: " << rs_margin.mean() << std::endl; - std::cout << "Standard deviation of EKM projection error for margin violators: " << ((rs_margin.current_n()>1)?rs_margin.stddev():0) << std::endl; - - std::cout << "Final svm objective: " << svm_objective << std::endl; - } - - - decision_function<kernel_type> final_df; - final_df = ekm.convert_to_decision_function(df.basis_vectors(0)); - final_df.b = df.b; - - // we don't need the ekm anymore so clear it out - ekm.clear(); - - return final_df; - } - - - - - /*! - CONVENTION - - if (ekm_stale) then - - kern or basis have changed since the last time - they were loaded into the ekm - !*/ - - svm_c_linear_trainer<linear_kernel<matrix<scalar_type,0,1,mem_manager_type> > > ocas; - bool verbose; - - kernel_type kern; - unsigned long max_basis_size; - unsigned long basis_size_increment; - unsigned long initial_basis_size; - - - matrix<sample_type,0,1,mem_manager_type> basis; - mutable empirical_kernel_map<kernel_type> ekm; - mutable bool ekm_stale; - - }; - -} - -#endif // DLIB_SVM_C_EKm_TRAINER_Hh_ - - - diff --git a/ml/dlib/dlib/svm/svm_c_ekm_trainer_abstract.h b/ml/dlib/dlib/svm/svm_c_ekm_trainer_abstract.h deleted file mode 100644 index d1ba2bf5f..000000000 --- a/ml/dlib/dlib/svm/svm_c_ekm_trainer_abstract.h +++ /dev/null @@ -1,384 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SVM_C_EKm_TRAINER_ABSTRACT_Hh_ -#ifdef DLIB_SVM_C_EKm_TRAINER_ABSTRACT_Hh_ - -#include "../algs.h" -#include "function_abstract.h" -#include "kernel_abstract.h" -#include "empirical_kernel_map_abstract.h" -#include "svm_c_linear_trainer_abstract.h" - -namespace dlib -{ - template < - typename K - > - class svm_c_ekm_trainer - { - /*! - REQUIREMENTS ON K - is a kernel function object as defined in dlib/svm/kernel_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object represents a tool for training the C formulation of - a support vector machine. It is implemented using the empirical_kernel_map - to kernelize the svm_c_linear_trainer.
This makes it a very fast algorithm - capable of learning from very large datasets. - !*/ - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - svm_c_ekm_trainer ( - ); - /*! - ensures - - This object is properly initialized and ready to be used - to train a support vector machine. - - #get_oca() == oca() (i.e. an instance of oca with default parameters) - - #get_c_class1() == 1 - - #get_c_class2() == 1 - - #get_epsilon() == 0.001 - - #basis_loaded() == false - - #get_initial_basis_size() == 10 - - #get_basis_size_increment() == 50 - - #get_max_basis_size() == 300 - - this object will not be verbose unless be_verbose() is called - - #get_max_iterations() == 10000 - !*/ - - explicit svm_c_ekm_trainer ( - const scalar_type& C - ); - /*! - requires - - C > 0 - ensures - - This object is properly initialized and ready to be used - to train a support vector machine. - - #get_oca() == oca() (i.e. an instance of oca with default parameters) - - #get_c_class1() == C - - #get_c_class2() == C - - #get_epsilon() == 0.001 - - #basis_loaded() == false - - #get_initial_basis_size() == 10 - - #get_basis_size_increment() == 50 - - #get_max_basis_size() == 300 - - this object will not be verbose unless be_verbose() is called - - #get_max_iterations() == 10000 - !*/ - - void set_epsilon ( - scalar_type eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - const scalar_type get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Smaller values may result in a more accurate solution but take longer - to execute. - !*/ - - void set_max_iterations ( - unsigned long max_iter - ); - /*! - ensures - - #get_max_iterations() == max_iter - !*/ - - unsigned long get_max_iterations ( - ); - /*! - ensures - - returns the maximum number of iterations the SVM optimizer is allowed to - run before it is required to stop and return a result. - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out so that a - user can observe the progress of the algorithm. - !*/ - - void be_very_verbose ( - ); - /*! - ensures - - This object will print a lot of status messages to standard out so that a - user can observe the progress of the algorithm. In addition to the - few status messages normal verbosity produces this setting also causes - the underlying svm_c_linear_trainer to be verbose. - !*/ - - void be_quiet ( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - void set_oca ( - const oca& item - ); - /*! - ensures - - #get_oca() == item - !*/ - - const oca get_oca ( - ) const; - /*! - ensures - - returns a copy of the optimizer used to solve the SVM problem. - !*/ - - const kernel_type get_kernel ( - ) const; - /*! - ensures - - returns a copy of the kernel function in use by this object - !*/ - - void set_kernel ( - const kernel_type& k - ); - /*! - ensures - - #get_kernel() == k - !*/ - - template <typename T> - void set_basis ( - const T& basis_samples - ); - /*! - requires - - T must be a dlib::matrix type or something convertible to a matrix via mat() - (e.g. a std::vector) - - is_vector(basis_samples) == true - - basis_samples.size() > 0 - - get_kernel() must be capable of operating on the elements of basis_samples. 
That is, - expressions such as get_kernel()(basis_samples(0), basis_samples(0)) should make sense. - ensures - - #basis_loaded() == true - - training will be carried out in the span of the given basis_samples - !*/ - - bool basis_loaded ( - ) const; - /*! - ensures - - returns true if this object has been loaded with user supplied basis vectors and false otherwise. - !*/ - - void clear_basis ( - ); - /*! - ensures - - #basis_loaded() == false - !*/ - - unsigned long get_max_basis_size ( - ) const; - /*! - ensures - - returns the maximum number of basis vectors this object is allowed - to use. This parameter only matters when the user has not supplied - a basis via set_basis(). - !*/ - - void set_max_basis_size ( - unsigned long max_basis_size - ); - /*! - requires - - max_basis_size > 0 - ensures - - #get_max_basis_size() == max_basis_size - - if (get_initial_basis_size() > max_basis_size) then - - #get_initial_basis_size() == max_basis_size - !*/ - - unsigned long get_initial_basis_size ( - ) const; - /*! - ensures - - If the user does not supply a basis via set_basis() then this object - will generate one automatically. It does this by starting with - a small basis of size N and repeatedly adding basis vectors to it - until a stopping condition is reached. This function returns that - initial size N. - !*/ - - void set_initial_basis_size ( - unsigned long initial_basis_size - ); - /*! - requires - - initial_basis_size > 0 - ensures - - #get_initial_basis_size() == initial_basis_size - - if (initial_basis_size > get_max_basis_size()) then - - #get_max_basis_size() == initial_basis_size - !*/ - - unsigned long get_basis_size_increment ( - ) const; - /*! - ensures - - If the user does not supply a basis via set_basis() then this object - will generate one automatically. It does this by starting with a small - basis and repeatedly adding sets of N basis vectors to it until a stopping - condition is reached. This function returns that increment size N. - !*/ - - void set_basis_size_increment ( - unsigned long basis_size_increment - ); - /*! - requires - - basis_size_increment > 0 - ensures - - #get_basis_size_increment() == basis_size_increment - !*/ - - void set_c ( - scalar_type C - ); - /*! - requires - - C > 0 - ensures - - #get_c_class1() == C - - #get_c_class2() == C - !*/ - - const scalar_type get_c_class1 ( - ) const; - /*! - ensures - - returns the SVM regularization parameter for the +1 class. - It is the parameter that determines the trade off between - trying to fit the +1 training data exactly and allowing more errors - but hopefully improving the generalization ability of the - resulting classifier. Larger values encourage exact fitting - while smaller values of C may encourage better generalization. - !*/ - - const scalar_type get_c_class2 ( - ) const; - /*! - ensures - - returns the SVM regularization parameter for the -1 class. - It is the parameter that determines the trade off between - trying to fit the -1 training data exactly and allowing more errors - but hopefully improving the generalization ability of the - resulting classifier. Larger values encourage exact fitting - while smaller values of C may encourage better generalization. - !*/ - - void set_c_class1 ( - scalar_type C - ); - /*! - requires - - C > 0 - ensures - - #get_c_class1() == C - !*/ - - void set_c_class2 ( - scalar_type C - ); - /*!
- requires - - C > 0 - ensures - - #get_c_class2() == C - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const; - /*! - requires - - is_binary_classification_problem(x,y) == true - - x == a matrix or something convertible to a matrix via mat(). - Also, x should contain sample_type objects. - - y == a matrix or something convertible to a matrix via mat(). - Also, y should contain scalar_type objects. - ensures - - trains a C support vector classifier given the training samples in x and - labels in y. - - if (basis_loaded()) then - - training will be carried out in the span of the user supplied basis vectors - - else - - this object will attempt to automatically select an appropriate basis - - - returns a decision function F with the following properties: - - if (new_x is a sample predicted to have a +1 label) then - - F(new_x) >= 0 - - else - - F(new_x) < 0 - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - scalar_type& svm_objective - ) const; - /*! - requires - - is_binary_classification_problem(x,y) == true - - x == a matrix or something convertible to a matrix via mat(). - Also, x should contain sample_type objects. - - y == a matrix or something convertible to a matrix via mat(). - Also, y should contain scalar_type objects. - ensures - - trains a C support vector classifier given the training samples in x and - labels in y. - - if (basis_loaded()) then - - training will be carried out in the span of the user supplied basis vectors - - else - - this object will attempt to automatically select an appropriate basis - - - #svm_objective == the final value of the SVM objective function - - returns a decision function F with the following properties: - - if (new_x is a sample predicted to have a +1 label) then - - F(new_x) >= 0 - - else - - F(new_x) < 0 - !*/ - - }; - -} - -#endif // DLIB_SVM_C_EKm_TRAINER_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/svm/svm_c_linear_dcd_trainer.h b/ml/dlib/dlib/svm/svm_c_linear_dcd_trainer.h deleted file mode 100644 index 039b70993..000000000 --- a/ml/dlib/dlib/svm/svm_c_linear_dcd_trainer.h +++ /dev/null @@ -1,712 +0,0 @@ -// Copyright (C) 2012 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license.
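
// For orientation, a minimal usage sketch of the svm_c_ekm_trainer specified
// above (illustrative only; sample_type, the radial_basis_kernel choice, the
// gamma value 0.1, and C == 10 are assumptions, not recommendations):
//
//     typedef matrix<double,0,1> sample_type;
//     typedef radial_basis_kernel<sample_type> kernel_type;
//
//     std::vector<sample_type> samples;  // training vectors
//     std::vector<double> labels;        // one +1 or -1 per sample
//     // ... fill samples and labels ...
//
//     svm_c_ekm_trainer<kernel_type> trainer(10);   // C == 10
//     trainer.set_kernel(kernel_type(0.1));         // gamma == 0.1
//     decision_function<kernel_type> df = trainer.train(samples, labels);
//     // df(new_x) >= 0 predicts the +1 class, per the spec above.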
-#ifndef DLIB_SVm_C_LINEAR_DCD_TRAINER_Hh_ -#define DLIB_SVm_C_LINEAR_DCD_TRAINER_Hh_ - -#include "svm_c_linear_dcd_trainer_abstract.h" -#include <cmath> -#include <limits> -#include "../matrix.h" -#include "../algs.h" -#include "../rand.h" -#include "svm.h" - -#include "function.h" -#include "kernel.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class svm_c_linear_dcd_trainer - { - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - typedef typename decision_function<K>::sample_vector_type sample_vector_type; - typedef typename decision_function<K>::scalar_vector_type scalar_vector_type; - - // You are getting a compiler error on this line because you supplied a non-linear - // kernel to the svm_c_linear_dcd_trainer object. You have to use one of the - // linear kernels with this trainer. - COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value || - is_same_type<K, sparse_linear_kernel<sample_type> >::value )); - - svm_c_linear_dcd_trainer ( - ) : - Cpos(1), - Cneg(1), - eps(0.1), - max_iterations(10000), - verbose(false), - have_bias(true), - last_weight_1(false), - do_shrinking(true), - do_svm_l2(false) - { - } - - explicit svm_c_linear_dcd_trainer ( - const scalar_type& C_ - ) : - Cpos(C_), - Cneg(C_), - eps(0.1), - max_iterations(10000), - verbose(false), - have_bias(true), - last_weight_1(false), - do_shrinking(true), - do_svm_l2(false) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < C_, - "\tsvm_c_trainer::svm_c_linear_dcd_trainer(kernel,C)" - << "\n\t invalid inputs were given to this function" - << "\n\t C_: " << C_ - ); - } - - bool includes_bias ( - ) const - { - return have_bias; - } - - void include_bias ( - bool should_have_bias - ) - { - have_bias = should_have_bias; - } - - bool forces_last_weight_to_1 ( - ) const - { - return last_weight_1; - } - - void force_last_weight_to_1 ( - bool should_last_weight_be_1 - ) - { - last_weight_1 = should_last_weight_be_1; - } - - bool shrinking_enabled ( - ) const { return do_shrinking; } - - void enable_shrinking ( - bool enabled - ) { do_shrinking = enabled; } - - bool solving_svm_l2_problem ( - ) const { return do_svm_l2; } - - void solve_svm_l2_problem ( - bool enabled - ) { do_svm_l2 = enabled; } - - void be_verbose ( - ) - { - verbose = true; - } - - void be_quiet ( - ) - { - verbose = false; - } - - void set_epsilon ( - scalar_type eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\tvoid svm_c_linear_dcd_trainer::set_epsilon(eps_)" - << "\n\t invalid inputs were given to this function" - << "\n\t eps_: " << eps_ - ); - eps = eps_; - } - - const scalar_type get_epsilon ( - ) const - { - return eps; - } - - const kernel_type& get_kernel ( - ) const - { - return kernel_type(); - } - - unsigned long get_max_iterations ( - ) const { return max_iterations; } - - void set_max_iterations ( - unsigned long max_iter - ) - { - max_iterations = max_iter; - } - - void set_c ( - scalar_type C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C > 0, - "\t void svm_c_linear_dcd_trainer::set_c()" - << "\n\t C must be greater than 0" - << "\n\t C: " << C - << "\n\t this: " << this - ); - - Cpos = C; - Cneg = C; - } - - const 
scalar_type get_c_class1 ( - ) const - { - return Cpos; - } - - const scalar_type get_c_class2 ( - ) const - { - return Cneg; - } - - void set_c_class1 ( - scalar_type C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C > 0, - "\t void svm_c_linear_dcd_trainer::set_c_class1()" - << "\n\t C must be greater than 0" - << "\n\t C: " << C - << "\n\t this: " << this - ); - - Cpos = C; - } - - void set_c_class2 ( - scalar_type C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C > 0, - "\t void svm_c_linear_dcd_trainer::set_c_class2()" - << "\n\t C must be greater than 0" - << "\n\t C: " << C - << "\n\t this: " << this - ); - - Cneg = C; - } - - class optimizer_state - { - friend class svm_c_linear_dcd_trainer; - - public: - optimizer_state() : did_init(false) {} - - private: - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - void init( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - bool have_bias_, - bool last_weight_1_, - bool do_svm_l2_, - scalar_type Cpos, - scalar_type Cneg - ) - { - const long new_dims = max_index_plus_one(x); - long new_idx = 0; - - if (did_init) - { - DLIB_CASSERT(have_bias_ == have_bias && - last_weight_1_ == last_weight_1, - "\t decision_function svm_c_linear_dcd_trainer::train(x,y,state)" - << "\n\t The given state object is invalid because the previous trainer was configured differently." - << "\n\t have_bias_: " << have_bias_ - << "\n\t have_bias: " << have_bias - << "\n\t last_weight_1_: " << last_weight_1_ - << "\n\t last_weight_1: " << last_weight_1 - ); - - DLIB_CASSERT( new_dims >= dims, - "\t decision_function svm_c_linear_dcd_trainer::train(x,y,state)" - << "\n\t The given state object is invalid because the training data dimensions have shrunk." - << "\n\t new_dims: " << new_dims - << "\n\t dims: " << dims - ); - - DLIB_CASSERT( x.size() >= static_cast<long>(alpha.size()), - "\t decision_function svm_c_linear_dcd_trainer::train(x,y,state)" - << "\n\t The given state object is invalid because the training data has fewer samples than previously." - << "\n\t x.size(): " << x.size() - << "\n\t alpha.size(): " << alpha.size() - ); - - // make sure we amortize the cost of growing the alpha vector. - if (alpha.capacity() < static_cast<unsigned long>(x.size())) - alpha.reserve(x.size()*2); - - new_idx = alpha.size(); - - // Make sure alpha has the same length as x. So pad with extra zeros if - // necessary to make this happen. - alpha.resize(x.size(),0); - - - if (new_dims != dims) - { - // The only valid way the dimensions can be different here is if - // you are using a sparse vector type. This is because we might - // have had training samples which just happened to not include all - // the features previously. Therefore, max_index_plus_one() would - // have given too low of a result. But for dense vectors it is - // definitely a user error if the dimensions don't match. - - DLIB_CASSERT(is_matrix<sample_type>::value == false, - "\t decision_function svm_c_linear_dcd_trainer::train(x,y,state)" - << "\n\t The given state object is invalid because the training data dimensions have changed." - << "\n\t new_dims: " << new_dims - << "\n\t dims: " << dims - ); - - // extend w by the right number of elements - if (have_bias && !last_weight_1) - { - // Splice some zeros into the w vector so it will have the - // right length. Here we are being careful to move the bias - // weight to the end of the resulting vector. 
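                        // A worked illustration with hypothetical sizes: if dims == 2,
                        // new_dims == 4, and w == [w0, w1, b], the splice below yields
                        // [w0, w1, 0, 0, b].  The previously learned weights keep their
                        // positions, the new feature dimensions start at zero, and the
                        // bias weight is moved back to the final slot.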
- w = join_cols(join_cols( - colm(w,0,dims), - zeros_matrix<scalar_type>(new_dims-dims,1)), - uniform_matrix<scalar_type>(1,1,w(dims)) - ); - } - else - { - // Just concatenate the right number of zeros. - w = join_cols(w, zeros_matrix<scalar_type>(new_dims-dims,1)); - } - dims = new_dims; - } - - } - else - { - did_init = true; - have_bias = have_bias_; - last_weight_1 = last_weight_1_; - dims = new_dims; - - alpha.resize(x.size()); - - index.reserve(x.size()); - Q.reserve(x.size()); - - if (have_bias && !last_weight_1) - w.set_size(dims+1); - else - w.set_size(dims); - - w = 0; - } - - for (long i = new_idx; i < x.size(); ++i) - { - Q.push_back(length_squared(x(i))); - - if (have_bias && !last_weight_1) - { - index.push_back(i); - Q.back() += 1; - } - else if (Q.back() != 0) - { - index.push_back(i); - } - - if (do_svm_l2_) - { - if (y(i) > 0) - Q.back() += 1/(2*Cpos); - else - Q.back() += 1/(2*Cneg); - } - } - - if (last_weight_1) - w(dims-1) = 1; - } - - template <typename T> - typename enable_if<is_matrix<T>,scalar_type>::type length_squared (const T& x) const - { - if (!last_weight_1) - { - return dlib::dot(x,x); - } - else - { - // skip the last dimension - return dlib::dot(colm(x,0,x.size()-1), - colm(x,0,x.size()-1)); - } - - } - - template <typename T> - typename disable_if<is_matrix<T>,scalar_type>::type length_squared (const T& x) const - { - if (!last_weight_1) - { - return dlib::dot(x,x); - } - else - { - scalar_type temp = 0; - typename T::const_iterator i; - for (i = x.begin(); i != x.end(); ++i) - { - // skip the last dimension - if (static_cast<long>(i->first) < dims-1) - temp += i->second*i->second; - } - return temp; - } - } - - - bool did_init; - bool have_bias; - bool last_weight_1; - std::vector<scalar_type> alpha; - scalar_vector_type w; - std::vector<scalar_type> Q; - std::vector<long> index; - long dims; - dlib::rand rnd; - - public: - - const std::vector<scalar_type>& get_alpha () const { return alpha; } - - friend void serialize(const optimizer_state& item, std::ostream& out) - { - const int version = 1; - dlib::serialize(version, out); - dlib::serialize(item.did_init, out); - dlib::serialize(item.have_bias, out); - dlib::serialize(item.last_weight_1, out); - dlib::serialize(item.alpha, out); - dlib::serialize(item.w, out); - dlib::serialize(item.Q, out); - dlib::serialize(item.index, out); - dlib::serialize(item.dims, out); - dlib::serialize(item.rnd, out); - } - - friend void deserialize(optimizer_state& item, std::istream& in) - { - int version = 0; - dlib::deserialize(version, in); - if (version != 1) - { - throw dlib::serialization_error( - "Error while deserializing dlib::svm_c_linear_dcd_trainer::optimizer_state, unexpected version." 
- ); - } - - dlib::deserialize(item.did_init, in); - dlib::deserialize(item.have_bias, in); - dlib::deserialize(item.last_weight_1, in); - dlib::deserialize(item.alpha, in); - dlib::deserialize(item.w, in); - dlib::deserialize(item.Q, in); - dlib::deserialize(item.index, in); - dlib::deserialize(item.dims, in); - dlib::deserialize(item.rnd, in); - } - - }; - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - optimizer_state state; - return do_train(mat(x), mat(y), state); - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - optimizer_state& state - ) const - { - return do_train(mat(x), mat(y), state); - } - - private: - - // ------------------------------------------------------------------------------------ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - optimizer_state& state - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(is_learning_problem(x,y) == true, - "\t decision_function svm_c_linear_dcd_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t x.size(): " << x.size() - << "\n\t y.size(): " << y.size() - << "\n\t is_learning_problem(x,y): " << is_learning_problem(x,y) - ); -#ifdef ENABLE_ASSERTS - for (long i = 0; i < x.size(); ++i) - { - DLIB_ASSERT(y(i) == +1 || y(i) == -1, - "\t decision_function svm_c_linear_dcd_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t y("<<i<<"): " << y(i) - ); - } -#endif - - state.init(x,y,have_bias,last_weight_1,do_svm_l2,Cpos,Cneg); - - std::vector<scalar_type>& alpha = state.alpha; - scalar_vector_type& w = state.w; - std::vector<long>& index = state.index; - const long dims = state.dims; - - - unsigned long active_size = index.size(); - - scalar_type PG_max_prev = std::numeric_limits<scalar_type>::infinity(); - scalar_type PG_min_prev = -std::numeric_limits<scalar_type>::infinity(); - - const scalar_type Dii_pos = 1/(2*Cpos); - const scalar_type Dii_neg = 1/(2*Cneg); - - // main loop - for (unsigned long iter = 0; iter < max_iterations; ++iter) - { - scalar_type PG_max = -std::numeric_limits<scalar_type>::infinity(); - scalar_type PG_min = std::numeric_limits<scalar_type>::infinity(); - - // randomly shuffle the indices - for (unsigned long i = 0; i < active_size; ++i) - { - // pick a random index >= i - const long j = i + state.rnd.get_random_32bit_number()%(active_size-i); - std::swap(index[i], index[j]); - } - - // for all the active training samples - for (unsigned long ii = 0; ii < active_size; ++ii) - { - const long i = index[ii]; - - scalar_type G = y(i)*dot(w, x(i)) - 1; - if (do_svm_l2) - { - if (y(i) > 0) - G += Dii_pos*alpha[i]; - else - G += Dii_neg*alpha[i]; - } - const scalar_type C = (y(i) > 0) ? Cpos : Cneg; - const scalar_type U = do_svm_l2 ? 
std::numeric_limits<scalar_type>::infinity() : C; - - scalar_type PG = 0; - if (alpha[i] == 0) - { - if (G > PG_max_prev) - { - // shrink the active set of training examples - --active_size; - std::swap(index[ii], index[active_size]); - --ii; - continue; - } - - if (G < 0) - PG = G; - } - else if (alpha[i] == U) - { - if (G < PG_min_prev) - { - // shrink the active set of training examples - --active_size; - std::swap(index[ii], index[active_size]); - --ii; - continue; - } - - if (G > 0) - PG = G; - } - else - { - PG = G; - } - - if (PG > PG_max) - PG_max = PG; - if (PG < PG_min) - PG_min = PG; - - // if PG != 0 - if (std::abs(PG) > 1e-12) - { - const scalar_type alpha_old = alpha[i]; - alpha[i] = std::min(std::max(alpha[i] - G/state.Q[i], (scalar_type)0.0), U); - const scalar_type delta = (alpha[i]-alpha_old)*y(i); - add_to(w, x(i), delta); - if (have_bias && !last_weight_1) - w(w.size()-1) -= delta; - - if (last_weight_1) - w(dims-1) = 1; - } - - } - - if (verbose) - { - using namespace std; - cout << "gap: " << PG_max - PG_min << endl; - cout << "active_size: " << active_size << endl; - cout << "iter: " << iter << endl; - cout << endl; - } - - if (PG_max - PG_min <= eps) - { - // stop if we are within eps tolerance and the last iteration - // was over all the samples - if (active_size == index.size()) - break; - - // Turn off shrinking on the next iteration. We will stop if the - // tolerance is still <= eps when shrinking is off. - active_size = index.size(); - PG_max_prev = std::numeric_limits<scalar_type>::infinity(); - PG_min_prev = -std::numeric_limits<scalar_type>::infinity(); - } - else if (do_shrinking) - { - PG_max_prev = PG_max; - PG_min_prev = PG_min; - if (PG_max_prev <= 0) - PG_max_prev = std::numeric_limits<scalar_type>::infinity(); - if (PG_min_prev >= 0) - PG_min_prev = -std::numeric_limits<scalar_type>::infinity(); - } - - } // end of main optimization loop - - - - - // put the solution into a decision function and then return it - decision_function<kernel_type> df; - if (have_bias && !last_weight_1) - df.b = w(w.size()-1); - else - df.b = 0; - - df.basis_vectors.set_size(1); - // Copy the plane normal into the output basis vector. The output vector might - // be a sparse vector container so we need to use this special kind of copy to - // handle that case. - assign(df.basis_vectors(0), colm(w, 0, dims)); - df.alpha.set_size(1); - df.alpha(0) = 1; - - return df; - } - - scalar_type dot ( - const scalar_vector_type& w, - const sample_type& sample - ) const - { - if (have_bias && !last_weight_1) - { - const long w_size_m1 = w.size()-1; - return dlib::dot(colm(w,0,w_size_m1), sample) - w(w_size_m1); - } - else - { - return dlib::dot(w, sample); - } - } - - // ------------------------------------------------------------------------------------ - - scalar_type Cpos; - scalar_type Cneg; - scalar_type eps; - unsigned long max_iterations; - bool verbose; - bool have_bias; // having a bias means we pretend all x vectors have an extra element which is always -1. 
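        // Concretely: the private dot() helper above computes
        // dlib::dot(colm(w,0,n-1), x) - w(n-1), i.e. the decision value
        // dot(w_features, x) - b with the bias b kept as the last entry of w,
        // which is equivalent to appending a constant -1 feature to every x.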
- bool last_weight_1; - bool do_shrinking; - bool do_svm_l2; - - }; // end of class svm_c_linear_dcd_trainer - -// ---------------------------------------------------------------------------------------- - - -} - -#endif // DLIB_SVm_C_LINEAR_DCD_TRAINER_Hh_ - - diff --git a/ml/dlib/dlib/svm/svm_c_linear_dcd_trainer_abstract.h b/ml/dlib/dlib/svm/svm_c_linear_dcd_trainer_abstract.h deleted file mode 100644 index b57c54260..000000000 --- a/ml/dlib/dlib/svm/svm_c_linear_dcd_trainer_abstract.h +++ /dev/null @@ -1,382 +0,0 @@ -// Copyright (C) 2012 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SVm_C_LINEAR_DCD_TRAINER_ABSTRACT_Hh_ -#ifdef DLIB_SVm_C_LINEAR_DCD_TRAINER_ABSTRACT_Hh_ - -#include "function_abstract.h" -#include "kernel_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class svm_c_linear_dcd_trainer - { - /*! - REQUIREMENTS ON K - Is either linear_kernel or sparse_linear_kernel. - - WHAT THIS OBJECT REPRESENTS - This object represents a tool for training the C formulation of a support - vector machine. It is optimized for the case where linear kernels are - used. - - - In particular, it is implemented using the algorithm described in the - following paper: - A Dual Coordinate Descent Method for Large-scale Linear SVM - by Cho-Jui Hsieh, Kai-Wei Chang, and Chih-Jen Lin - - It solves the optimization problem of: - min_w: 0.5||w||^2 + C*sum_i (hinge loss for sample i) - where w is the learned SVM parameter vector. - - Note that this object is very similar to the svm_c_linear_trainer, however, - it interprets the C parameter slightly differently. In particular, C for - the DCD trainer is not automatically divided by the number of samples like - it is with the svm_c_linear_trainer. For example, a C value of 10 when - given to the svm_c_linear_trainer is equivalent to a C value of 10/N for - the svm_c_linear_dcd_trainer, where N is the number of training samples. - !*/ - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - typedef typename decision_function<K>::sample_vector_type sample_vector_type; - typedef typename decision_function<K>::scalar_vector_type scalar_vector_type; - - - svm_c_linear_dcd_trainer ( - ); - /*! - ensures - - This object is properly initialized and ready to be used to train a - support vector machine. - - #get_c_class1() == 1 - - #get_c_class2() == 1 - - #get_epsilon() == 0.1 - - #get_max_iterations() == 10000 - - This object will not be verbose unless be_verbose() is called - - #forces_last_weight_to_1() == false - - #includes_bias() == true - - #shrinking_enabled() == true - - #solving_svm_l2_problem() == false - !*/ - - explicit svm_c_linear_dcd_trainer ( - const scalar_type& C - ); - /*! - requires - - C > 0 - ensures - - This object is properly initialized and ready to be used to train a - support vector machine. 
- - #get_c_class1() == C - - #get_c_class2() == C - - #get_epsilon() == 0.1 - - #get_max_iterations() == 10000 - - This object will not be verbose unless be_verbose() is called - - #forces_last_weight_to_1() == false - - #includes_bias() == true - - #shrinking_enabled() == true - - #solving_svm_l2_problem() == false - !*/ - - bool includes_bias ( - ) const; - /*! - ensures - - returns true if this trainer will produce decision_functions with - non-zero bias values. - !*/ - - void include_bias ( - bool should_have_bias - ); - /*! - ensures - - #includes_bias() == should_have_bias - !*/ - - bool forces_last_weight_to_1 ( - ) const; - /*! - ensures - - returns true if this trainer has the constraint that the last weight in - the learned parameter vector must be 1. This is the weight corresponding - to the feature in the training vectors with the highest dimension. - - Forcing the last weight to 1 also disables the bias and therefore the b - field of the learned decision_function will be 0 when forces_last_weight_to_1() == true. - This is true regardless of the setting of #include_bias(). - !*/ - - void force_last_weight_to_1 ( - bool should_last_weight_be_1 - ); - /*! - ensures - - #forces_last_weight_to_1() == should_last_weight_be_1 - !*/ - - bool shrinking_enabled ( - ) const; - /*! - ensures - - returns true if the shrinking heuristic is enabled. Typically this makes - the algorithm run a lot faster so it should be enabled. - !*/ - - void enable_shrinking ( - bool enabled - ); - /*! - ensures - - #shrinking_enabled() == enabled - !*/ - - bool solving_svm_l2_problem ( - ) const; - /*! - ensures - - returns true if this solver will solve the L2 version of the SVM - objective function. That is, if solving_svm_l2_problem()==true then this - object, rather than using the hinge loss, uses the squared hinge loss. - !*/ - - void solve_svm_l2_problem ( - bool enabled - ); - /*! - ensures - - #solving_svm_l2_problem() == enabled - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out so that a - user can observe the progress of the algorithm. - !*/ - - void be_quiet ( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - void set_epsilon ( - scalar_type eps_ - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - const scalar_type get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Smaller values may result in a more accurate solution but take longer to - train. - !*/ - - const kernel_type& get_kernel ( - ) const; - /*! - ensures - - returns a copy of the kernel function in use by this object. Since the - linear kernels don't have any parameters this function just returns - kernel_type() - !*/ - - unsigned long get_max_iterations ( - ) const; - /*! - ensures - - returns the maximum number of iterations the SVM optimizer is allowed to - run before it is required to stop and return a result. - !*/ - - void set_max_iterations ( - unsigned long max_iter - ); - /*! - ensures - - #get_max_iterations() == max_iter - !*/ - - void set_c ( - scalar_type C - ); - /*! - requires - - C > 0 - ensures - - #get_c_class1() == C - - #get_c_class2() == C - !*/ - - const scalar_type get_c_class1 ( - ) const; - /*! - ensures - - returns the SVM regularization parameter for the +1 class. 
It is the - parameter that determines the trade off between trying to fit the +1 - training data exactly and allowing more errors but hopefully improving the - generalization of the resulting classifier. Larger values encourage - exact fitting while smaller values of C may encourage better - generalization. - !*/ - - const scalar_type get_c_class2 ( - ) const; - /*! - ensures - - returns the SVM regularization parameter for the -1 class. It is the - parameter that determines the trade off between trying to fit the -1 - training data exactly and allowing more errors but hopefully improving the - generalization of the resulting classifier. Larger values encourage - exact fitting while smaller values of C may encourage better - generalization. - !*/ - - void set_c_class1 ( - scalar_type C - ); - /*! - requires - - C > 0 - ensures - - #get_c_class1() == C - !*/ - - void set_c_class2 ( - scalar_type C - ); - /*! - requires - - C > 0 - ensures - - #get_c_class2() == C - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const; - /*! - requires - - is_learning_problem(x,y) == true - (Note that it is ok for x.size() == 1) - - All elements of y must be equal to +1 or -1 - - x == a matrix or something convertible to a matrix via mat(). - Also, x should contain sample_type objects. - - y == a matrix or something convertible to a matrix via mat(). - Also, y should contain scalar_type objects. - ensures - - Trains a C support vector classifier given the training samples in x and - labels in y. - - returns a decision function F with the following properties: - - F.alpha.size() == 1 - - F.basis_vectors.size() == 1 - - F.alpha(0) == 1 - - if (new_x is a sample predicted to have a +1 label) then - - F(new_x) >= 0 - - else - - F(new_x) < 0 - !*/ - - // optimizer_state is used to record the internal state of the SVM optimizer. It - // can be used with the following train() routine to warm-start the optimizer or - // access the optimal alpha values (see the Hsieh paper mentioned above). The - // optimizer_state objects are serializable and allow you to get the alphas, but - // are otherwise completely opaque to the user. - class optimizer_state - { - public: - const std::vector<scalar_type>& get_alpha ( - ) const; - }; - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - optimizer_state& state - ) const; - /*! - requires - - is_learning_problem(x,y) == true - (Note that it is ok for x.size() == 1) - - All elements of y must be equal to +1 or -1 - - state must be either a default initialized optimizer_state object or all the - following conditions must be satisfied: - - Let LAST denote the previous trainer used with the state object, then - we must have: - - LAST.includes_bias() == includes_bias() - - LAST.forces_last_weight_to_1() == forces_last_weight_to_1() - - Let X denote the previous training samples used with state, then the - following must be satisfied: - - x.size() >= X.size() - - for all valid i: - - x(i) == X(i) - (i.e. the samples that x and X have in common must be identical.
- That is, the only allowed difference between x and X is that - x might have new training samples appended onto its end) - if (x contains dense vectors) then - - max_index_plus_one(x) == max_index_plus_one(X) - - else - - max_index_plus_one(x) >= max_index_plus_one(X) - - x == a matrix or something convertible to a matrix via mat(). - Also, x should contain sample_type objects. - - y == a matrix or something convertible to a matrix via mat(). - Also, y should contain scalar_type objects. - ensures - - Trains a C support vector classifier given the training samples in x and - labels in y. - - The point of the state object is to allow you to warm start the SVM - optimizer from the solution to a previous call to train(). Doing this - might make the training run faster. This is useful when you are trying - different C values or have grown the training set and want to retrain. - - #state == the internal state of the optimizer at the solution to the SVM - problem. Therefore, passing #state to a new call to train() will start - the optimizer from the current solution. - - #state.get_alpha().size() == x.size() - - #state.get_alpha() == the optimal alpha/dual values learned by the optimizer. - - returns a decision function F with the following properties: - - F.alpha.size() == 1 - - F.basis_vectors.size() == 1 - - F.alpha(0) == 1 - - if (new_x is a sample predicted to have a +1 label) then - - F(new_x) >= 0 - - else - - F(new_x) < 0 - !*/ - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_C_LINEAR_DCD_TRAINER_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/svm/svm_c_linear_trainer.h b/ml/dlib/dlib/svm/svm_c_linear_trainer.h deleted file mode 100644 index 8d136d711..000000000 --- a/ml/dlib/dlib/svm/svm_c_linear_trainer.h +++ /dev/null @@ -1,706 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_SVM_C_LiNEAR_TRAINER_Hh_ -#define DLIB_SVM_C_LiNEAR_TRAINER_Hh_ - -#include "svm_c_linear_trainer_abstract.h" -#include "../algs.h" -#include "../optimization.h" -#include "../matrix.h" -#include "function.h" -#include "kernel.h" -#include <iostream> -#include <vector> -#include "sparse_vector.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type, - typename in_sample_vector_type, - typename in_scalar_vector_type - > - class oca_problem_c_svm : public oca_problem<matrix_type > - { - public: - /* - This class is used as part of the implementation of the svm_c_linear_trainer - defined towards the end of this file. - - - The bias parameter is dealt with by imagining that each sample vector has -1 - as its last element.
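            In other words, the optimizer works on an augmented weight vector w of
            get_num_dimensions() == dims+1 entries whose last entry is the bias b,
            and the decision value for a sample x is dot(colm(w,0,dims), x) - w(dims),
            which is how line_search() below computes the cached dot_prods.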
- */ - - typedef typename matrix_type::type scalar_type; - - oca_problem_c_svm( - const scalar_type C_pos, - const scalar_type C_neg, - const in_sample_vector_type& samples_, - const in_scalar_vector_type& labels_, - const bool be_verbose_, - const scalar_type eps_, - const unsigned long max_iter, - const unsigned long dims_ - ) : - samples(samples_), - labels(labels_), - C(std::min(C_pos,C_neg)), - Cpos(C_pos/C), - Cneg(C_neg/C), - be_verbose(be_verbose_), - eps(eps_), - max_iterations(max_iter), - dims(dims_) - { - dot_prods.resize(samples.size()); - is_first_call = true; - } - - virtual scalar_type get_c ( - ) const - { - return C; - } - - virtual long get_num_dimensions ( - ) const - { - // plus 1 for the bias term - return dims + 1; - } - - virtual bool optimization_status ( - scalar_type current_objective_value, - scalar_type current_error_gap, - scalar_type current_risk_value, - scalar_type current_risk_gap, - unsigned long num_cutting_planes, - unsigned long num_iterations - ) const - { - if (be_verbose) - { - using namespace std; - cout << "objective: " << current_objective_value << endl; - cout << "objective gap: " << current_error_gap << endl; - cout << "risk: " << current_risk_value << endl; - cout << "risk gap: " << current_risk_gap << endl; - cout << "num planes: " << num_cutting_planes << endl; - cout << "iter: " << num_iterations << endl; - cout << endl; - } - - if (num_iterations >= max_iterations) - return true; - - if (current_risk_gap < eps) - return true; - - return false; - } - - virtual bool risk_has_lower_bound ( - scalar_type& lower_bound - ) const - { - lower_bound = 0; - return true; - } - - virtual void get_risk ( - matrix_type& w, - scalar_type& risk, - matrix_type& subgradient - ) const - { - line_search(w); - - subgradient.set_size(w.size(),1); - subgradient = 0; - risk = 0; - - - // loop over all the samples and compute the risk and its subgradient at the current solution point w - for (long i = 0; i < samples.size(); ++i) - { - // multiply current SVM output for the ith sample by its label - const scalar_type df_val = labels(i)*dot_prods[i]; - - if (labels(i) > 0) - risk += Cpos*std::max<scalar_type>(0.0,1 - df_val); - else - risk += Cneg*std::max<scalar_type>(0.0,1 - df_val); - - if (df_val < 1) - { - if (labels(i) > 0) - { - subtract_from(subgradient, samples(i), Cpos); - - subgradient(subgradient.size()-1) += Cpos; - } - else - { - add_to(subgradient, samples(i), Cneg); - - subgradient(subgradient.size()-1) -= Cneg; - } - } - } - - scalar_type scale = 1.0/samples.size(); - - risk *= scale; - subgradient = scale*subgradient; - } - - private: - - // ----------------------------------------------------- - // ----------------------------------------------------- - - void line_search ( - matrix_type& w - ) const - /*! - ensures - - does a line search to find a better w - - for all i: #dot_prods[i] == dot(colm(#w,0,w.size()-1), samples(i)) - #w(w.size()-1) - !*/ - { - // The reason for using w_size_m1 and not just w.size()-1 is because - // doing it this way avoids an inane warning from gcc that can occur in some cases. - const long w_size_m1 = w.size()-1; - for (long i = 0; i < samples.size(); ++i) - dot_prods[i] = dot(colm(w,0,w_size_m1), samples(i)) - w(w_size_m1); - - if (is_first_call) - { - is_first_call = false; - best_so_far = w; - dot_prods_best = dot_prods; - } - else - { - // do line search going from best_so_far to w. Store results in w. - // Here we use the line search algorithm presented in section 3.1.1 of Franc and Sonnenburg. 
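                // A paraphrase of that line search (our notation, not text from the
                // paper): restrict the objective to the segment
                //     w(k) = best_so_far + k*(w - best_so_far), k >= 0.
                // Its derivative in k is piecewise linear; each sample contributes a
                // slope change of |B| at the breakpoint k = -C/B where its hinge loss
                // switches on or off.  So the code below sorts the breakpoints,
                // accumulates the slope terms into f0 while walking them in order, and
                // takes the first k at which f0 + A0*k >= 0, i.e. the one-dimensional
                // minimizer.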
- - const scalar_type A0 = length_squared(best_so_far - w); - const scalar_type BB0 = dot(best_so_far, w - best_so_far); - - const scalar_type scale_pos = (get_c()*Cpos)/samples.size(); - const scalar_type scale_neg = (get_c()*Cneg)/samples.size(); - - ks.clear(); - ks.reserve(samples.size()); - - scalar_type f0 = BB0; - for (long i = 0; i < samples.size(); ++i) - { - const scalar_type& scale = (labels(i)>0) ? scale_pos : scale_neg; - - const scalar_type B = scale*labels(i) * ( dot_prods_best[i] - dot_prods[i]); - const scalar_type C = scale*(1 - labels(i)* dot_prods_best[i]); - // Note that if B is 0 then it doesn't matter what k is set to. So 0 is fine. - scalar_type k = 0; - if (B != 0) - k = -C/B; - - if (k > 0) - ks.push_back(helper(k, std::abs(B))); - - if ( (B < 0 && k > 0) || (B > 0 && k <= 0) ) - f0 += B; - } - - scalar_type opt_k = 1; - // ks.size() == 0 shouldn't happen but check anyway - if (f0 >= 0 || ks.size() == 0) - { - // Getting here means that we aren't searching in a descent direction. - // We could take a zero step but instead lets just assign w to the new best - // so far point just to make sure we don't get stuck coming back to this - // case over and over. This might happen if we never move the best point - // seen so far. - - // So we let opt_k be 1 - } - else - { - std::sort(ks.begin(), ks.end()); - - // figure out where f0 goes positive. - for (unsigned long i = 0; i < ks.size(); ++i) - { - f0 += ks[i].B; - if (f0 + A0*ks[i].k >= 0) - { - opt_k = ks[i].k; - break; - } - } - - } - - // Don't let the step size get too big. Otherwise we might pick huge steps - // over and over that don't improve the cutting plane approximation. - if (opt_k > 1.0) - { - opt_k = 1.0; - } - - // take the step suggested by the line search - best_so_far = (1-opt_k)*best_so_far + opt_k*w; - - // update best_so_far dot products - for (unsigned long i = 0; i < dot_prods_best.size(); ++i) - dot_prods_best[i] = (1-opt_k)*dot_prods_best[i] + opt_k*dot_prods[i]; - - - const scalar_type mu = 0.1; - // Make sure we always take a little bit of a step towards w regardless of what the - // line search says to do. We do this since it is possible that some steps won't - // advance the best_so_far point. So this ensures we always make some progress each - // iteration. 
- w = (1-mu)*best_so_far + mu*w; - - // update dot products - for (unsigned long i = 0; i < dot_prods.size(); ++i) - dot_prods[i] = (1-mu)*dot_prods_best[i] + mu*dot_prods[i]; - } - } - - struct helper - { - helper(scalar_type k_, scalar_type B_) : k(k_), B(B_) {} - scalar_type k; - scalar_type B; - - bool operator< (const helper& item) const { return k < item.k; } - }; - - mutable std::vector<helper> ks; - - mutable bool is_first_call; - mutable std::vector<scalar_type> dot_prods; - - mutable matrix_type best_so_far; // best w seen so far - mutable std::vector<scalar_type> dot_prods_best; // dot products between best_so_far and samples - - - const in_sample_vector_type& samples; - const in_scalar_vector_type& labels; - const scalar_type C; - const scalar_type Cpos; - const scalar_type Cneg; - - const bool be_verbose; - const scalar_type eps; - const unsigned long max_iterations; - const unsigned long dims; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type, - typename in_sample_vector_type, - typename in_scalar_vector_type, - typename scalar_type - > - oca_problem_c_svm<matrix_type, in_sample_vector_type, in_scalar_vector_type> make_oca_problem_c_svm ( - const scalar_type C_pos, - const scalar_type C_neg, - const in_sample_vector_type& samples, - const in_scalar_vector_type& labels, - const bool be_verbose, - const scalar_type eps, - const unsigned long max_iterations, - const unsigned long dims - ) - { - return oca_problem_c_svm<matrix_type, in_sample_vector_type, in_scalar_vector_type>( - C_pos, C_neg, samples, labels, be_verbose, eps, max_iterations, dims); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class svm_c_linear_trainer - { - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - // You are getting a compiler error on this line because you supplied a non-linear kernel - // to the svm_c_linear_trainer object. You have to use one of the linear kernels with this - // trainer. 
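        // For example (illustrative): svm_c_linear_trainer<linear_kernel<
        // matrix<double,0,1> > > compiles fine, while instantiating this class
        // with something like a radial_basis_kernel triggers the assert below.
        // For non-linear kernels, see e.g. the svm_c_ekm_trainer above.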
- COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value || - is_same_type<K, sparse_linear_kernel<sample_type> >::value )); - - svm_c_linear_trainer ( - ) - { - Cpos = 1; - Cneg = 1; - verbose = false; - eps = 0.001; - max_iterations = 10000; - learn_nonnegative_weights = false; - last_weight_1 = false; - } - - explicit svm_c_linear_trainer ( - const scalar_type& C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C > 0, - "\t svm_c_linear_trainer::svm_c_linear_trainer()" - << "\n\t C must be greater than 0" - << "\n\t C: " << C - << "\n\t this: " << this - ); - - Cpos = C; - Cneg = C; - verbose = false; - eps = 0.001; - max_iterations = 10000; - learn_nonnegative_weights = false; - last_weight_1 = false; - } - - void set_epsilon ( - scalar_type eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void svm_c_linear_trainer::set_epsilon()" - << "\n\t eps_ must be greater than 0" - << "\n\t eps_: " << eps_ - << "\n\t this: " << this - ); - - eps = eps_; - } - - const scalar_type get_epsilon ( - ) const { return eps; } - - unsigned long get_max_iterations ( - ) const { return max_iterations; } - - void set_max_iterations ( - unsigned long max_iter - ) - { - max_iterations = max_iter; - } - - void be_verbose ( - ) - { - verbose = true; - } - - void be_quiet ( - ) - { - verbose = false; - } - - void set_oca ( - const oca& item - ) - { - solver = item; - } - - const oca get_oca ( - ) const - { - return solver; - } - - const kernel_type get_kernel ( - ) const - { - return kernel_type(); - } - - bool learns_nonnegative_weights ( - ) const { return learn_nonnegative_weights; } - - void set_learns_nonnegative_weights ( - bool value - ) - { - learn_nonnegative_weights = value; - if (learn_nonnegative_weights) - prior.set_size(0); - } - - bool forces_last_weight_to_1 ( - ) const - { - return last_weight_1; - } - - void force_last_weight_to_1 ( - bool should_last_weight_be_1 - ) - { - last_weight_1 = should_last_weight_be_1; - if (last_weight_1) - prior.set_size(0); - } - - void set_prior ( - const trained_function_type& prior_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(prior_.basis_vectors.size() == 1 && - prior_.alpha(0) == 1, - "\t void svm_c_linear_trainer::set_prior()" - << "\n\t The supplied prior could not have been created by this object's train() method." 
- << "\n\t prior_.basis_vectors.size(): " << prior_.basis_vectors.size() - << "\n\t prior_.alpha(0): " << prior_.alpha(0) - << "\n\t this: " << this - ); - - prior = sparse_to_dense(prior_.basis_vectors(0)); - prior_b = prior_.b; - learn_nonnegative_weights = false; - last_weight_1 = false; - } - - bool has_prior ( - ) const - { - return prior.size() != 0; - } - - void set_c ( - scalar_type C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C > 0, - "\t void svm_c_linear_trainer::set_c()" - << "\n\t C must be greater than 0" - << "\n\t C: " << C - << "\n\t this: " << this - ); - - Cpos = C; - Cneg = C; - } - - const scalar_type get_c_class1 ( - ) const - { - return Cpos; - } - - const scalar_type get_c_class2 ( - ) const - { - return Cneg; - } - - void set_c_class1 ( - scalar_type C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C > 0, - "\t void svm_c_linear_trainer::set_c_class1()" - << "\n\t C must be greater than 0" - << "\n\t C: " << C - << "\n\t this: " << this - ); - - Cpos = C; - } - - void set_c_class2 ( - scalar_type C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C > 0, - "\t void svm_c_linear_trainer::set_c_class2()" - << "\n\t C must be greater than 0" - << "\n\t C: " << C - << "\n\t this: " << this - ); - - Cneg = C; - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - scalar_type obj; - return do_train(mat(x),mat(y),obj); - } - - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - scalar_type& svm_objective - ) const - { - return do_train(mat(x),mat(y),svm_objective); - } - - private: - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - scalar_type& svm_objective - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(is_learning_problem(x,y) == true, - "\t decision_function svm_c_linear_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t x.nr(): " << x.nr() - << "\n\t y.nr(): " << y.nr() - << "\n\t x.nc(): " << x.nc() - << "\n\t y.nc(): " << y.nc() - << "\n\t is_learning_problem(x,y): " << is_learning_problem(x,y) - ); -#ifdef ENABLE_ASSERTS - for (long i = 0; i < x.size(); ++i) - { - DLIB_ASSERT(y(i) == +1 || y(i) == -1, - "\t decision_function svm_c_linear_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t y("<<i<<"): " << y(i) - ); - } -#endif - - - typedef matrix<scalar_type,0,1> w_type; - w_type w; - - const unsigned long num_dims = max_index_plus_one(x); - - unsigned long num_nonnegative = 0; - if (learn_nonnegative_weights) - { - num_nonnegative = num_dims; - } - - unsigned long force_weight_1_idx = std::numeric_limits<unsigned long>::max(); - if (last_weight_1) - { - force_weight_1_idx = num_dims-1; - } - - - if (has_prior()) - { - if (is_matrix<sample_type>::value) - { - // make sure requires clause is not broken - DLIB_CASSERT(num_dims == (unsigned long)prior.size(), - "\t decision_function svm_c_linear_trainer::train(x,y)" - << "\n\t The dimension of the training vectors must match the dimension of\n" - << "\n\t those used to create the prior." 
- << "\n\t num_dims: " << num_dims - << "\n\t prior.size(): " << prior.size() - ); - } - const unsigned long dims = std::max(num_dims, (unsigned long)prior.size()); - // In the case of sparse sample vectors, it is possible that the input - // vector dimensionality is larger than the prior vector dimensionality. - // We need to check for this case and pad prior with zeros if it is the - // case. - matrix<scalar_type,0,1> prior_temp = join_cols(join_cols(prior, - zeros_matrix<scalar_type>(dims-prior.size(),1)), - mat(prior_b)); - - svm_objective = solver( - make_oca_problem_c_svm<w_type>(Cpos, Cneg, x, y, verbose, eps, max_iterations, dims), - w, - prior_temp); - } - else - { - svm_objective = solver( - make_oca_problem_c_svm<w_type>(Cpos, Cneg, x, y, verbose, eps, max_iterations, num_dims), - w, - num_nonnegative, - force_weight_1_idx); - } - - // put the solution into a decision function and then return it - decision_function<kernel_type> df; - df.b = static_cast<scalar_type>(w(w.size()-1)); - df.basis_vectors.set_size(1); - // Copy the plane normal into the output basis vector. The output vector might be a - // sparse vector container so we need to use this special kind of copy to handle that case. - // As an aside, the reason for using max_index_plus_one() and not just w.size()-1 is because - // doing it this way avoids an inane warning from gcc that can occur in some cases. - const long out_size = max_index_plus_one(x); - assign(df.basis_vectors(0), matrix_cast<scalar_type>(colm(w, 0, out_size))); - df.alpha.set_size(1); - df.alpha(0) = 1; - - return df; - } - - scalar_type Cpos; - scalar_type Cneg; - oca solver; - scalar_type eps; - bool verbose; - unsigned long max_iterations; - bool learn_nonnegative_weights; - bool last_weight_1; - matrix<scalar_type,0,1> prior; - scalar_type prior_b = 0; - }; - -// ---------------------------------------------------------------------------------------- - -} - -// ---------------------------------------------------------------------------------------- - - -#endif // DLIB_SVM_C_LiNEAR_TRAINER_Hh_ - diff --git a/ml/dlib/dlib/svm/svm_c_linear_trainer_abstract.h b/ml/dlib/dlib/svm/svm_c_linear_trainer_abstract.h deleted file mode 100644 index 1b7a128f0..000000000 --- a/ml/dlib/dlib/svm/svm_c_linear_trainer_abstract.h +++ /dev/null @@ -1,359 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SVM_C_LiNEAR_TRAINER_ABSTRACT_Hh_ -#ifdef DLIB_SVM_C_LiNEAR_TRAINER_ABSTRACT_Hh_ - -#include "../matrix/matrix_abstract.h" -#include "../algs.h" -#include "function_abstract.h" -#include "kernel_abstract.h" -#include "sparse_kernel_abstract.h" - -namespace dlib -{ - template < - typename K - > - class svm_c_linear_trainer - { - /*! - REQUIREMENTS ON K - Is either linear_kernel or sparse_linear_kernel. - - WHAT THIS OBJECT REPRESENTS - This object represents a tool for training the C formulation of - a support vector machine. It is optimized for the case where - linear kernels are used. - - - In particular, it is implemented using the OCAS algorithm - described in the following paper: - Optimized Cutting Plane Algorithm for Large-Scale Risk Minimization - Vojtech Franc, Soren Sonnenburg; Journal of Machine Learning - Research, 10(Oct):2157--2192, 2009. 
- !*/ - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - svm_c_linear_trainer ( - ); - /*! - ensures - - This object is properly initialized and ready to be used - to train a support vector machine. - - #get_oca() == oca() (i.e. an instance of oca with default parameters) - - #get_c_class1() == 1 - - #get_c_class2() == 1 - - #get_epsilon() == 0.001 - - this object will not be verbose unless be_verbose() is called - - #get_max_iterations() == 10000 - - #learns_nonnegative_weights() == false - - #force_last_weight_to_1() == false - - #has_prior() == false - !*/ - - explicit svm_c_linear_trainer ( - const scalar_type& C - ); - /*! - requires - - C > 0 - ensures - - This object is properly initialized and ready to be used - to train a support vector machine. - - #get_oca() == oca() (i.e. an instance of oca with default parameters) - - #get_c_class1() == C - - #get_c_class2() == C - - #get_epsilon() == 0.001 - - this object will not be verbose unless be_verbose() is called - - #get_max_iterations() == 10000 - - #learns_nonnegative_weights() == false - - #force_last_weight_to_1() == false - - #has_prior() == false - !*/ - - void set_epsilon ( - scalar_type eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - const scalar_type get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Smaller values may result in a more accurate solution but take longer to - train. You can think of this epsilon value as saying "solve the - optimization problem until the probability of misclassification is within - epsilon of its optimal value". - !*/ - - void set_max_iterations ( - unsigned long max_iter - ); - /*! - ensures - - #get_max_iterations() == max_iter - !*/ - - unsigned long get_max_iterations ( - ); - /*! - ensures - - returns the maximum number of iterations the SVM optimizer is allowed to - run before it is required to stop and return a result. - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out so that a - user can observe the progress of the algorithm. - !*/ - - void be_quiet ( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - void set_oca ( - const oca& item - ); - /*! - ensures - - #get_oca() == item - !*/ - - const oca get_oca ( - ) const; - /*! - ensures - - returns a copy of the optimizer used to solve the SVM problem. - !*/ - - const kernel_type get_kernel ( - ) const; - /*! - ensures - - returns a copy of the kernel function in use by this object. Since - the linear kernels don't have any parameters this function just - returns kernel_type() - !*/ - - bool learns_nonnegative_weights ( - ) const; - /*! - ensures - - The output of training is a weight vector and a bias value. These - two things define the resulting decision function. That is, the - decision function simply takes the dot product between the learned - weight vector and a test sample, then subtracts the bias value. - Therefore, if learns_nonnegative_weights() == true then the resulting - learned weight vector will always have non-negative entries. The - bias value may still be negative though. - !*/ - - void set_learns_nonnegative_weights ( - bool value - ); - /*! 
-            ensures
-                - #learns_nonnegative_weights() == value
-                - if (value == true) then
-                    - #has_prior() == false
-        !*/
-
-        void set_prior (
-            const trained_function_type& prior
-        );
-        /*!
-            requires
-                - prior == a function produced by a call to this class's train() function.
-                  Therefore, it must be the case that:
-                    - prior.basis_vectors.size() == 1
-                    - prior.alpha(0) == 1
-            ensures
-                - Subsequent calls to train() will try to learn a function similar to the
-                  given prior.
-                - #has_prior() == true
-                - #learns_nonnegative_weights() == false
-                - #forces_last_weight_to_1() == false
-        !*/
-
-        bool has_prior (
-        ) const;
-        /*!
-            ensures
-                - returns true if a prior has been set and false otherwise.  Having a prior
-                  set means that you have called set_prior() and supplied a previously
-                  trained function as a reference.  In this case, any call to train() will
-                  try to learn a function that matches the behavior of the prior as closely
-                  as possible but also fits the supplied training data.  In more technical
-                  detail, having a prior means we replace the ||w||^2 regularizer with one
-                  of the form ||w-prior||^2 where w is the set of parameters for a learned
-                  function.
-        !*/
-
-        bool forces_last_weight_to_1 (
-        ) const;
-        /*!
-            ensures
-                - returns true if this trainer has the constraint that the last weight in
-                  the learned parameter vector must be 1.  This is the weight corresponding
-                  to the feature in the training vectors with the highest dimension.
-                - Forcing the last weight to 1 also disables the bias and therefore the b
-                  field of the learned decision_function will be 0 when forces_last_weight_to_1() == true.
-        !*/
-
-        void force_last_weight_to_1 (
-            bool should_last_weight_be_1
-        );
-        /*!
-            ensures
-                - #forces_last_weight_to_1() == should_last_weight_be_1
-                - if (should_last_weight_be_1 == true) then
-                    - #has_prior() == false
-        !*/
-
-        void set_c (
-            scalar_type C
-        );
-        /*!
-            requires
-                - C > 0
-            ensures
-                - #get_c_class1() == C
-                - #get_c_class2() == C
-        !*/
-
-        const scalar_type get_c_class1 (
-        ) const;
-        /*!
-            ensures
-                - returns the SVM regularization parameter for the +1 class.
-                  It is the parameter that determines the trade off between
-                  trying to fit the +1 training data exactly or allowing more errors
-                  but hopefully improving the generalization of the resulting
-                  classifier.  Larger values encourage exact fitting while
-                  smaller values of C may encourage better generalization.
-        !*/
-
-        const scalar_type get_c_class2 (
-        ) const;
-        /*!
-            ensures
-                - returns the SVM regularization parameter for the -1 class.
-                  It is the parameter that determines the trade off between
-                  trying to fit the -1 training data exactly or allowing more errors
-                  but hopefully improving the generalization of the resulting
-                  classifier.  Larger values encourage exact fitting while
-                  smaller values of C may encourage better generalization.
-        !*/
-
-        void set_c_class1 (
-            scalar_type C
-        );
-        /*!
-            requires
-                - C > 0
-            ensures
-                - #get_c_class1() == C
-        !*/
-
-        void set_c_class2 (
-            scalar_type C
-        );
-        /*!
-            requires
-                - C > 0
-            ensures
-                - #get_c_class2() == C
-        !*/
-
-        template <
-            typename in_sample_vector_type,
-            typename in_scalar_vector_type
-            >
-        const decision_function<kernel_type> train (
-            const in_sample_vector_type& x,
-            const in_scalar_vector_type& y
-        ) const;
-        /*!
-            requires
-                - is_learning_problem(x,y) == true
-                  (Note that it is ok for x.size() == 1)
-                - All elements of y must be equal to +1 or -1
-                - x == a matrix or something convertible to a matrix via mat().
-                  Also, x should contain sample_type objects.
-                - y == a matrix or something convertible to a matrix via mat().
-                  Also, y should contain scalar_type objects.
-                - if (has_prior()) then
-                    - The vectors in x must have the same dimensionality as the vectors
-                      used to train the prior given to set_prior().
-            ensures
-                - trains a C support vector classifier given the training samples in x and
-                  labels in y.
-                - returns a decision function F with the following properties:
-                    - F.alpha.size() == 1
-                    - F.basis_vectors.size() == 1
-                    - F.alpha(0) == 1
-                    - if (new_x is a sample predicted to have +1 label) then
-                        - F(new_x) >= 0
-                    - else
-                        - F(new_x) < 0
-        !*/
-
-        template <
-            typename in_sample_vector_type,
-            typename in_scalar_vector_type
-            >
-        const decision_function<kernel_type> train (
-            const in_sample_vector_type& x,
-            const in_scalar_vector_type& y,
-            scalar_type& svm_objective
-        ) const;
-        /*!
-            requires
-                - is_learning_problem(x,y) == true
-                  (Note that it is ok for x.size() == 1)
-                - All elements of y must be equal to +1 or -1
-                - x == a matrix or something convertible to a matrix via mat().
-                  Also, x should contain sample_type objects.
-                - y == a matrix or something convertible to a matrix via mat().
-                  Also, y should contain scalar_type objects.
-                - if (has_prior()) then
-                    - The vectors in x must have the same dimensionality as the vectors
-                      used to train the prior given to set_prior().
-            ensures
-                - trains a C support vector classifier given the training samples in x and
-                  labels in y.
-                - #svm_objective == the final value of the SVM objective function
-                - returns a decision function F with the following properties:
-                    - F.alpha.size() == 1
-                    - F.basis_vectors.size() == 1
-                    - F.alpha(0) == 1
-                    - if (new_x is a sample predicted to have +1 label) then
-                        - F(new_x) >= 0
-                    - else
-                        - F(new_x) < 0
-        !*/
-
-    };
-
-}
-
-#endif // DLIB_SVM_C_LiNEAR_TRAINER_ABSTRACT_Hh_
-
diff --git a/ml/dlib/dlib/svm/svm_c_trainer.h b/ml/dlib/dlib/svm/svm_c_trainer.h
deleted file mode 100644
index 14dcf3482..000000000
--- a/ml/dlib/dlib/svm/svm_c_trainer.h
+++ /dev/null
@@ -1,359 +0,0 @@
-// Copyright (C) 2007 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
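-
-// A minimal usage sketch for the svm_c_trainer defined below.  The kernel
-// choice and parameter values here are illustrative assumptions, not part of
-// the library's documentation:
-//
-//     typedef matrix<double,0,1> sample_type;
-//     typedef radial_basis_kernel<sample_type> kernel_type;
-//
-//     svm_c_trainer<kernel_type> trainer;
-//     trainer.set_kernel(kernel_type(0.1));  // RBF gamma chosen arbitrarily
-//     trainer.set_c(10);
-//     decision_function<kernel_type> df = trainer.train(samples, labels);
-//     // labels must be +1/-1; df(x) >= 0 predicts the +1 class.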
-#ifndef DLIB_SVm_C_TRAINER_Hh_ -#define DLIB_SVm_C_TRAINER_Hh_ - -//#include "local/make_label_kernel_matrix.h" - -#include "svm_c_trainer_abstract.h" -#include <cmath> -#include <limits> -#include <sstream> -#include "../matrix.h" -#include "../algs.h" - -#include "function.h" -#include "kernel.h" -#include "../optimization/optimization_solve_qp3_using_smo.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class svm_c_trainer - { - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - svm_c_trainer ( - ) : - Cpos(1), - Cneg(1), - cache_size(200), - eps(0.001) - { - } - - svm_c_trainer ( - const kernel_type& kernel_, - const scalar_type& C_ - ) : - kernel_function(kernel_), - Cpos(C_), - Cneg(C_), - cache_size(200), - eps(0.001) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < C_, - "\tsvm_c_trainer::svm_c_trainer(kernel,C)" - << "\n\t invalid inputs were given to this function" - << "\n\t C_: " << C_ - ); - } - - void set_cache_size ( - long cache_size_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(cache_size_ > 0, - "\tvoid svm_c_trainer::set_cache_size(cache_size_)" - << "\n\t invalid inputs were given to this function" - << "\n\t cache_size: " << cache_size_ - ); - cache_size = cache_size_; - } - - long get_cache_size ( - ) const - { - return cache_size; - } - - void set_epsilon ( - scalar_type eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\tvoid svm_c_trainer::set_epsilon(eps_)" - << "\n\t invalid inputs were given to this function" - << "\n\t eps_: " << eps_ - ); - eps = eps_; - } - - const scalar_type get_epsilon ( - ) const - { - return eps; - } - - void set_kernel ( - const kernel_type& k - ) - { - kernel_function = k; - } - - const kernel_type& get_kernel ( - ) const - { - return kernel_function; - } - - void set_c ( - scalar_type C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C > 0, - "\t void svm_c_trainer::set_c()" - << "\n\t C must be greater than 0" - << "\n\t C: " << C - << "\n\t this: " << this - ); - - Cpos = C; - Cneg = C; - } - - const scalar_type get_c_class1 ( - ) const - { - return Cpos; - } - - const scalar_type get_c_class2 ( - ) const - { - return Cneg; - } - - void set_c_class1 ( - scalar_type C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C > 0, - "\t void svm_c_trainer::set_c_class1()" - << "\n\t C must be greater than 0" - << "\n\t C: " << C - << "\n\t this: " << this - ); - - Cpos = C; - } - - void set_c_class2 ( - scalar_type C - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C > 0, - "\t void svm_c_trainer::set_c_class2()" - << "\n\t C must be greater than 0" - << "\n\t C: " << C - << "\n\t this: " << this - ); - - Cneg = C; - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - return do_train(mat(x), mat(y)); - } - - void swap ( - svm_c_trainer& item - ) - { - exchange(kernel_function, item.kernel_function); - exchange(Cpos, item.Cpos); - exchange(Cneg, item.Cneg); - exchange(cache_size, item.cache_size); - exchange(eps, item.eps); 
- } - - private: - - // ------------------------------------------------------------------------------------ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - typedef typename K::scalar_type scalar_type; - typedef typename decision_function<K>::sample_vector_type sample_vector_type; - typedef typename decision_function<K>::scalar_vector_type scalar_vector_type; - - // make sure requires clause is not broken - DLIB_ASSERT(is_binary_classification_problem(x,y) == true, - "\tdecision_function svm_c_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t x.nr(): " << x.nr() - << "\n\t y.nr(): " << y.nr() - << "\n\t x.nc(): " << x.nc() - << "\n\t y.nc(): " << y.nc() - << "\n\t is_binary_classification_problem(x,y): " << is_binary_classification_problem(x,y) - ); - - - scalar_vector_type alpha; - - solve_qp3_using_smo<scalar_vector_type> solver; - - solver(symmetric_matrix_cache<float>((diagm(y)*kernel_matrix(kernel_function,x)*diagm(y)), cache_size), - //solver(symmetric_matrix_cache<float>(make_label_kernel_matrix(kernel_matrix(kernel_function,x),y), cache_size), - uniform_matrix<scalar_type>(y.size(),1,-1), - y, - 0, - Cpos, - Cneg, - alpha, - eps); - - scalar_type b; - calculate_b(y,alpha,solver.get_gradient(),Cpos,Cneg,b); - alpha = pointwise_multiply(alpha,y); - - // count the number of support vectors - const long sv_count = (long)sum(alpha != 0); - - scalar_vector_type sv_alpha; - sample_vector_type support_vectors; - - // size these column vectors so that they have an entry for each support vector - sv_alpha.set_size(sv_count); - support_vectors.set_size(sv_count); - - // load the support vectors and their alpha values into these new column matrices - long idx = 0; - for (long i = 0; i < alpha.nr(); ++i) - { - if (alpha(i) != 0) - { - sv_alpha(idx) = alpha(i); - support_vectors(idx) = x(i); - ++idx; - } - } - - // now return the decision function - return decision_function<K> (sv_alpha, b, kernel_function, support_vectors); - } - - // ------------------------------------------------------------------------------------ - - template < - typename scalar_vector_type, - typename scalar_vector_type2 - > - void calculate_b( - const scalar_vector_type2& y, - const scalar_vector_type& alpha, - const scalar_vector_type& df, - const scalar_type& Cpos, - const scalar_type& Cneg, - scalar_type& b - ) const - { - using namespace std; - long num_free = 0; - scalar_type sum_free = 0; - - scalar_type upper_bound = -numeric_limits<scalar_type>::infinity(); - scalar_type lower_bound = numeric_limits<scalar_type>::infinity(); - - for(long i = 0; i < alpha.nr(); ++i) - { - if(y(i) == 1) - { - if(alpha(i) == Cpos) - { - if (df(i) > upper_bound) - upper_bound = df(i); - } - else if(alpha(i) == 0) - { - if (df(i) < lower_bound) - lower_bound = df(i); - } - else - { - ++num_free; - sum_free += df(i); - } - } - else - { - if(alpha(i) == Cneg) - { - if (-df(i) < lower_bound) - lower_bound = -df(i); - } - else if(alpha(i) == 0) - { - if (-df(i) > upper_bound) - upper_bound = -df(i); - } - else - { - ++num_free; - sum_free -= df(i); - } - } - } - - if(num_free > 0) - b = sum_free/num_free; - else - b = (upper_bound+lower_bound)/2; - } - - // ------------------------------------------------------------------------------------ - - - kernel_type kernel_function; - scalar_type Cpos; - scalar_type Cneg; - long cache_size; - 
scalar_type eps; - }; // end of class svm_c_trainer - -// ---------------------------------------------------------------------------------------- - - template <typename K> - void swap ( - svm_c_trainer<K>& a, - svm_c_trainer<K>& b - ) { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_C_TRAINER_Hh_ - diff --git a/ml/dlib/dlib/svm/svm_c_trainer_abstract.h b/ml/dlib/dlib/svm/svm_c_trainer_abstract.h deleted file mode 100644 index 696cccdb7..000000000 --- a/ml/dlib/dlib/svm/svm_c_trainer_abstract.h +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright (C) 2007 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SVm_C_TRAINER_ABSTRACT_ -#ifdef DLIB_SVm_C_TRAINER_ABSTRACT_ - -#include <cmath> -#include <limits> -#include <sstream> -#include "../matrix/matrix_abstract.h" -#include "../algs.h" -#include "function_abstract.h" -#include "kernel_abstract.h" -#include "../optimization/optimization_solve_qp3_using_smo_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class svm_c_trainer - { - /*! - REQUIREMENTS ON K - is a kernel function object as defined in dlib/svm/kernel_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object implements a trainer for a C support vector machine for - solving binary classification problems. It is implemented using the SMO - algorithm. - - The implementation of the C-SVM training algorithm used by this object is based - on the following paper: - - Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector - machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm - - !*/ - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - svm_c_trainer ( - ); - /*! - ensures - - This object is properly initialized and ready to be used - to train a support vector machine. - - #get_c_class1() == 1 - - #get_c_class2() == 1 - - #get_cache_size() == 200 - - #get_epsilon() == 0.001 - !*/ - - svm_c_trainer ( - const kernel_type& kernel, - const scalar_type& C - ); - /*! - requires - - 0 < C - ensures - - This object is properly initialized and ready to be used - to train a support vector machine. - - #get_kernel() == kernel - - #get_c_class1() == C - - #get_c_class2() == C - - #get_cache_size() == 200 - - #get_epsilon() == 0.001 - !*/ - - void set_cache_size ( - long cache_size - ); - /*! - requires - - cache_size > 0 - ensures - - #get_cache_size() == cache_size - !*/ - - const long get_cache_size ( - ) const; - /*! - ensures - - returns the number of megabytes of cache this object will use - when it performs training via the this->train() function. - (bigger values of this may make training go faster but won't affect - the result. However, too big a value will cause you to run out of - memory, obviously.) - !*/ - - void set_epsilon ( - scalar_type eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - const scalar_type get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Generally a good value for this is 0.001. 
Smaller values may result
-                in a more accurate solution but take longer to execute.
-        !*/
-
-        void set_kernel (
-            const kernel_type& k
-        );
-        /*!
-            ensures
-                - #get_kernel() == k
-        !*/
-
-        const kernel_type& get_kernel (
-        ) const;
-        /*!
-            ensures
-                - returns a copy of the kernel function in use by this object
-        !*/
-
-        void set_c (
-            scalar_type C
-        );
-        /*!
-            requires
-                - C > 0
-            ensures
-                - #get_c_class1() == C
-                - #get_c_class2() == C
-        !*/
-
-        const scalar_type get_c_class1 (
-        ) const;
-        /*!
-            ensures
-                - returns the SVM regularization parameter for the +1 class.
-                  It is the parameter that determines the trade off between
-                  trying to fit the +1 training data exactly or allowing more errors
-                  but hopefully improving the generalization ability of the
-                  resulting classifier.  Larger values encourage exact fitting
-                  while smaller values of C may encourage better generalization.
-        !*/
-
-        const scalar_type get_c_class2 (
-        ) const;
-        /*!
-            ensures
-                - returns the SVM regularization parameter for the -1 class.
-                  It is the parameter that determines the trade off between
-                  trying to fit the -1 training data exactly or allowing more errors
-                  but hopefully improving the generalization ability of the
-                  resulting classifier.  Larger values encourage exact fitting
-                  while smaller values of C may encourage better generalization.
-        !*/
-
-        void set_c_class1 (
-            scalar_type C
-        );
-        /*!
-            requires
-                - C > 0
-            ensures
-                - #get_c_class1() == C
-        !*/
-
-        void set_c_class2 (
-            scalar_type C
-        );
-        /*!
-            requires
-                - C > 0
-            ensures
-                - #get_c_class2() == C
-        !*/
-
-        template <
-            typename in_sample_vector_type,
-            typename in_scalar_vector_type
-            >
-        const decision_function<kernel_type> train (
-            const in_sample_vector_type& x,
-            const in_scalar_vector_type& y
-        ) const;
-        /*!
-            requires
-                - is_binary_classification_problem(x,y) == true
-                - x == a matrix or something convertible to a matrix via mat().
-                  Also, x should contain sample_type objects.
-                - y == a matrix or something convertible to a matrix via mat().
-                  Also, y should contain scalar_type objects.
-            ensures
-                - trains a C support vector classifier given the training samples in x and
-                  labels in y.  Training is done when the error is less than get_epsilon().
-                - returns a decision function F with the following properties:
-                    - if (new_x is a sample predicted to have +1 label) then
-                        - F(new_x) >= 0
-                    - else
-                        - F(new_x) < 0
-        !*/
-
-        void swap (
-            svm_c_trainer& item
-        );
-        /*!
-            ensures
-                - swaps *this and item
-        !*/
-    };
-
-    template <typename K>
-    void swap (
-        svm_c_trainer<K>& a,
-        svm_c_trainer<K>& b
-    ) { a.swap(b); }
-    /*!
-        provides a global swap
-    !*/
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_SVm_C_TRAINER_ABSTRACT_
-
-
diff --git a/ml/dlib/dlib/svm/svm_multiclass_linear_trainer.h b/ml/dlib/dlib/svm/svm_multiclass_linear_trainer.h
deleted file mode 100644
index 4727f7226..000000000
--- a/ml/dlib/dlib/svm/svm_multiclass_linear_trainer.h
+++ /dev/null
@@ -1,432 +0,0 @@
-// Copyright (C) 2011 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
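-
-// A minimal usage sketch for the svm_multiclass_linear_trainer defined below
-// (the label type, sample data, and C value are illustrative assumptions):
-//
-//     typedef matrix<double,0,1> sample_type;
-//     typedef linear_kernel<sample_type> kernel_type;
-//
-//     std::vector<sample_type> samples;
-//     std::vector<unsigned long> labels;  // e.g. class ids 0, 1, 2
-//     // ... fill samples and labels ...
-//
-//     svm_multiclass_linear_trainer<kernel_type,unsigned long> trainer;
-//     trainer.set_c(1);
-//     multiclass_linear_decision_function<kernel_type,unsigned long> df =
-//         trainer.train(samples, labels);
-//     // df(samples[0]) returns the predicted label.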
-#ifndef DLIB_SVm_MULTICLASS_LINEAR_TRAINER_Hh_
-#define DLIB_SVm_MULTICLASS_LINEAR_TRAINER_Hh_
-
-#include "svm_multiclass_linear_trainer_abstract.h"
-#include "structural_svm_problem_threaded.h"
-#include <vector>
-#include "../optimization/optimization_oca.h"
-#include "../matrix.h"
-#include "sparse_vector.h"
-#include "function.h"
-#include <algorithm>
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename matrix_type,
-        typename sample_type,
-        typename label_type
-        >
-    class multiclass_svm_problem : public structural_svm_problem_threaded<matrix_type,
-                                        std::vector<std::pair<unsigned long,typename matrix_type::type> > >
-    {
-        /*!
-            WHAT THIS OBJECT REPRESENTS
-                This object defines the optimization problem for the multiclass SVM trainer
-                object at the bottom of this file.
-
-                The joint feature vectors used by this object, the PSI(x,y) vectors, are
-                defined as follows:
-                    PSI(x,0) = [x,0,0,0,0, ...,0]
-                    PSI(x,1) = [0,x,0,0,0, ...,0]
-                    PSI(x,2) = [0,0,x,0,0, ...,0]
-                That is, if there are N labels then the joint feature vector has a
-                dimension that is N times the dimension of a single x sample.  Also,
-                note that we append a -1 value onto each x to account for the bias term.
-        !*/
-
-    public:
-        typedef typename matrix_type::type scalar_type;
-        typedef std::vector<std::pair<unsigned long,scalar_type> > feature_vector_type;
-
-        multiclass_svm_problem (
-            const std::vector<sample_type>& samples_,
-            const std::vector<label_type>& labels_,
-            const std::vector<label_type>& distinct_labels_,
-            const unsigned long dims_,
-            const unsigned long num_threads
-        ) :
-            structural_svm_problem_threaded<matrix_type, std::vector<std::pair<unsigned long,typename matrix_type::type> > >(num_threads),
-            samples(samples_),
-            labels(labels_),
-            distinct_labels(distinct_labels_),
-            dims(dims_+1) // +1 for the bias
-        {}
-
-        virtual long get_num_dimensions (
-        ) const
-        {
-            return dims*distinct_labels.size();
-        }
-
-        virtual long get_num_samples (
-        ) const
-        {
-            return static_cast<long>(samples.size());
-        }
-
-        virtual void get_truth_joint_feature_vector (
-            long idx,
-            feature_vector_type& psi
-        ) const
-        {
-            assign(psi, samples[idx]);
-            // Add a constant -1 to account for the bias term.
-            psi.push_back(std::make_pair(dims-1,static_cast<scalar_type>(-1)));
-
-            // Find which distinct label goes with this psi.
-            long label_idx = 0;
-            for (unsigned long i = 0; i < distinct_labels.size(); ++i)
-            {
-                if (distinct_labels[i] == labels[idx])
-                {
-                    label_idx = i;
-                    break;
-                }
-            }
-
-            offset_feature_vector(psi, dims*label_idx);
-        }
-
-        virtual void separation_oracle (
-            const long idx,
-            const matrix_type& current_solution,
-            scalar_type& loss,
-            feature_vector_type& psi
-        ) const
-        {
-            scalar_type best_val = -std::numeric_limits<scalar_type>::infinity();
-            unsigned long best_idx = 0;
-
-            // Figure out which label is the best.  That is, what label maximizes
-            // LOSS(idx,y) + F(x,y).  Note that y in this case is given by distinct_labels[i].
-            for (unsigned long i = 0; i < distinct_labels.size(); ++i)
-            {
-                // Compute the F(x,y) part:
-                // perform: temp == dot(relevant part of current solution, samples[idx]) - current_bias
-                scalar_type temp = dot(mat(&current_solution(i*dims),dims-1), samples[idx]) - current_solution((i+1)*dims-1);
-
-                // Add the LOSS(idx,y) part:
-                if (labels[idx] != distinct_labels[i])
-                    temp += 1;
-
-                // Now temp == LOSS(idx,y) + F(x,y).  Check if it is the biggest we have seen.
-                if (temp > best_val)
-                {
-                    best_val = temp;
-                    best_idx = i;
-                }
-            }
-
-            assign(psi, samples[idx]);
-            // add a constant -1 to account for the bias term
-            psi.push_back(std::make_pair(dims-1,static_cast<scalar_type>(-1)));
-
-            offset_feature_vector(psi, dims*best_idx);
-
-            if (distinct_labels[best_idx] == labels[idx])
-                loss = 0;
-            else
-                loss = 1;
-        }
-
-    private:
-
-        void offset_feature_vector (
-            feature_vector_type& sample,
-            const unsigned long val
-        ) const
-        {
-            if (val != 0)
-            {
-                for (typename feature_vector_type::iterator i = sample.begin(); i != sample.end(); ++i)
-                {
-                    i->first += val;
-                }
-            }
-        }
-
-
-        const std::vector<sample_type>& samples;
-        const std::vector<label_type>& labels;
-        const std::vector<label_type>& distinct_labels;
-        const long dims;
-    };
-
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename K,
-        typename label_type_ = typename K::scalar_type
-        >
-    class svm_multiclass_linear_trainer
-    {
-    public:
-        typedef label_type_ label_type;
-        typedef K kernel_type;
-        typedef typename kernel_type::scalar_type scalar_type;
-        typedef typename kernel_type::sample_type sample_type;
-        typedef typename kernel_type::mem_manager_type mem_manager_type;
-
-        typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type;
-
-
-        // You are getting a compiler error on this line because you supplied a non-linear kernel
-        // to the svm_multiclass_linear_trainer object.  You have to use one of the linear kernels
-        // with this trainer.
-        COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value ||
-                             is_same_type<K, sparse_linear_kernel<sample_type> >::value ));
-
-        svm_multiclass_linear_trainer (
-        ) :
-            num_threads(4),
-            C(1),
-            eps(0.001),
-            max_iterations(10000),
-            verbose(false),
-            learn_nonnegative_weights(false)
-        {
-        }
-
-        void set_num_threads (
-            unsigned long num
-        )
-        {
-            num_threads = num;
-        }
-
-        unsigned long get_num_threads (
-        ) const
-        {
-            return num_threads;
-        }
-
-        void set_epsilon (
-            scalar_type eps_
-        )
-        {
-            // make sure requires clause is not broken
-            DLIB_ASSERT(eps_ > 0,
-                "\t void svm_multiclass_linear_trainer::set_epsilon()"
-                << "\n\t eps_ must be greater than 0"
-                << "\n\t eps_: " << eps_
-                << "\n\t this: " << this
-                );
-
-            eps = eps_;
-        }
-
-        const scalar_type get_epsilon (
-        ) const { return eps; }
-
-        unsigned long get_max_iterations (
-        ) const { return max_iterations; }
-
-        void set_max_iterations (
-            unsigned long max_iter
-        )
-        {
-            max_iterations = max_iter;
-        }
-
-        void be_verbose (
-        )
-        {
-            verbose = true;
-        }
-
-        void be_quiet (
-        )
-        {
-            verbose = false;
-        }
-
-        void set_oca (
-            const oca& item
-        )
-        {
-            solver = item;
-        }
-
-        const oca get_oca (
-        ) const
-        {
-            return solver;
-        }
-
-        const kernel_type get_kernel (
-        ) const
-        {
-            return kernel_type();
-        }
-
-        bool learns_nonnegative_weights (
-        ) const { return learn_nonnegative_weights; }
-
-        void set_learns_nonnegative_weights (
-            bool value
-        )
-        {
-            learn_nonnegative_weights = value;
-            if (learn_nonnegative_weights)
-                prior = trained_function_type();
-        }
-
-        void set_c (
-            scalar_type C_
-        )
-        {
-            // make sure requires clause is not broken
-            DLIB_ASSERT(C_ > 0,
-                "\t void svm_multiclass_linear_trainer::set_c()"
-                << "\n\t C must be greater than 0"
-                << "\n\t C_: " << C_
-                << "\n\t this: " << this
-                );
-
-            C = C_;
-        }
-
-        const scalar_type get_c (
-        ) const
-        {
-            return C;
-        }
-
-        void set_prior (
-            const trained_function_type& prior_
-        )
-        {
-            prior = prior_;
-            learn_nonnegative_weights =
false; - } - - bool has_prior ( - ) const - { - return prior.labels.size() != 0; - } - - trained_function_type train ( - const std::vector<sample_type>& all_samples, - const std::vector<label_type>& all_labels - ) const - { - scalar_type svm_objective = 0; - return train(all_samples, all_labels, svm_objective); - } - - trained_function_type train ( - const std::vector<sample_type>& all_samples, - const std::vector<label_type>& all_labels, - scalar_type& svm_objective - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(is_learning_problem(all_samples,all_labels), - "\t trained_function_type svm_multiclass_linear_trainer::train(all_samples,all_labels)" - << "\n\t invalid inputs were given to this function" - << "\n\t all_samples.size(): " << all_samples.size() - << "\n\t all_labels.size(): " << all_labels.size() - ); - - trained_function_type df; - df.labels = select_all_distinct_labels(all_labels); - if (has_prior()) - { - df.labels.insert(df.labels.end(), prior.labels.begin(), prior.labels.end()); - df.labels = select_all_distinct_labels(df.labels); - } - const long input_sample_dimensionality = max_index_plus_one(all_samples); - // If the samples are sparse then the right thing to do is to take the max - // dimensionality between the prior and the new samples. But if the samples - // are dense vectors then they definitely all have to have exactly the same - // dimensionality. - const long dims = std::max(df.weights.nc(),input_sample_dimensionality); - if (is_matrix<sample_type>::value && has_prior()) - { - DLIB_ASSERT(input_sample_dimensionality == prior.weights.nc(), - "\t trained_function_type svm_multiclass_linear_trainer::train(all_samples,all_labels)" - << "\n\t The training samples given to this function are not the same kind of training " - << "\n\t samples used to create the prior." - << "\n\t input_sample_dimensionality: " << input_sample_dimensionality - << "\n\t prior.weights.nc(): " << prior.weights.nc() - ); - } - - typedef matrix<scalar_type,0,1> w_type; - w_type weights; - multiclass_svm_problem<w_type, sample_type, label_type> problem(all_samples, all_labels, df.labels, dims, num_threads); - if (verbose) - problem.be_verbose(); - - problem.set_max_cache_size(0); - problem.set_c(C); - problem.set_epsilon(eps); - problem.set_max_iterations(max_iterations); - - unsigned long num_nonnegative = 0; - if (learn_nonnegative_weights) - { - num_nonnegative = problem.get_num_dimensions(); - } - - if (!has_prior()) - { - svm_objective = solver(problem, weights, num_nonnegative); - } - else - { - matrix<scalar_type> temp(df.labels.size(),dims); - w_type b(df.labels.size()); - temp = 0; - b = 0; - - const long pad_size = dims-prior.weights.nc(); - // Copy the prior into the temp and b matrices. We have to do this row - // by row copy because the new training data might have new labels we - // haven't seen before and therefore the sizes of these matrices could be - // different. 
-                for (unsigned long i = 0; i < prior.labels.size(); ++i)
-                {
-                    const long r = std::find(df.labels.begin(), df.labels.end(), prior.labels[i])-df.labels.begin();
-                    set_rowm(temp,r) = join_rows(rowm(prior.weights,i), zeros_matrix<scalar_type>(1,pad_size));
-                    b(r) = prior.b(i);
-                }
-
-                const w_type prior_vect = reshape_to_column_vector(join_rows(temp,b));
-                svm_objective = solver(problem, weights, prior_vect);
-            }
-
-
-            df.weights = colm(reshape(weights, df.labels.size(), dims+1), range(0,dims-1));
-            df.b = colm(reshape(weights, df.labels.size(), dims+1), dims);
-            return df;
-        }
-
-    private:
-
-        unsigned long num_threads;
-        scalar_type C;
-        scalar_type eps;
-        unsigned long max_iterations;
-        bool verbose;
-        oca solver;
-        bool learn_nonnegative_weights;
-
-        trained_function_type prior;
-    };
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-
-#endif // DLIB_SVm_MULTICLASS_LINEAR_TRAINER_Hh_
-
diff --git a/ml/dlib/dlib/svm/svm_multiclass_linear_trainer_abstract.h b/ml/dlib/dlib/svm/svm_multiclass_linear_trainer_abstract.h
deleted file mode 100644
index 6561ce7b2..000000000
--- a/ml/dlib/dlib/svm/svm_multiclass_linear_trainer_abstract.h
+++ /dev/null
@@ -1,275 +0,0 @@
-// Copyright (C) 2011 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#undef DLIB_SVm_MULTICLASS_LINEAR_TRAINER_ABSTRACT_Hh_
-#ifdef DLIB_SVm_MULTICLASS_LINEAR_TRAINER_ABSTRACT_Hh_
-
-#include "../matrix/matrix_abstract.h"
-#include "../algs.h"
-#include "function_abstract.h"
-#include "kernel_abstract.h"
-#include "sparse_kernel_abstract.h"
-#include "../optimization/optimization_oca_abstract.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename K,
-        typename label_type_ = typename K::scalar_type
-        >
-    class svm_multiclass_linear_trainer
-    {
-        /*!
-            REQUIREMENTS ON K
-                Is either linear_kernel or sparse_linear_kernel.
-
-            REQUIREMENTS ON label_type_
-                label_type_ must be default constructible, copyable, and comparable using
-                operator < and ==.  It must also be possible to write it to an std::ostream
-                using operator<<.
-
-            INITIAL VALUE
-                - get_num_threads() == 4
-                - learns_nonnegative_weights() == false
-                - get_epsilon() == 0.001
-                - get_max_iterations() == 10000
-                - get_c() == 1
-                - this object will not be verbose unless be_verbose() is called
-                - get_oca() == oca() (i.e. an instance of oca with default parameters)
-                - has_prior() == false
-
-            WHAT THIS OBJECT REPRESENTS
-                This object represents a tool for training a multiclass support
-                vector machine.  It is optimized for the case where linear kernels
-                are used.
-        !*/
-
-    public:
-        typedef label_type_ label_type;
-        typedef K kernel_type;
-        typedef typename kernel_type::scalar_type scalar_type;
-        typedef typename kernel_type::sample_type sample_type;
-        typedef typename kernel_type::mem_manager_type mem_manager_type;
-        typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type;
-
-        svm_multiclass_linear_trainer (
-        );
-        /*!
-            ensures
-                - this object is properly initialized
-        !*/
-
-        void set_epsilon (
-            scalar_type eps
-        );
-        /*!
-            requires
-                - eps > 0
-            ensures
-                - #get_epsilon() == eps
-        !*/
-
-        const scalar_type get_epsilon (
-        ) const;
-        /*!
-            ensures
-                - returns the error epsilon that determines when training should stop.
-                  Smaller values may result in a more accurate solution but take longer
-                  to execute.
-        !*/
-
-        void set_max_iterations (
-            unsigned long max_iter
-        );
-        /*!
-            ensures
-                - #get_max_iterations() == max_iter
-        !*/
-
-        unsigned long get_max_iterations (
-        );
-        /*!
-            ensures
-                - returns the maximum number of iterations the SVM optimizer is allowed to
-                  run before it is required to stop and return a result.
-        !*/
-
-        void be_verbose (
-        );
-        /*!
-            ensures
-                - This object will print status messages to standard out so that a
-                  user can observe the progress of the algorithm.
-        !*/
-
-        void be_quiet (
-        );
-        /*!
-            ensures
-                - this object will not print anything to standard out
-        !*/
-
-        void set_oca (
-            const oca& item
-        );
-        /*!
-            ensures
-                - #get_oca() == item
-        !*/
-
-        const oca get_oca (
-        ) const;
-        /*!
-            ensures
-                - returns a copy of the optimizer used to solve the SVM problem.
-        !*/
-
-        void set_num_threads (
-            unsigned long num
-        );
-        /*!
-            ensures
-                - #get_num_threads() == num
-        !*/
-
-        unsigned long get_num_threads (
-        ) const;
-        /*!
-            ensures
-                - returns the number of threads used during training.  You should
-                  usually set this equal to the number of processing cores on your
-                  machine.
-        !*/
-
-        const kernel_type get_kernel (
-        ) const;
-        /*!
-            ensures
-                - returns a copy of the kernel function in use by this object.  Since
-                  the linear kernels don't have any parameters this function just
-                  returns kernel_type()
-        !*/
-
-        void set_c (
-            scalar_type C
-        );
-        /*!
-            requires
-                - C > 0
-            ensures
-                - #get_c() == C
-        !*/
-
-        const scalar_type get_c (
-        ) const;
-        /*!
-            ensures
-                - returns the SVM regularization parameter.  It is the parameter that
-                  determines the trade off between trying to fit the training data
-                  exactly or allowing more errors but hopefully improving the
-                  generalization of the resulting classifier.  Larger values encourage
-                  exact fitting while smaller values of C may encourage better
-                  generalization.
-        !*/
-
-        bool learns_nonnegative_weights (
-        ) const;
-        /*!
-            ensures
-                - The output of training is a set of weights and bias values that together
-                  define the behavior of a multiclass_linear_decision_function object.  If
-                  learns_nonnegative_weights() == true then the resulting weights and bias
-                  values will always have non-negative values.  That is, if this function
-                  returns true then all the numbers in the multiclass_linear_decision_function
-                  objects output by train() will be non-negative.
-        !*/
-
-        void set_learns_nonnegative_weights (
-            bool value
-        );
-        /*!
-            ensures
-                - #learns_nonnegative_weights() == value
-                - if (value == true) then
-                    - #has_prior() == false
-        !*/
-
-        void set_prior (
-            const trained_function_type& prior
-        );
-        /*!
-            ensures
-                - Subsequent calls to train() will try to learn a function similar to the
-                  given prior.
-                - #has_prior() == true
-                - #learns_nonnegative_weights() == false
-        !*/
-
-        bool has_prior (
-        ) const;
-        /*!
-            ensures
-                - returns true if a prior has been set and false otherwise.  Having a prior
-                  set means that you have called set_prior() and supplied a previously
-                  trained function as a reference.  In this case, any call to train() will
-                  try to learn a function that matches the behavior of the prior as closely
-                  as possible but also fits the supplied training data.  In more technical
-                  detail, having a prior means we replace the ||w||^2 regularizer with one
-                  of the form ||w-prior||^2 where w is the set of parameters for a learned
-                  function.
-        !*/
-
-        trained_function_type train (
-            const std::vector<sample_type>& all_samples,
-            const std::vector<label_type>& all_labels
-        ) const;
-        /*!
- requires - - is_learning_problem(all_samples, all_labels) - - All the vectors in all_samples must have the same dimensionality. - - if (has_prior()) then - - The vectors in all_samples must have the same dimensionality as the - vectors used to train the prior given to set_prior(). - ensures - - trains a multiclass SVM to solve the given multiclass classification problem. - - returns a multiclass_linear_decision_function F with the following properties: - - if (new_x is a sample predicted to have a label of L) then - - F(new_x) == L - - F.get_labels() == select_all_distinct_labels(all_labels) - - F.number_of_classes() == select_all_distinct_labels(all_labels).size() - !*/ - - trained_function_type train ( - const std::vector<sample_type>& all_samples, - const std::vector<label_type>& all_labels, - scalar_type& svm_objective - ) const; - /*! - requires - - is_learning_problem(all_samples, all_labels) - - All the vectors in all_samples must have the same dimensionality. - - if (has_prior()) then - - The vectors in all_samples must have the same dimensionality as the - vectors used to train the prior given to set_prior(). - ensures - - trains a multiclass SVM to solve the given multiclass classification problem. - - returns a multiclass_linear_decision_function F with the following properties: - - if (new_x is a sample predicted to have a label of L) then - - F(new_x) == L - - F.get_labels() == select_all_distinct_labels(all_labels) - - F.number_of_classes() == select_all_distinct_labels(all_labels).size() - - #svm_objective == the final value of the SVM objective function - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - -} - - -#endif // DLIB_SVm_MULTICLASS_LINEAR_TRAINER_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/svm/svm_nu_trainer.h b/ml/dlib/dlib/svm/svm_nu_trainer.h deleted file mode 100644 index 1e89d6efa..000000000 --- a/ml/dlib/dlib/svm/svm_nu_trainer.h +++ /dev/null @@ -1,326 +0,0 @@ -// Copyright (C) 2007 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
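-
-// A minimal usage sketch for the svm_nu_trainer defined below (kernel and nu
-// values are illustrative assumptions; nu must satisfy 0 < nu <= 1 and, per the
-// abstract documentation, be less than maximum_nu(labels) for the given data):
-//
-//     typedef matrix<double,0,1> sample_type;
-//     typedef radial_basis_kernel<sample_type> kernel_type;
-//
-//     svm_nu_trainer<kernel_type> trainer;
-//     trainer.set_kernel(kernel_type(0.1));  // RBF gamma chosen arbitrarily
-//     trainer.set_nu(0.05);
-//     decision_function<kernel_type> df = trainer.train(samples, labels);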
-#ifndef DLIB_SVm_NU_TRAINER_Hh_ -#define DLIB_SVm_NU_TRAINER_Hh_ - -//#include "local/make_label_kernel_matrix.h" - -#include "svm_nu_trainer_abstract.h" -#include <cmath> -#include <limits> -#include <sstream> -#include "../matrix.h" -#include "../algs.h" -#include "../serialize.h" - -#include "function.h" -#include "kernel.h" -#include "../optimization/optimization_solve_qp2_using_smo.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class svm_nu_trainer - { - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - svm_nu_trainer ( - ) : - nu(0.1), - cache_size(200), - eps(0.001) - { - } - - svm_nu_trainer ( - const kernel_type& kernel_, - const scalar_type& nu_ - ) : - kernel_function(kernel_), - nu(nu_), - cache_size(200), - eps(0.001) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < nu && nu <= 1, - "\tsvm_nu_trainer::svm_nu_trainer(kernel,nu)" - << "\n\t invalid inputs were given to this function" - << "\n\t nu: " << nu - ); - } - - void set_cache_size ( - long cache_size_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(cache_size_ > 0, - "\tvoid svm_nu_trainer::set_cache_size(cache_size_)" - << "\n\t invalid inputs were given to this function" - << "\n\t cache_size: " << cache_size_ - ); - cache_size = cache_size_; - } - - long get_cache_size ( - ) const - { - return cache_size; - } - - void set_epsilon ( - scalar_type eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\tvoid svm_nu_trainer::set_epsilon(eps_)" - << "\n\t invalid inputs were given to this function" - << "\n\t eps: " << eps_ - ); - eps = eps_; - } - - const scalar_type get_epsilon ( - ) const - { - return eps; - } - - void set_kernel ( - const kernel_type& k - ) - { - kernel_function = k; - } - - const kernel_type& get_kernel ( - ) const - { - return kernel_function; - } - - void set_nu ( - scalar_type nu_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < nu_ && nu_ <= 1, - "\tvoid svm_nu_trainer::set_nu(nu_)" - << "\n\t invalid inputs were given to this function" - << "\n\t nu: " << nu_ - ); - nu = nu_; - } - - const scalar_type get_nu ( - ) const - { - return nu; - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - return do_train(mat(x), mat(y)); - } - - void swap ( - svm_nu_trainer& item - ) - { - exchange(kernel_function, item.kernel_function); - exchange(nu, item.nu); - exchange(cache_size, item.cache_size); - exchange(eps, item.eps); - } - - private: - - // ------------------------------------------------------------------------------------ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - typedef typename K::scalar_type scalar_type; - typedef typename decision_function<K>::sample_vector_type sample_vector_type; - typedef typename decision_function<K>::scalar_vector_type scalar_vector_type; - - // make sure requires clause is not broken - 
DLIB_ASSERT(is_binary_classification_problem(x,y) == true, - "\tdecision_function svm_nu_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t x.nr(): " << x.nr() - << "\n\t y.nr(): " << y.nr() - << "\n\t x.nc(): " << x.nc() - << "\n\t y.nc(): " << y.nc() - << "\n\t is_binary_classification_problem(x,y): " << is_binary_classification_problem(x,y) - ); - - - scalar_vector_type alpha; - - solve_qp2_using_smo<scalar_vector_type> solver; - - solver(symmetric_matrix_cache<float>((diagm(y)*kernel_matrix(kernel_function,x)*diagm(y)), cache_size), - //solver(symmetric_matrix_cache<float>(make_label_kernel_matrix(kernel_matrix(kernel_function,x),y), cache_size), - y, - nu, - alpha, - eps); - - scalar_type rho, b; - calculate_rho_and_b(y,alpha,solver.get_gradient(),rho,b); - alpha = pointwise_multiply(alpha,y)/rho; - - // count the number of support vectors - const long sv_count = (long)sum(alpha != 0); - - scalar_vector_type sv_alpha; - sample_vector_type support_vectors; - - // size these column vectors so that they have an entry for each support vector - sv_alpha.set_size(sv_count); - support_vectors.set_size(sv_count); - - // load the support vectors and their alpha values into these new column matrices - long idx = 0; - for (long i = 0; i < alpha.nr(); ++i) - { - if (alpha(i) != 0) - { - sv_alpha(idx) = alpha(i); - support_vectors(idx) = x(i); - ++idx; - } - } - - // now return the decision function - return decision_function<K> (sv_alpha, b, kernel_function, support_vectors); - } - - // ------------------------------------------------------------------------------------ - - template < - typename scalar_vector_type, - typename scalar_vector_type2, - typename scalar_type - > - void calculate_rho_and_b( - const scalar_vector_type2& y, - const scalar_vector_type& alpha, - const scalar_vector_type& df, - scalar_type& rho, - scalar_type& b - ) const - { - using namespace std; - long num_p_free = 0; - long num_n_free = 0; - scalar_type sum_p_free = 0; - scalar_type sum_n_free = 0; - - scalar_type upper_bound_p = -numeric_limits<scalar_type>::infinity(); - scalar_type upper_bound_n = -numeric_limits<scalar_type>::infinity(); - scalar_type lower_bound_p = numeric_limits<scalar_type>::infinity(); - scalar_type lower_bound_n = numeric_limits<scalar_type>::infinity(); - - for(long i = 0; i < alpha.nr(); ++i) - { - if(y(i) == 1) - { - if(alpha(i) == 1) - { - if (df(i) > upper_bound_p) - upper_bound_p = df(i); - } - else if(alpha(i) == 0) - { - if (df(i) < lower_bound_p) - lower_bound_p = df(i); - } - else - { - ++num_p_free; - sum_p_free += df(i); - } - } - else - { - if(alpha(i) == 1) - { - if (df(i) > upper_bound_n) - upper_bound_n = df(i); - } - else if(alpha(i) == 0) - { - if (df(i) < lower_bound_n) - lower_bound_n = df(i); - } - else - { - ++num_n_free; - sum_n_free += df(i); - } - } - } - - scalar_type r1,r2; - if(num_p_free > 0) - r1 = sum_p_free/num_p_free; - else - r1 = (upper_bound_p+lower_bound_p)/2; - - if(num_n_free > 0) - r2 = sum_n_free/num_n_free; - else - r2 = (upper_bound_n+lower_bound_n)/2; - - rho = (r1+r2)/2; - b = (r1-r2)/2/rho; - } - - // ------------------------------------------------------------------------------------ - - kernel_type kernel_function; - scalar_type nu; - long cache_size; - scalar_type eps; - }; // end of class svm_nu_trainer - -// ---------------------------------------------------------------------------------------- - - template <typename K> - void swap ( - svm_nu_trainer<K>& a, - svm_nu_trainer<K>& b - ) { a.swap(b); } - -// 
---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_NU_TRAINER_Hh_ - diff --git a/ml/dlib/dlib/svm/svm_nu_trainer_abstract.h b/ml/dlib/dlib/svm/svm_nu_trainer_abstract.h deleted file mode 100644 index 5ae0fba4a..000000000 --- a/ml/dlib/dlib/svm/svm_nu_trainer_abstract.h +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright (C) 2007 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SVm_NU_TRAINER_ABSTRACT_ -#ifdef DLIB_SVm_NU_TRAINER_ABSTRACT_ - -#include <cmath> -#include <limits> -#include <sstream> -#include "../matrix/matrix_abstract.h" -#include "../algs.h" -#include "../serialize.h" -#include "function_abstract.h" -#include "kernel_abstract.h" -#include "../optimization/optimization_solve_qp2_using_smo_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class svm_nu_trainer - { - /*! - REQUIREMENTS ON K - is a kernel function object as defined in dlib/svm/kernel_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object implements a trainer for a nu support vector machine for - solving binary classification problems. It is implemented using the SMO - algorithm. - - The implementation of the nu-svm training algorithm used by this object is based - on the following excellent papers: - - Chang and Lin, Training {nu}-Support Vector Classifiers: Theory and Algorithms - - Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector - machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm - - !*/ - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - svm_nu_trainer ( - ); - /*! - ensures - - This object is properly initialized and ready to be used - to train a support vector machine. - - #get_nu() == 0.1 - - #get_cache_size() == 200 - - #get_epsilon() == 0.001 - !*/ - - svm_nu_trainer ( - const kernel_type& kernel, - const scalar_type& nu - ); - /*! - requires - - 0 < nu <= 1 - ensures - - This object is properly initialized and ready to be used - to train a support vector machine. - - #get_kernel() == kernel - - #get_nu() == nu - - #get_cache_size() == 200 - - #get_epsilon() == 0.001 - !*/ - - void set_cache_size ( - long cache_size - ); - /*! - requires - - cache_size > 0 - ensures - - #get_cache_size() == cache_size - !*/ - - const long get_cache_size ( - ) const; - /*! - ensures - - returns the number of megabytes of cache this object will use - when it performs training via the this->train() function. - (bigger values of this may make training go faster but won't affect - the result. However, too big a value will cause you to run out of - memory, obviously.) - !*/ - - void set_epsilon ( - scalar_type eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - const scalar_type get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Generally a good value for this is 0.001. Smaller values may result - in a more accurate solution but take longer to execute. - !*/ - - void set_kernel ( - const kernel_type& k - ); - /*! - ensures - - #get_kernel() == k - !*/ - - const kernel_type& get_kernel ( - ) const; - /*! 
-            ensures
-                - returns a copy of the kernel function in use by this object
-        !*/
-
-        void set_nu (
-            scalar_type nu
-        );
-        /*!
-            requires
-                - 0 < nu <= 1
-            ensures
-                - #get_nu() == nu
-        !*/
-
-        const scalar_type get_nu (
-        ) const;
-        /*!
-            ensures
-                - returns the nu svm parameter.  This is a value between 0 and
-                  1.  It is the parameter that determines the trade off between
-                  trying to fit the training data exactly or allowing more errors
-                  but hopefully improving the generalization ability of the
-                  resulting classifier.  Smaller values encourage exact fitting
-                  while larger values of nu may encourage better generalization.
-                  For more information you should consult the papers referenced
-                  above.
-        !*/
-
-        template <
-            typename in_sample_vector_type,
-            typename in_scalar_vector_type
-            >
-        const decision_function<kernel_type> train (
-            const in_sample_vector_type& x,
-            const in_scalar_vector_type& y
-        ) const;
-        /*!
-            requires
-                - is_binary_classification_problem(x,y) == true
-                - x == a matrix or something convertible to a matrix via mat().
-                  Also, x should contain sample_type objects.
-                - y == a matrix or something convertible to a matrix via mat().
-                  Also, y should contain scalar_type objects.
-            ensures
-                - trains a nu support vector classifier given the training samples in x and
-                  labels in y.  Training is done when the error is less than get_epsilon().
-                - returns a decision function F with the following properties:
-                    - if (new_x is a sample predicted to have +1 label) then
-                        - F(new_x) >= 0
-                    - else
-                        - F(new_x) < 0
-            throws
-                - invalid_nu_error
-                  This exception is thrown if get_nu() >= maximum_nu(y)
-                - std::bad_alloc
-        !*/
-
-        void swap (
-            svm_nu_trainer& item
-        );
-        /*!
-            ensures
-                - swaps *this and item
-        !*/
-    };
-
-    template <typename K>
-    void swap (
-        svm_nu_trainer<K>& a,
-        svm_nu_trainer<K>& b
-    ) { a.swap(b); }
-    /*!
-        provides a global swap
-    !*/
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_SVm_NU_TRAINER_ABSTRACT_
-
-
diff --git a/ml/dlib/dlib/svm/svm_one_class_trainer.h b/ml/dlib/dlib/svm/svm_one_class_trainer.h
deleted file mode 100644
index be3cc8caf..000000000
--- a/ml/dlib/dlib/svm/svm_one_class_trainer.h
+++ /dev/null
@@ -1,284 +0,0 @@
-// Copyright (C) 2010 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
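-
-// A minimal usage sketch for the svm_one_class_trainer defined below.  Training
-// is unsupervised (samples only, no labels); the kernel and nu values are
-// illustrative assumptions:
-//
-//     typedef matrix<double,0,1> sample_type;
-//     typedef radial_basis_kernel<sample_type> kernel_type;
-//
-//     svm_one_class_trainer<kernel_type> trainer;
-//     trainer.set_kernel(kernel_type(0.1));
-//     trainer.set_nu(0.05);  // roughly bounds the fraction of training points treated as outliers
-//     decision_function<kernel_type> df = trainer.train(samples);
-//     // larger df(x) values indicate x is more similar to the training data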
-#ifndef DLIB_SVm_ONE_CLASS_TRAINER_Hh_ -#define DLIB_SVm_ONE_CLASS_TRAINER_Hh_ - -#include "svm_one_class_trainer_abstract.h" -#include <cmath> -#include <limits> -#include <sstream> -#include "../matrix.h" -#include "../algs.h" - -#include "function.h" -#include "kernel.h" -#include "../optimization/optimization_solve_qp3_using_smo.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class svm_one_class_trainer - { - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - svm_one_class_trainer ( - ) : - nu(0.1), - cache_size(200), - eps(0.001) - { - } - - svm_one_class_trainer ( - const kernel_type& kernel_, - const scalar_type& nu_ - ) : - kernel_function(kernel_), - nu(nu_), - cache_size(200), - eps(0.001) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < nu && nu <= 1, - "\tsvm_one_class_trainer::svm_one_class_trainer(kernel,nu)" - << "\n\t invalid inputs were given to this function" - << "\n\t nu: " << nu - ); - } - - void set_cache_size ( - long cache_size_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(cache_size_ > 0, - "\tvoid svm_one_class_trainer::set_cache_size(cache_size_)" - << "\n\t invalid inputs were given to this function" - << "\n\t cache_size: " << cache_size_ - ); - cache_size = cache_size_; - } - - long get_cache_size ( - ) const - { - return cache_size; - } - - void set_epsilon ( - scalar_type eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\tvoid svm_one_class_trainer::set_epsilon(eps_)" - << "\n\t invalid inputs were given to this function" - << "\n\t eps: " << eps_ - ); - eps = eps_; - } - - const scalar_type get_epsilon ( - ) const - { - return eps; - } - - void set_kernel ( - const kernel_type& k - ) - { - kernel_function = k; - } - - const kernel_type& get_kernel ( - ) const - { - return kernel_function; - } - - void set_nu ( - scalar_type nu_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < nu_ && nu_ <= 1, - "\tvoid svm_one_class_trainer::set_nu(nu_)" - << "\n\t invalid inputs were given to this function" - << "\n\t nu: " << nu_ - ); - nu = nu_; - } - - const scalar_type get_nu ( - ) const - { - return nu; - } - - template < - typename in_sample_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x - ) const - { - return do_train(mat(x)); - } - - void swap ( - svm_one_class_trainer& item - ) - { - exchange(kernel_function, item.kernel_function); - exchange(nu, item.nu); - exchange(cache_size, item.cache_size); - exchange(eps, item.eps); - } - - private: - - // ------------------------------------------------------------------------------------ - - template < - typename in_sample_vector_type - > - const decision_function<kernel_type> do_train ( - const in_sample_vector_type& x - ) const - { - typedef typename K::scalar_type scalar_type; - typedef typename decision_function<K>::sample_vector_type sample_vector_type; - typedef typename decision_function<K>::scalar_vector_type scalar_vector_type; - - // make sure requires clause is not broken - DLIB_ASSERT(is_col_vector(x) && x.size() > 0, - "\tdecision_function svm_one_class_trainer::train(x)" - << "\n\t invalid inputs were given to this function" - << 
"\n\t x.nr(): " << x.nr() - << "\n\t x.nc(): " << x.nc() - ); - - - scalar_vector_type alpha; - - solve_qp3_using_smo<scalar_vector_type> solver; - - solver(symmetric_matrix_cache<float>(kernel_matrix(kernel_function,x), cache_size), - zeros_matrix<scalar_type>(x.size(),1), - ones_matrix<scalar_type>(x.size(),1), - nu*x.size(), - 1, - 1, - alpha, - eps); - - scalar_type rho; - calculate_rho(alpha,solver.get_gradient(),rho); - - - // count the number of support vectors - const long sv_count = (long)sum(alpha != 0); - - scalar_vector_type sv_alpha; - sample_vector_type support_vectors; - - // size these column vectors so that they have an entry for each support vector - sv_alpha.set_size(sv_count); - support_vectors.set_size(sv_count); - - // load the support vectors and their alpha values into these new column matrices - long idx = 0; - for (long i = 0; i < alpha.nr(); ++i) - { - if (alpha(i) != 0) - { - sv_alpha(idx) = alpha(i); - support_vectors(idx) = x(i); - ++idx; - } - } - - // now return the decision function - return decision_function<K> (sv_alpha, rho, kernel_function, support_vectors); - } - - // ------------------------------------------------------------------------------------ - - template < - typename scalar_vector_type - > - void calculate_rho( - const scalar_vector_type& alpha, - const scalar_vector_type& df, - scalar_type& rho - ) const - { - using namespace std; - long num_p_free = 0; - scalar_type sum_p_free = 0; - - - scalar_type upper_bound_p; - scalar_type lower_bound_p; - - find_min_and_max(df, upper_bound_p, lower_bound_p); - - for(long i = 0; i < alpha.nr(); ++i) - { - if(alpha(i) == 1) - { - if (df(i) > upper_bound_p) - upper_bound_p = df(i); - } - else if(alpha(i) == 0) - { - if (df(i) < lower_bound_p) - lower_bound_p = df(i); - } - else - { - ++num_p_free; - sum_p_free += df(i); - } - } - - scalar_type r1; - if(num_p_free > 0) - r1 = sum_p_free/num_p_free; - else - r1 = (upper_bound_p+lower_bound_p)/2; - - rho = r1; - } - - kernel_type kernel_function; - scalar_type nu; - long cache_size; - scalar_type eps; - }; // end of class svm_one_class_trainer - -// ---------------------------------------------------------------------------------------- - - template <typename K> - void swap ( - svm_one_class_trainer<K>& a, - svm_one_class_trainer<K>& b - ) { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_ONE_CLASS_TRAINER_Hh_ - diff --git a/ml/dlib/dlib/svm/svm_one_class_trainer_abstract.h b/ml/dlib/dlib/svm/svm_one_class_trainer_abstract.h deleted file mode 100644 index 6b55919ad..000000000 --- a/ml/dlib/dlib/svm/svm_one_class_trainer_abstract.h +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SVm_ONE_CLASS_TRAINER_ABSTRACT_ -#ifdef DLIB_SVm_ONE_CLASS_TRAINER_ABSTRACT_ - -#include <cmath> -#include <limits> -#include "../matrix/matrix_abstract.h" -#include "../algs.h" -#include "function_abstract.h" -#include "kernel_abstract.h" -#include "../optimization/optimization_solve_qp3_using_smo_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class svm_one_class_trainer - { - /*! 
- REQUIREMENTS ON K - is a kernel function object as defined in dlib/svm/kernel_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object implements a trainer for a support vector machine for - solving one-class classification problems. It is implemented using the SMO - algorithm. - - The implementation of the training algorithm used by this object is based - on the following excellent paper: - - Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector - machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm - - !*/ - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - svm_one_class_trainer ( - ); - /*! - ensures - - This object is properly initialized and ready to be used - to train a support vector machine. - - #get_nu() == 0.1 - - #get_cache_size() == 200 - - #get_epsilon() == 0.001 - !*/ - - svm_one_class_trainer ( - const kernel_type& kernel, - const scalar_type& nu - ); - /*! - requires - - 0 < nu <= 1 - ensures - - This object is properly initialized and ready to be used - to train a support vector machine. - - #get_kernel() == kernel - - #get_nu() == nu - - #get_cache_size() == 200 - - #get_epsilon() == 0.001 - !*/ - - void set_cache_size ( - long cache_size - ); - /*! - requires - - cache_size > 0 - ensures - - #get_cache_size() == cache_size - !*/ - - const long get_cache_size ( - ) const; - /*! - ensures - - returns the number of megabytes of cache this object will use - when it performs training via the this->train() function. - (bigger values of this may make training go faster but won't affect - the result. However, too big a value will cause you to run out of - memory, obviously.) - !*/ - - void set_epsilon ( - scalar_type eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - const scalar_type get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Generally a good value for this is 0.001. Smaller values may result - in a more accurate solution but take longer to execute. - !*/ - - void set_kernel ( - const kernel_type& k - ); - /*! - ensures - - #get_kernel() == k - !*/ - - const kernel_type& get_kernel ( - ) const; - /*! - ensures - - returns a copy of the kernel function in use by this object - !*/ - - void set_nu ( - scalar_type nu - ); - /*! - requires - - 0 < nu <= 1 - ensures - - #get_nu() == nu - !*/ - - const scalar_type get_nu ( - ) const; - /*! - ensures - - returns the nu svm parameter. This is a value between 0 and - 1. It is the parameter that determines the trade off between - trying to fit the training data exactly or allowing more errors - but hopefully improving the generalization ability of the - resulting classifier. Smaller values encourage exact fitting - while larger values of nu may encourage better generalization. - For more information you should consult the papers referenced - above. - !*/ - - template < - typename in_sample_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x - ) const; - /*! - requires - - x.size() > 0 - - is_col_vector(x) == true - - x == a matrix or something convertible to a matrix via mat(). - Also, x should contain sample_type objects. - ensures - - trains a one-class support vector classifier given the training samples in x. 
- Training is done when the error is less than get_epsilon(). - - returns a decision function F with the following properties: - - if (new_x is a sample predicted to arise from the distribution - which generated the training samples) then - - F(new_x) >= 0 - - else - - F(new_x) < 0 - !*/ - - void swap ( - svm_one_class_trainer& item - ); - /*! - ensures - - swaps *this and item - !*/ - }; - - template <typename K> - void swap ( - svm_one_class_trainer<K>& a, - svm_one_class_trainer<K>& b - ) { a.swap(b); } - /*! - provides a global swap - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_ONE_CLASS_TRAINER_ABSTRACT_ - - - diff --git a/ml/dlib/dlib/svm/svm_rank_trainer.h b/ml/dlib/dlib/svm/svm_rank_trainer.h deleted file mode 100644 index 0be737f48..000000000 --- a/ml/dlib/dlib/svm/svm_rank_trainer.h +++ /dev/null @@ -1,495 +0,0 @@ -// Copyright (C) 2012 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_SVM_RANK_TrAINER_Hh_ -#define DLIB_SVM_RANK_TrAINER_Hh_ - -#include "svm_rank_trainer_abstract.h" - -#include "ranking_tools.h" -#include "../algs.h" -#include "../optimization.h" -#include "function.h" -#include "kernel.h" -#include "sparse_vector.h" -#include <iostream> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type, - typename sample_type - > - class oca_problem_ranking_svm : public oca_problem<matrix_type > - { - public: - /* - This class is used as part of the implementation of the svm_rank_trainer - defined towards the end of this file. - */ - - typedef typename matrix_type::type scalar_type; - - oca_problem_ranking_svm( - const scalar_type C_, - const std::vector<ranking_pair<sample_type> >& samples_, - const bool be_verbose_, - const scalar_type eps_, - const unsigned long max_iter, - const unsigned long dims_ - ) : - samples(samples_), - C(C_), - be_verbose(be_verbose_), - eps(eps_), - max_iterations(max_iter), - dims(dims_) - { - } - - virtual scalar_type get_c ( - ) const - { - return C; - } - - virtual long get_num_dimensions ( - ) const - { - return dims; - } - - virtual bool optimization_status ( - scalar_type current_objective_value, - scalar_type current_error_gap, - scalar_type current_risk_value, - scalar_type current_risk_gap, - unsigned long num_cutting_planes, - unsigned long num_iterations - ) const - { - if (be_verbose) - { - using namespace std; - cout << "objective: " << current_objective_value << endl; - cout << "objective gap: " << current_error_gap << endl; - cout << "risk: " << current_risk_value << endl; - cout << "risk gap: " << current_risk_gap << endl; - cout << "num planes: " << num_cutting_planes << endl; - cout << "iter: " << num_iterations << endl; - cout << endl; - } - - if (num_iterations >= max_iterations) - return true; - - if (current_risk_gap < eps) - return true; - - return false; - } - - virtual bool risk_has_lower_bound ( - scalar_type& lower_bound - ) const - { - lower_bound = 0; - return true; - } - - virtual void get_risk ( - matrix_type& w, - scalar_type& risk, - matrix_type& subgradient - ) const - { - subgradient.set_size(w.size(),1); - subgradient = 0; - risk = 0; - - // Note that we want the risk value to be in terms of the fraction of overall - // rank flips. So a risk of 0.1 would mean that rank flips happen < 10% of the - // time. 
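// In outline, the loop below does the following for each query: score every
// relevant and nonrelevant vector with the current w (giving each nonrelevant
// score a +1 margin), then use count_ranking_inversions() to find, for each
// vector, how many vectors from the other set are mis-ordered relative to it.
// Every violated pair adds its score difference to the hinge risk and its
// sample to the subgradient, and the final division by total_pairs expresses
// the risk as a fraction of all ordered pairs.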
- - - std::vector<double> rel_scores; - std::vector<double> nonrel_scores; - std::vector<unsigned long> rel_counts; - std::vector<unsigned long> nonrel_counts; - - unsigned long total_pairs = 0; - - // loop over all the samples and compute the risk and its subgradient at the current solution point w - for (unsigned long i = 0; i < samples.size(); ++i) - { - rel_scores.resize(samples[i].relevant.size()); - nonrel_scores.resize(samples[i].nonrelevant.size()); - - for (unsigned long k = 0; k < rel_scores.size(); ++k) - rel_scores[k] = dot(samples[i].relevant[k], w); - - for (unsigned long k = 0; k < nonrel_scores.size(); ++k) - nonrel_scores[k] = dot(samples[i].nonrelevant[k], w) + 1; - - count_ranking_inversions(rel_scores, nonrel_scores, rel_counts, nonrel_counts); - - total_pairs += rel_scores.size()*nonrel_scores.size(); - - for (unsigned long k = 0; k < rel_counts.size(); ++k) - { - if (rel_counts[k] != 0) - { - risk -= rel_counts[k]*rel_scores[k]; - subtract_from(subgradient, samples[i].relevant[k], rel_counts[k]); - } - } - - for (unsigned long k = 0; k < nonrel_counts.size(); ++k) - { - if (nonrel_counts[k] != 0) - { - risk += nonrel_counts[k]*nonrel_scores[k]; - add_to(subgradient, samples[i].nonrelevant[k], nonrel_counts[k]); - } - } - - } - - const scalar_type scale = 1.0/total_pairs; - - risk *= scale; - subgradient = scale*subgradient; - } - - private: - - // ----------------------------------------------------- - // ----------------------------------------------------- - - - const std::vector<ranking_pair<sample_type> >& samples; - const scalar_type C; - - const bool be_verbose; - const scalar_type eps; - const unsigned long max_iterations; - const unsigned long dims; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type, - typename sample_type, - typename scalar_type - > - oca_problem_ranking_svm<matrix_type, sample_type> make_oca_problem_ranking_svm ( - const scalar_type C, - const std::vector<ranking_pair<sample_type> >& samples, - const bool be_verbose, - const scalar_type eps, - const unsigned long max_iterations, - const unsigned long dims - ) - { - return oca_problem_ranking_svm<matrix_type, sample_type>( - C, samples, be_verbose, eps, max_iterations, dims); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class svm_rank_trainer - { - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - // You are getting a compiler error on this line because you supplied a non-linear kernel - // to the svm_rank_trainer object. You have to use one of the linear kernels with this - // trainer. 
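// (The linear-kernel restriction reflects how this trainer works: it hands the
// problem to the OCA solver, which optimizes an explicit weight vector w in
// the input feature space, a representation that only exists for the linear
// and sparse linear kernels.)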
- COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value || - is_same_type<K, sparse_linear_kernel<sample_type> >::value )); - - svm_rank_trainer ( - ) - { - C = 1; - verbose = false; - eps = 0.001; - max_iterations = 10000; - learn_nonnegative_weights = false; - last_weight_1 = false; - } - - explicit svm_rank_trainer ( - const scalar_type& C_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C_ > 0, - "\t svm_rank_trainer::svm_rank_trainer()" - << "\n\t C_ must be greater than 0" - << "\n\t C_: " << C_ - << "\n\t this: " << this - ); - - C = C_; - verbose = false; - eps = 0.001; - max_iterations = 10000; - learn_nonnegative_weights = false; - last_weight_1 = false; - } - - void set_epsilon ( - scalar_type eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void svm_rank_trainer::set_epsilon()" - << "\n\t eps_ must be greater than 0" - << "\n\t eps_: " << eps_ - << "\n\t this: " << this - ); - - eps = eps_; - } - - const scalar_type get_epsilon ( - ) const { return eps; } - - unsigned long get_max_iterations ( - ) const { return max_iterations; } - - void set_max_iterations ( - unsigned long max_iter - ) - { - max_iterations = max_iter; - } - - void be_verbose ( - ) - { - verbose = true; - } - - void be_quiet ( - ) - { - verbose = false; - } - - bool forces_last_weight_to_1 ( - ) const - { - return last_weight_1; - } - - void force_last_weight_to_1 ( - bool should_last_weight_be_1 - ) - { - last_weight_1 = should_last_weight_be_1; - if (last_weight_1) - prior.set_size(0); - } - - void set_oca ( - const oca& item - ) - { - solver = item; - } - - const oca get_oca ( - ) const - { - return solver; - } - - const kernel_type get_kernel ( - ) const - { - return kernel_type(); - } - - bool learns_nonnegative_weights ( - ) const { return learn_nonnegative_weights; } - - void set_learns_nonnegative_weights ( - bool value - ) - { - learn_nonnegative_weights = value; - if (learn_nonnegative_weights) - prior.set_size(0); - } - - void set_prior ( - const trained_function_type& prior_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(prior_.basis_vectors.size() == 1 && - prior_.alpha(0) == 1, - "\t void svm_rank_trainer::set_prior()" - << "\n\t The supplied prior could not have been created by this object's train() method." 
- << "\n\t prior_.basis_vectors.size(): " << prior_.basis_vectors.size() - << "\n\t prior_.alpha(0): " << prior_.alpha(0) - << "\n\t this: " << this - ); - - prior = sparse_to_dense(prior_.basis_vectors(0)); - learn_nonnegative_weights = false; - last_weight_1 = false; - } - - bool has_prior ( - ) const - { - return prior.size() != 0; - } - - void set_c ( - scalar_type C_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C_ > 0, - "\t void svm_rank_trainer::set_c()" - << "\n\t C_ must be greater than 0" - << "\n\t C_: " << C_ - << "\n\t this: " << this - ); - - C = C_; - } - - const scalar_type get_c ( - ) const - { - return C; - } - - const decision_function<kernel_type> train ( - const std::vector<ranking_pair<sample_type> >& samples - ) const - { - // make sure requires clause is not broken - DLIB_CASSERT(is_ranking_problem(samples) == true, - "\t decision_function svm_rank_trainer::train(samples)" - << "\n\t invalid inputs were given to this function" - << "\n\t samples.size(): " << samples.size() - << "\n\t is_ranking_problem(samples): " << is_ranking_problem(samples) - ); - - - typedef matrix<scalar_type,0,1> w_type; - w_type w; - - const unsigned long num_dims = max_index_plus_one(samples); - - unsigned long num_nonnegative = 0; - if (learn_nonnegative_weights) - { - num_nonnegative = num_dims; - } - - unsigned long force_weight_1_idx = std::numeric_limits<unsigned long>::max(); - if (last_weight_1) - { - force_weight_1_idx = num_dims-1; - } - - if (has_prior()) - { - if (is_matrix<sample_type>::value) - { - // make sure requires clause is not broken - DLIB_CASSERT(num_dims == (unsigned long)prior.size(), - "\t decision_function svm_rank_trainer::train(samples)" - << "\n\t The dimension of the training vectors must match the dimension of\n" - << "\n\t those used to create the prior." - << "\n\t num_dims: " << num_dims - << "\n\t prior.size(): " << prior.size() - ); - } - const unsigned long dims = std::max(num_dims, (unsigned long)prior.size()); - // In the case of sparse sample vectors, it is possible that the input - // vector dimensionality is larger than the prior vector dimensionality. - // We need to check for this case and pad prior with zeros if it is the - // case. - if ((unsigned long)prior.size() < dims) - { - matrix<scalar_type,0,1> prior_temp = join_cols(prior, zeros_matrix<scalar_type>(dims-prior.size(),1)); - solver( make_oca_problem_ranking_svm<w_type>(C, samples, verbose, eps, max_iterations, dims), - w, - prior_temp); - } - else - { - solver( make_oca_problem_ranking_svm<w_type>(C, samples, verbose, eps, max_iterations, dims), - w, - prior); - } - - } - else - { - solver( make_oca_problem_ranking_svm<w_type>(C, samples, verbose, eps, max_iterations, num_dims), - w, - num_nonnegative, - force_weight_1_idx); - } - - - // put the solution into a decision function and then return it - decision_function<kernel_type> df; - df.b = 0; - df.basis_vectors.set_size(1); - // Copy the results into the output basis vector. The output vector might be a - // sparse vector container so we need to use this special kind of copy to - // handle that case. 
- assign(df.basis_vectors(0), matrix_cast<scalar_type>(w)); - df.alpha.set_size(1); - df.alpha(0) = 1; - - return df; - } - - const decision_function<kernel_type> train ( - const ranking_pair<sample_type>& sample - ) const - { - return train(std::vector<ranking_pair<sample_type> >(1, sample)); - } - - private: - - scalar_type C; - oca solver; - scalar_type eps; - bool verbose; - unsigned long max_iterations; - bool learn_nonnegative_weights; - bool last_weight_1; - matrix<scalar_type,0,1> prior; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVM_RANK_TrAINER_Hh_ - diff --git a/ml/dlib/dlib/svm/svm_rank_trainer_abstract.h b/ml/dlib/dlib/svm/svm_rank_trainer_abstract.h deleted file mode 100644 index 4658d950f..000000000 --- a/ml/dlib/dlib/svm/svm_rank_trainer_abstract.h +++ /dev/null @@ -1,298 +0,0 @@ -// Copyright (C) 2012 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SVM_RANK_TrAINER_ABSTRACT_Hh_ -#ifdef DLIB_SVM_RANK_TrAINER_ABSTRACT_Hh_ - -#include "ranking_tools_abstract.h" -#include "sparse_vector_abstract.h" -#include "function_abstract.h" -#include "kernel_abstract.h" -#include "../algs.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class svm_rank_trainer - { - /*! - REQUIREMENTS ON K - Is either linear_kernel or sparse_linear_kernel. - - WHAT THIS OBJECT REPRESENTS - This object represents a tool for training a ranking support vector machine - using linear kernels. In particular, this object is a tool for training - the Ranking SVM described in the paper: - Optimizing Search Engines using Clickthrough Data by Thorsten Joachims - - Note that we normalize the C parameter by multiplying it by 1/(number of ranking pairs). - Therefore, to make an exact comparison between this object and Equation 12 - in the paper you must multiply C by the appropriate normalizing quantity. - - Finally, note that the implementation of this object is done using the oca - optimizer and count_ranking_inversions() method. This means that it runs - in O(n*log(n)) time, making it suitable for use with large datasets. - !*/ - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - svm_rank_trainer ( - ); - /*! - ensures - - This object is properly initialized and ready to be used to train a - ranking support vector machine. - - #get_oca() == oca() (i.e. an instance of oca with default parameters) - - #get_c() == 1 - - #get_epsilon() == 0.001 - - this object will not be verbose unless be_verbose() is called - - #get_max_iterations() == 10000 - - #learns_nonnegative_weights() == false - - #forces_last_weight_to_1() == false - - #has_prior() == false - !*/ - - explicit svm_rank_trainer ( - const scalar_type& C - ); - /*! - requires - - C > 0 - ensures - - This object is properly initialized and ready to be used to train a - ranking support vector machine. - - #get_oca() == oca() (i.e. 
an instance of oca with default parameters) - - #get_c() == C - - #get_epsilon() == 0.001 - - this object will not be verbose unless be_verbose() is called - - #get_max_iterations() == 10000 - - #learns_nonnegative_weights() == false - - #forces_last_weight_to_1() == false - - #has_prior() == false - !*/ - - void set_epsilon ( - scalar_type eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - const scalar_type get_epsilon ( - ); - /*! - ensures - - returns the error epsilon that determines when training should stop. - Smaller values may result in a more accurate solution but take longer to - train. You can think of this epsilon value as saying "solve the - optimization problem until the average ranking accuracy is within epsilon - of its optimal value". Here we mean "ranking accuracy" in the same sense - used by test_ranking_function() and cross_validate_ranking_trainer(). - !*/ - - unsigned long get_max_iterations ( - ) const; - /*! - ensures - - returns the maximum number of iterations the SVM optimizer is allowed to - run before it is required to stop and return a result. - !*/ - - void set_max_iterations ( - unsigned long max_iter - ); - /*! - ensures - - #get_max_iterations() == max_iter - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out so that a user can - observe the progress of the algorithm. - !*/ - - void be_quiet ( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - bool forces_last_weight_to_1 ( - ) const; - /*! - ensures - - returns true if this trainer has the constraint that the last weight in - the learned parameter vector must be 1. This is the weight corresponding - to the feature in the training vectors with the highest dimension. - !*/ - - void force_last_weight_to_1 ( - bool should_last_weight_be_1 - ); - /*! - ensures - - #forces_last_weight_to_1() == should_last_weight_be_1 - - if (should_last_weight_be_1 == true) then - - #has_prior() == false - !*/ - - void set_oca ( - const oca& item - ); - /*! - ensures - - #get_oca() == item - !*/ - - const oca get_oca ( - ) const; - /*! - ensures - - returns a copy of the optimizer used to solve the SVM problem. - !*/ - - const kernel_type get_kernel ( - ) const; - /*! - ensures - - returns a copy of the kernel function in use by this object. Since the - linear kernels don't have any parameters this function just returns - kernel_type() - !*/ - - bool learns_nonnegative_weights ( - ) const; - /*! - ensures - - The output of training is a weight vector that defines the behavior of - the resulting decision function. That is, the decision function simply - takes the dot product between the learned weight vector and a test sample - and returns the result. Therefore, if learns_nonnegative_weights() == true - then the resulting learned weight vector will always have non-negative - entries. - !*/ - - void set_learns_nonnegative_weights ( - bool value - ); - /*! - ensures - - #learns_nonnegative_weights() == value - - if (value == true) then - - #has_prior() == false - !*/ - - void set_prior ( - const trained_function_type& prior - ); - /*! - requires - - prior == a function produced by a call to this class's train() function. - Therefore, it must be the case that: - - prior.basis_vectors.size() == 1 - - prior.alpha(0) == 1 - ensures - - Subsequent calls to train() will try to learn a function similar to the - given prior. 
- - #has_prior() == true - - #learns_nonnegative_weights() == false - - #forces_last_weight_to_1() == false - !*/ - - bool has_prior ( - ) const - /*! - ensures - - returns true if a prior has been set and false otherwise. Having a prior - set means that you have called set_prior() and supplied a previously - trained function as a reference. In this case, any call to train() will - try to learn a function that matches the behavior of the prior as close - as possible but also fits the supplied training data. In more technical - detail, having a prior means we replace the ||w||^2 regularizer with one - of the form ||w-prior||^2 where w is the set of parameters for a learned - function. - !*/ - - void set_c ( - scalar_type C - ); - /*! - requires - - C > 0 - ensures - - #get_c() == C - !*/ - - const scalar_type get_c ( - ) const; - /*! - ensures - - returns the SVM regularization parameter. It is the parameter that - determines the trade off between trying to fit the training data exactly - or allowing more errors but hopefully improving the generalization of the - resulting classifier. Larger values encourage exact fitting while - smaller values of C may encourage better generalization. - !*/ - - const decision_function<kernel_type> train ( - const std::vector<ranking_pair<sample_type> >& samples - ) const; - /*! - requires - - is_ranking_problem(samples) == true - - if (has_prior()) then - - The vectors in samples must have the same dimensionality as the - vectors used to train the prior given to set_prior(). - ensures - - trains a ranking support vector classifier given the training samples. - - returns a decision function F with the following properties: - - F.alpha.size() == 1 - - F.basis_vectors.size() == 1 - - F.alpha(0) == 1 - - Given two vectors, A and B, then A is predicted to come before B - in the learned ranking if and only if F(A) > F(B). - - Based on the contents of samples, F will attempt to give relevant - vectors higher scores than non-relevant vectors. - !*/ - - const decision_function<kernel_type> train ( - const ranking_pair<sample_type>& sample - ) const; - /*! - requires - - is_ranking_problem(std::vector<ranking_pair<sample_type> >(1, sample)) == true - - if (has_prior()) then - - The vectors in samples must have the same dimensionality as the - vectors used to train the prior given to set_prior(). - ensures - - This is just a convenience routine for calling the above train() - function. That is, it just copies sample into a std::vector object and - invokes the above train() method. This means that calling this function - is equivalent to invoking: - return train(std::vector<ranking_pair<sample_type> >(1, sample)); - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVM_RANK_TrAINER_ABSTRACT_Hh_ - diff --git a/ml/dlib/dlib/svm/svm_threaded.h b/ml/dlib/dlib/svm/svm_threaded.h deleted file mode 100644 index 37927456b..000000000 --- a/ml/dlib/dlib/svm/svm_threaded.h +++ /dev/null @@ -1,253 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
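As a quick orientation, a minimal svm_rank_trainer usage sketch in the spirit of the abstract above; the 4-dimensional samples and the C value are illustrative assumptions:

#include <dlib/svm.h>

int main()
{
    using namespace dlib;
    typedef matrix<double,4,1> sample_type;
    typedef linear_kernel<sample_type> kernel_type;

    // A ranking_pair holds the vectors that should score high (relevant)
    // and low (nonrelevant) for one query.
    ranking_pair<sample_type> query;
    sample_type samp;
    samp = 1, 0, 0, 0;  query.relevant.push_back(samp);
    samp = 0, 1, 0, 0;  query.nonrelevant.push_back(samp);

    svm_rank_trainer<kernel_type> trainer;
    trainer.set_c(10);  // an arbitrary regularization choice

    decision_function<kernel_type> rank = trainer.train(query);
    // rank(a) > rank(b) means a is predicted to come before b.
    return 0;
}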
-#ifndef DLIB_SVm_THREADED_ -#define DLIB_SVm_THREADED_ - -#include <cmath> -#include <iostream> -#include <limits> -#include <sstream> -#include <vector> - -#include "svm_threaded_abstract.h" -#include "svm.h" -#include "../matrix.h" -#include "../algs.h" -#include "../serialize.h" -#include "function.h" -#include "kernel.h" -#include "../threads.h" -#include "../pipe.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - namespace cvtti_helpers - { - template <typename trainer_type, typename in_sample_vector_type> - struct job - { - typedef typename trainer_type::scalar_type scalar_type; - typedef typename trainer_type::sample_type sample_type; - typedef typename trainer_type::mem_manager_type mem_manager_type; - typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type; - typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; - - job() : x(0) {} - - trainer_type trainer; - matrix<long,0,1> x_test, x_train; - scalar_vector_type y_test, y_train; - const in_sample_vector_type* x; - }; - - struct task - { - template < - typename trainer_type, - typename mem_manager_type, - typename in_sample_vector_type - > - void operator()( - job<trainer_type,in_sample_vector_type>& j, - matrix<double,1,2,mem_manager_type>& result - ) - { - try - { - result = test_binary_decision_function(j.trainer.train(rowm(*j.x,j.x_train), j.y_train), rowm(*j.x,j.x_test), j.y_test); - - // Do this just to make j release its memory since people might run threaded cross validation - // on very large datasets. Every bit of freed memory helps out. - j = job<trainer_type,in_sample_vector_type>(); - } - catch (invalid_nu_error&) - { - // If this is a svm_nu_trainer then we might get this exception if the nu is - // invalid. In this case just return a cross validation score of 0. - result = 0; - } - catch (std::bad_alloc&) - { - std::cerr << "\nstd::bad_alloc thrown while running cross_validate_trainer_threaded(). Not enough memory.\n" << std::endl; - throw; - } - } - }; - } - - template < - typename trainer_type, - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const matrix<double, 1, 2, typename trainer_type::mem_manager_type> - cross_validate_trainer_threaded_impl ( - const trainer_type& trainer, - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - const long folds, - const long num_threads - ) - { - using namespace dlib::cvtti_helpers; - typedef typename trainer_type::mem_manager_type mem_manager_type; - - // make sure requires clause is not broken - DLIB_ASSERT(is_binary_classification_problem(x,y) == true && - 1 < folds && folds <= std::min(sum(y>0),sum(y<0)) && - num_threads > 0, - "\tmatrix cross_validate_trainer_threaded()" - << "\n\t invalid inputs were given to this function" - << "\n\t std::min(sum(y>0),sum(y<0)): " << std::min(sum(y>0),sum(y<0)) - << "\n\t folds: " << folds - << "\n\t num_threads: " << num_threads - << "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))?
"true":"false") - ); - - - task mytask; - thread_pool tp(num_threads); - - - // count the number of positive and negative examples - long num_pos = 0; - long num_neg = 0; - for (long r = 0; r < y.nr(); ++r) - { - if (y(r) == +1.0) - ++num_pos; - else - ++num_neg; - } - - // figure out how many positive and negative examples we will have in each fold - const long num_pos_test_samples = num_pos/folds; - const long num_pos_train_samples = num_pos - num_pos_test_samples; - const long num_neg_test_samples = num_neg/folds; - const long num_neg_train_samples = num_neg - num_neg_test_samples; - - - long pos_idx = 0; - long neg_idx = 0; - - - - std::vector<future<job<trainer_type,in_sample_vector_type> > > jobs(folds); - std::vector<future<matrix<double, 1, 2, mem_manager_type> > > results(folds); - - - for (long i = 0; i < folds; ++i) - { - job<trainer_type,in_sample_vector_type>& j = jobs[i].get(); - - j.x = &x; - j.x_test.set_size (num_pos_test_samples + num_neg_test_samples); - j.y_test.set_size (num_pos_test_samples + num_neg_test_samples); - j.x_train.set_size(num_pos_train_samples + num_neg_train_samples); - j.y_train.set_size(num_pos_train_samples + num_neg_train_samples); - j.trainer = trainer; - - long cur = 0; - - // load up our positive test samples - while (cur < num_pos_test_samples) - { - if (y(pos_idx) == +1.0) - { - j.x_test(cur) = pos_idx; - j.y_test(cur) = +1.0; - ++cur; - } - pos_idx = (pos_idx+1)%x.nr(); - } - - // load up our negative test samples - while (cur < j.x_test.nr()) - { - if (y(neg_idx) == -1.0) - { - j.x_test(cur) = neg_idx; - j.y_test(cur) = -1.0; - ++cur; - } - neg_idx = (neg_idx+1)%x.nr(); - } - - // load the training data from the data following whatever we loaded - // as the testing data - long train_pos_idx = pos_idx; - long train_neg_idx = neg_idx; - cur = 0; - - // load up our positive train samples - while (cur < num_pos_train_samples) - { - if (y(train_pos_idx) == +1.0) - { - j.x_train(cur) = train_pos_idx; - j.y_train(cur) = +1.0; - ++cur; - } - train_pos_idx = (train_pos_idx+1)%x.nr(); - } - - // load up our negative train samples - while (cur < j.x_train.nr()) - { - if (y(train_neg_idx) == -1.0) - { - j.x_train(cur) = train_neg_idx; - j.y_train(cur) = -1.0; - ++cur; - } - train_neg_idx = (train_neg_idx+1)%x.nr(); - } - - // finally spawn a task to process this job - tp.add_task(mytask, jobs[i], results[i]); - - } // for (long i = 0; i < folds; ++i) - - matrix<double, 1, 2, mem_manager_type> res; - set_all_elements(res,0); - - // now compute the total results - for (long i = 0; i < folds; ++i) - { - res += results[i].get(); - } - - return res/(double)folds; - } - - template < - typename trainer_type, - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const matrix<double, 1, 2, typename trainer_type::mem_manager_type> - cross_validate_trainer_threaded ( - const trainer_type& trainer, - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - const long folds, - const long num_threads - ) - { - return cross_validate_trainer_threaded_impl(trainer, - mat(x), - mat(y), - folds, - num_threads); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_THREADED_ - - diff --git a/ml/dlib/dlib/svm/svm_threaded_abstract.h b/ml/dlib/dlib/svm/svm_threaded_abstract.h deleted file mode 100644 index f9973fb5c..000000000 --- a/ml/dlib/dlib/svm/svm_threaded_abstract.h +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (C) 2008 Davis E. 
King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SVm_THREADED_ABSTRACT_ -#ifdef DLIB_SVm_THREADED_ABSTRACT_ - -#include "../matrix/matrix_abstract.h" -#include "../algs.h" -#include "../svm.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename trainer_type, - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const matrix<double, 1, 2, typename trainer_type::mem_manager_type> - cross_validate_trainer_threaded ( - const trainer_type& trainer, - const in_sample_vector_type& x, - const in_scalar_vector_type& y, - const long folds, - const long num_threads - ); - /*! - requires - - is_binary_classification_problem(x,y) == true - - 1 < folds <= std::min(sum(y>0),sum(y<0)) - (e.g. There must be at least as many examples of each class as there are folds) - - trainer_type == some kind of trainer object (e.g. svm_nu_trainer) - - num_threads > 0 - - It must be safe for multiple trainer objects to access the elements of x from - multiple threads at the same time. Note that all trainers and kernels in - dlib are thread safe in this regard since they do not mutate the elements of x. - ensures - - performs k-fold cross validation by using the given trainer to solve the - given binary classification problem for the given number of folds. - Each fold is tested using the output of the trainer and the average - classification accuracy from all folds is returned. - - uses num_threads threads of execution in doing the cross validation. - - The accuracy is returned in a row vector, let us call it R. Both - quantities in R are numbers between 0 and 1 which represent the fraction - of examples correctly classified. R(0) is the fraction of +1 examples - correctly classified and R(1) is the fraction of -1 examples correctly - classified. - - The number of folds used is given by the folds argument. - throws - - any exceptions thrown by trainer.train() - - std::bad_alloc - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_THREADED_ABSTRACT_ - - - diff --git a/ml/dlib/dlib/svm/svr_linear_trainer.h b/ml/dlib/dlib/svm/svr_linear_trainer.h deleted file mode 100644 index 27ce5b52a..000000000 --- a/ml/dlib/dlib/svm/svr_linear_trainer.h +++ /dev/null @@ -1,424 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_SVR_LINEAR_TrAINER_Hh_ -#define DLIB_SVR_LINEAR_TrAINER_Hh_ - -#include "svr_linear_trainer_abstract.h" - -#include "../algs.h" -#include "../optimization.h" -#include "function.h" -#include "kernel.h" -#include "sparse_vector.h" -#include <iostream> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type, - typename sample_type - > - class oca_problem_linear_svr : public oca_problem<matrix_type > - { - public: - /* - This class is used as part of the implementation of the svr_linear_trainer - defined towards the end of this file. 
- */ - - typedef typename matrix_type::type scalar_type; - - oca_problem_linear_svr( - const scalar_type C_, - const std::vector<sample_type>& samples_, - const std::vector<scalar_type>& targets_, - const bool be_verbose_, - const scalar_type eps_, - const scalar_type eps_insensitivity_, - const unsigned long max_iter - ) : - samples(samples_), - targets(targets_), - C(C_), - be_verbose(be_verbose_), - eps(eps_), - eps_insensitivity(eps_insensitivity_), - max_iterations(max_iter) - { - } - - virtual scalar_type get_c ( - ) const - { - return C; - } - - virtual long get_num_dimensions ( - ) const - { - // plus one for the bias term - return max_index_plus_one(samples) + 1; - } - - virtual bool optimization_status ( - scalar_type current_objective_value, - scalar_type current_error_gap, - scalar_type current_risk_value, - scalar_type current_risk_gap, - unsigned long num_cutting_planes, - unsigned long num_iterations - ) const - { - current_risk_value /= samples.size(); - current_risk_gap /= samples.size(); - if (be_verbose) - { - using namespace std; - cout << "objective: " << current_objective_value << endl; - cout << "objective gap: " << current_error_gap << endl; - cout << "risk: " << current_risk_value << endl; - cout << "risk gap: " << current_risk_gap << endl; - cout << "num planes: " << num_cutting_planes << endl; - cout << "iter: " << num_iterations << endl; - cout << endl; - } - - if (num_iterations >= max_iterations) - return true; - - if (current_risk_gap < eps*eps_insensitivity) - return true; - - return false; - } - - virtual bool risk_has_lower_bound ( - scalar_type& lower_bound - ) const - { - lower_bound = 0; - return true; - } - - virtual void get_risk ( - matrix_type& w, - scalar_type& risk, - matrix_type& subgradient - ) const - { - subgradient.set_size(w.size(),1); - subgradient = 0; - risk = 0; - - // loop over all the samples and compute the risk and its subgradient at the current solution point w - for (unsigned long i = 0; i < samples.size(); ++i) - { - const long w_size_m1 = w.size()-1; - const scalar_type prediction = dot(colm(w,0,w_size_m1), samples[i]) - w(w_size_m1); - - if (std::abs(prediction - targets[i]) > eps_insensitivity) - { - if (prediction < targets[i]) - { - subtract_from(subgradient, samples[i]); - subgradient(w_size_m1) += 1; - } - else - { - add_to(subgradient, samples[i]); - subgradient(w_size_m1) -= 1; - } - - risk += std::abs(prediction - targets[i]) - eps_insensitivity; - } - } - } - - private: - - // ----------------------------------------------------- - // ----------------------------------------------------- - - - const std::vector<sample_type>& samples; - const std::vector<scalar_type>& targets; - const scalar_type C; - - const bool be_verbose; - const scalar_type eps; - const scalar_type eps_insensitivity; - const unsigned long max_iterations; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type, - typename sample_type, - typename scalar_type - > - oca_problem_linear_svr<matrix_type, sample_type> make_oca_problem_linear_svr ( - const scalar_type C, - const std::vector<sample_type>& samples, - const std::vector<scalar_type>& targets, - const bool be_verbose, - const scalar_type eps, - const scalar_type eps_insensitivity, - const unsigned long max_iterations - ) - { - return oca_problem_linear_svr<matrix_type, sample_type>( - C, samples, targets, be_verbose, eps, eps_insensitivity, max_iterations); - } - -// 
---------------------------------------------------------------------------------------- - - template < - typename K - > - class svr_linear_trainer - { - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - // You are getting a compiler error on this line because you supplied a non-linear kernel - // to the svr_linear_trainer object. You have to use one of the linear kernels with this - // trainer. - COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value || - is_same_type<K, sparse_linear_kernel<sample_type> >::value )); - - svr_linear_trainer ( - ) - { - C = 1; - verbose = false; - eps = 0.01; - max_iterations = 10000; - learn_nonnegative_weights = false; - last_weight_1 = false; - eps_insensitivity = 0.1; - } - - explicit svr_linear_trainer ( - const scalar_type& C_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C_ > 0, - "\t svr_linear_trainer::svr_linear_trainer()" - << "\n\t C_ must be greater than 0" - << "\n\t C_: " << C_ - << "\n\t this: " << this - ); - - C = C_; - verbose = false; - eps = 0.01; - max_iterations = 10000; - learn_nonnegative_weights = false; - last_weight_1 = false; - eps_insensitivity = 0.1; - } - - void set_epsilon ( - scalar_type eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void svr_linear_trainer::set_epsilon()" - << "\n\t eps_ must be greater than 0" - << "\n\t eps_: " << eps_ - << "\n\t this: " << this - ); - - eps = eps_; - } - - const scalar_type get_epsilon ( - ) const { return eps; } - - void set_epsilon_insensitivity ( - scalar_type eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\tvoid svr_linear_trainer::set_epsilon_insensitivity(eps_)" - << "\n\t invalid inputs were given to this function" - << "\n\t eps_: " << eps_ - ); - eps_insensitivity = eps_; - } - - const scalar_type get_epsilon_insensitivity ( - ) const - { - return eps_insensitivity; - } - - unsigned long get_max_iterations ( - ) const { return max_iterations; } - - void set_max_iterations ( - unsigned long max_iter - ) - { - max_iterations = max_iter; - } - - void be_verbose ( - ) - { - verbose = true; - } - - void be_quiet ( - ) - { - verbose = false; - } - - bool forces_last_weight_to_1 ( - ) const - { - return last_weight_1; - } - - void force_last_weight_to_1 ( - bool should_last_weight_be_1 - ) - { - last_weight_1 = should_last_weight_be_1; - } - - void set_oca ( - const oca& item - ) - { - solver = item; - } - - const oca get_oca ( - ) const - { - return solver; - } - - const kernel_type get_kernel ( - ) const - { - return kernel_type(); - } - - bool learns_nonnegative_weights ( - ) const { return learn_nonnegative_weights; } - - void set_learns_nonnegative_weights ( - bool value - ) - { - learn_nonnegative_weights = value; - } - - void set_c ( - scalar_type C_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C_ > 0, - "\t void svr_linear_trainer::set_c()" - << "\n\t C_ must be greater than 0" - << "\n\t C_: " << C_ - << "\n\t this: " << this - ); - - C = C_; - } - - const scalar_type get_c ( - ) const - { - return C; - } - - const decision_function<kernel_type> train ( - const std::vector<sample_type>& samples, - const std::vector<scalar_type>& targets - ) const - { - // make sure requires clause is not broken - 
DLIB_CASSERT(is_learning_problem(samples, targets) == true, - "\t decision_function svr_linear_trainer::train(samples, targets)" - << "\n\t invalid inputs were given to this function" - << "\n\t samples.size(): " << samples.size() - << "\n\t targets.size(): " << targets.size() - << "\n\t is_learning_problem(samples,targets): " << is_learning_problem(samples,targets) - ); - - - typedef matrix<scalar_type,0,1> w_type; - w_type w; - - const unsigned long num_dims = max_index_plus_one(samples); - - unsigned long num_nonnegative = 0; - if (learn_nonnegative_weights) - { - num_nonnegative = num_dims; - } - - unsigned long force_weight_1_idx = std::numeric_limits<unsigned long>::max(); - if (last_weight_1) - { - force_weight_1_idx = num_dims-1; - } - - solver( make_oca_problem_linear_svr<w_type>(C, samples, targets, verbose, eps, eps_insensitivity, max_iterations), - w, - num_nonnegative, - force_weight_1_idx); - - - // put the solution into a decision function and then return it - decision_function<kernel_type> df; - df.b = static_cast<scalar_type>(w(w.size()-1)); - df.basis_vectors.set_size(1); - // Copy the plane normal into the output basis vector. The output vector might be a - // sparse vector container so we need to use this special kind of copy to handle that case. - // As an aside, the reason for using max_index_plus_one() and not just w.size()-1 is because - // doing it this way avoids an inane warning from gcc that can occur in some cases. - const long out_size = max_index_plus_one(samples); - assign(df.basis_vectors(0), matrix_cast<scalar_type>(colm(w, 0, out_size))); - df.alpha.set_size(1); - df.alpha(0) = 1; - - return df; - } - - private: - - scalar_type C; - oca solver; - scalar_type eps; - bool verbose; - unsigned long max_iterations; - bool learn_nonnegative_weights; - bool last_weight_1; - scalar_type eps_insensitivity; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVR_LINEAR_TrAINER_Hh_ - diff --git a/ml/dlib/dlib/svm/svr_linear_trainer_abstract.h b/ml/dlib/dlib/svm/svr_linear_trainer_abstract.h deleted file mode 100644 index c74310f06..000000000 --- a/ml/dlib/dlib/svm/svr_linear_trainer_abstract.h +++ /dev/null @@ -1,269 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SVR_LINEAR_TrAINER_ABSTRACT_Hh_ -#ifdef DLIB_SVR_LINEAR_TrAINER_ABSTRACT_Hh_ - -#include "sparse_vector_abstract.h" -#include "function_abstract.h" -#include "kernel_abstract.h" -#include "../algs.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class svr_linear_trainer - { - /*! - REQUIREMENTS ON K - Is either linear_kernel or sparse_linear_kernel. - - WHAT THIS OBJECT REPRESENTS - This object implements a trainer for performing epsilon-insensitive support - vector regression. It uses the oca optimizer so it is very efficient at - solving this problem when linear kernels are used, making it suitable for - use with large datasets. - - For an introduction to support vector regression see the following paper: - A Tutorial on Support Vector Regression by Alex J. Smola and Bernhard Scholkopf. - Note that this object solves the version of support vector regression - defined by equation (3) in the paper, except that we incorporate the bias - term into the w vector by appending a 1 to the end of each sample. 
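- Concretely, the loss this trainer minimizes for a training pair (x,y) is - max(0, abs(f(x) - y) - get_epsilon_insensitivity()), so predictions inside - the insensitivity band are not penalized at all.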
- !*/ - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - svr_linear_trainer ( - ); - /*! - ensures - - This object is properly initialized and ready to be used to train a - support vector regression model. - - #get_oca() == oca() (i.e. an instance of oca with default parameters) - - #get_c() == 1 - - #get_epsilon() == 0.01 - - #get_epsilon_insensitivity() == 0.1 - - This object will not be verbose unless be_verbose() is called - - #get_max_iterations() == 10000 - - #learns_nonnegative_weights() == false - - #forces_last_weight_to_1() == false - !*/ - - explicit svr_linear_trainer ( - const scalar_type& C - ); - /*! - requires - - C > 0 - ensures - - This object is properly initialized and ready to be used to train a - support vector regression model. - - #get_oca() == oca() (i.e. an instance of oca with default parameters) - - #get_c() == C - - #get_epsilon() == 0.01 - - #get_epsilon_insensitivity() == 0.1 - - This object will not be verbose unless be_verbose() is called - - #get_max_iterations() == 10000 - - #learns_nonnegative_weights() == false - - #forces_last_weight_to_1() == false - !*/ - - void set_epsilon ( - scalar_type eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - const scalar_type get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Smaller values may result in a more accurate solution but take longer to - train. You can think of this epsilon value as saying "solve the - optimization problem until the average regression error is within epsilon - of its optimal value". See get_epsilon_insensitivity() below for a - definition of "regression error". - !*/ - - void set_epsilon_insensitivity ( - scalar_type eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon_insensitivity() == eps - !*/ - - const scalar_type get_epsilon_insensitivity ( - ) const; - /*! - ensures - - This object tries to find a function which minimizes the regression error - on a training set. This error is measured in the following way: - - if (abs(predicted_value - true_labeled_value) < eps) then - - The error is 0. That is, any function which gets within eps of - the correct output is good enough. - - else - - The error grows linearly once it gets bigger than eps. - - So epsilon-insensitive regression means we do regression but stop trying - to fit a data point once it is "close enough". This function returns - that eps value which controls what we mean by "close enough". - !*/ - - unsigned long get_max_iterations ( - ) const; - /*! - ensures - - returns the maximum number of iterations the SVM optimizer is allowed to - run before it is required to stop and return a result. - !*/ - - void set_max_iterations ( - unsigned long max_iter - ); - /*! - ensures - - #get_max_iterations() == max_iter - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out so that a user can - observe the progress of the algorithm. - !*/ - - void be_quiet ( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - bool forces_last_weight_to_1 ( - ) const; - /*! - ensures - - returns true if this trainer has the constraint that the last weight in - the learned parameter vector must be 1.
This is the weight corresponding - to the feature in the training vectors with the highest dimension. - - Forcing the last weight to 1 also disables the bias and therefore the b - field of the learned decision_function will be 0 when forces_last_weight_to_1() == true. - !*/ - - void force_last_weight_to_1 ( - bool should_last_weight_be_1 - ); - /*! - ensures - - #forces_last_weight_to_1() == should_last_weight_be_1 - !*/ - - void set_oca ( - const oca& item - ); - /*! - ensures - - #get_oca() == item - !*/ - - const oca get_oca ( - ) const; - /*! - ensures - - returns a copy of the optimizer used to solve the SVM problem. - !*/ - - const kernel_type get_kernel ( - ) const; - /*! - ensures - - returns a copy of the kernel function in use by this object. Since the - linear kernels don't have any parameters this function just returns - kernel_type() - !*/ - - bool learns_nonnegative_weights ( - ) const; - /*! - ensures - - The output of training is a weight vector and a bias value. These two - things define the resulting decision function. That is, the decision - function simply takes the dot product between the learned weight vector - and a test sample, then subtracts the bias value. Therefore, if - learns_nonnegative_weights() == true then the resulting learned weight - vector will always have non-negative entries. The bias value may still - be negative though. - !*/ - - void set_learns_nonnegative_weights ( - bool value - ); - /*! - ensures - - #learns_nonnegative_weights() == value - !*/ - - void set_c ( - scalar_type C - ); - /*! - requires - - C > 0 - ensures - - #get_c() == C - !*/ - - const scalar_type get_c ( - ) const; - /*! - ensures - - returns the SVM regularization parameter. It is the parameter that - determines the trade off between trying to fit the training data exactly - or allowing more errors but hopefully improving the generalization of the - resulting classifier. Larger values encourage exact fitting while - smaller values of C may encourage better generalization. - !*/ - - const decision_function<kernel_type> train ( - const std::vector<sample_type>& samples, - const std::vector<scalar_type>& targets - ) const; - /*! - requires - - is_learning_problem(samples,targets) == true - ensures - - performs support vector regression given the training samples and targets. - - returns a decision_function F with the following properties: - - F(new_sample) == predicted target value for new_sample - - F.alpha.size() == 1 - - F.basis_vectors.size() == 1 - - F.alpha(0) == 1 - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVR_LINEAR_TrAINER_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/svm/svr_trainer.h b/ml/dlib/dlib/svm/svr_trainer.h deleted file mode 100644 index bc6378a20..000000000 --- a/ml/dlib/dlib/svm/svr_trainer.h +++ /dev/null @@ -1,393 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
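A minimal usage sketch of the svr_linear_trainer interface documented above; the 1-dimensional samples, the target function, and the parameter values are illustrative assumptions:

#include <dlib/svm.h>
#include <vector>

int main()
{
    using namespace dlib;
    typedef matrix<double,0,1> sample_type;
    typedef linear_kernel<sample_type> kernel_type;

    std::vector<sample_type> samples;
    std::vector<double> targets;
    sample_type m(1);                    // 1-D samples, an arbitrary choice
    for (double x = 0; x < 10; x += 0.5)
    {
        m(0) = x;
        samples.push_back(m);
        targets.push_back(2*x + 1);      // learn a simple linear relation
    }

    svr_linear_trainer<kernel_type> trainer;
    trainer.set_c(10);                   // arbitrary regularization strength
    trainer.set_epsilon_insensitivity(0.01);

    decision_function<kernel_type> df = trainer.train(samples, targets);
    // df(m) now approximates 2*m(0) + 1 within the insensitivity band.
    return 0;
}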
-#ifndef DLIB_SVm_EPSILON_REGRESSION_TRAINER_Hh_ -#define DLIB_SVm_EPSILON_REGRESSION_TRAINER_Hh_ - - -#include "svr_trainer_abstract.h" -#include <cmath> -#include <limits> -#include "../matrix.h" -#include "../algs.h" - -#include "function.h" -#include "kernel.h" -#include "../optimization/optimization_solve_qp3_using_smo.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class svr_trainer - { - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - svr_trainer ( - ) : - C(1), - eps_insensitivity(0.1), - cache_size(200), - eps(0.001) - { - } - - void set_cache_size ( - long cache_size_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(cache_size_ > 0, - "\tvoid svr_trainer::set_cache_size(cache_size_)" - << "\n\t invalid inputs were given to this function" - << "\n\t cache_size: " << cache_size_ - ); - cache_size = cache_size_; - } - - long get_cache_size ( - ) const - { - return cache_size; - } - - void set_epsilon ( - scalar_type eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\tvoid svr_trainer::set_epsilon(eps_)" - << "\n\t invalid inputs were given to this function" - << "\n\t eps_: " << eps_ - ); - eps = eps_; - } - - const scalar_type get_epsilon ( - ) const - { - return eps; - } - - void set_epsilon_insensitivity ( - scalar_type eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\tvoid svr_trainer::set_epsilon_insensitivity(eps_)" - << "\n\t invalid inputs were given to this function" - << "\n\t eps_: " << eps_ - ); - eps_insensitivity = eps_; - } - - const scalar_type get_epsilon_insensitivity ( - ) const - { - return eps_insensitivity; - } - - void set_kernel ( - const kernel_type& k - ) - { - kernel_function = k; - } - - const kernel_type& get_kernel ( - ) const - { - return kernel_function; - } - - void set_c ( - scalar_type C_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C_ > 0, - "\t void svr_trainer::set_c()" - << "\n\t C must be greater than 0" - << "\n\t C_: " << C_ - << "\n\t this: " << this - ); - - C = C_; - } - - const scalar_type get_c ( - ) const - { - return C; - } - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - return do_train(mat(x), mat(y)); - } - - void swap ( - svr_trainer& item - ) - { - exchange(kernel_function, item.kernel_function); - exchange(C, item.C); - exchange(eps_insensitivity, item.eps_insensitivity); - exchange(cache_size, item.cache_size); - exchange(eps, item.eps); - } - - private: - - // ------------------------------------------------------------------------------------ - - template <typename M> - struct op_quad - { - explicit op_quad( - const M& m_ - ) : m(m_) {} - - const M& m; - - typedef typename M::type type; - typedef type const_ret_type; - const static long cost = M::cost + 2; - - inline const_ret_type apply ( long r, long c) const - { - if (r < m.nr()) - { - if (c < m.nc()) - { - return m(r,c); - } - else - { - return -m(r,c-m.nc()); - } - } - else - { - if (c < m.nc()) - { - return -m(r-m.nr(),c); - } - else - { - return 
m(r-m.nr(),c-m.nc()); - } - } - } - - const static long NR = 2*M::NR; - const static long NC = 2*M::NC; - typedef typename M::mem_manager_type mem_manager_type; - typedef typename M::layout_type layout_type; - - long nr () const { return 2*m.nr(); } - long nc () const { return 2*m.nc(); } - - template <typename U> bool aliases ( const matrix_exp<U>& item) const - { return m.aliases(item); } - template <typename U> bool destructively_aliases ( const matrix_exp<U>& item) const - { return m.aliases(item); } - }; - - template < - typename EXP - > - const matrix_op<op_quad<EXP> > make_quad ( - const matrix_exp<EXP>& m - ) const - /*! - ensures - - returns the following matrix: - m -m - -m m - - I.e. returns a matrix that is twice the size of m and just - contains copies of m and -m - !*/ - { - typedef op_quad<EXP> op; - return matrix_op<op>(op(m.ref())); - } - - // ------------------------------------------------------------------------------------ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> do_train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const - { - typedef typename K::scalar_type scalar_type; - typedef typename decision_function<K>::sample_vector_type sample_vector_type; - typedef typename decision_function<K>::scalar_vector_type scalar_vector_type; - - // make sure requires clause is not broken - DLIB_ASSERT(is_learning_problem(x,y) == true, - "\tdecision_function svr_trainer::train(x,y)" - << "\n\t invalid inputs were given to this function" - << "\n\t x.nr(): " << x.nr() - << "\n\t y.nr(): " << y.nr() - << "\n\t x.nc(): " << x.nc() - << "\n\t y.nc(): " << y.nc() - ); - - - scalar_vector_type alpha; - - solve_qp3_using_smo<scalar_vector_type> solver; - - solver(symmetric_matrix_cache<float>(make_quad(kernel_matrix(kernel_function,x)), cache_size), - uniform_matrix<scalar_type>(2*x.size(),1, eps_insensitivity) + join_cols(y,-y), - join_cols(uniform_matrix<scalar_type>(x.size(),1,1), uniform_matrix<scalar_type>(x.size(),1,-1)), - 0, - C, - C, - alpha, - eps); - - scalar_type b; - calculate_b(alpha,solver.get_gradient(),C,b); - - alpha = -rowm(alpha,range(0,x.size()-1)) + rowm(alpha,range(x.size(), alpha.size()-1)); - - // count the number of support vectors - const long sv_count = (long)sum(alpha != 0); - - scalar_vector_type sv_alpha; - sample_vector_type support_vectors; - - // size these column vectors so that they have an entry for each support vector - sv_alpha.set_size(sv_count); - support_vectors.set_size(sv_count); - - // load the support vectors and their alpha values into these new column matrices - long idx = 0; - for (long i = 0; i < alpha.nr(); ++i) - { - if (alpha(i) != 0) - { - sv_alpha(idx) = alpha(i); - support_vectors(idx) = x(i); - ++idx; - } - } - - // now return the decision function - return decision_function<K> (sv_alpha, -b, kernel_function, support_vectors); - } - - // ------------------------------------------------------------------------------------ - - template < - typename scalar_vector_type - > - void calculate_b( - const scalar_vector_type& alpha, - const scalar_vector_type& df, - const scalar_type& C, - scalar_type& b - ) const - { - using namespace std; - long num_free = 0; - scalar_type sum_free = 0; - - scalar_type upper_bound = -numeric_limits<scalar_type>::infinity(); - scalar_type lower_bound = numeric_limits<scalar_type>::infinity(); - - find_min_and_max(df, upper_bound, lower_bound); - - for(long i = 0; i < alpha.nr(); ++i) - { - if(i < 
alpha.nr()/2) - { - if(alpha(i) == C) - { - if (df(i) > upper_bound) - upper_bound = df(i); - } - else if(alpha(i) == 0) - { - if (df(i) < lower_bound) - lower_bound = df(i); - } - else - { - ++num_free; - sum_free += df(i); - } - } - else - { - if(alpha(i) == C) - { - if (-df(i) < lower_bound) - lower_bound = -df(i); - } - else if(alpha(i) == 0) - { - if (-df(i) > upper_bound) - upper_bound = -df(i); - } - else - { - ++num_free; - sum_free -= df(i); - } - } - } - - if(num_free > 0) - b = sum_free/num_free; - else - b = (upper_bound+lower_bound)/2; - } - - // ------------------------------------------------------------------------------------ - - - kernel_type kernel_function; - scalar_type C; - scalar_type eps_insensitivity; - long cache_size; - scalar_type eps; - }; // end of class svr_trainer - -// ---------------------------------------------------------------------------------------- - - template <typename K> - void swap ( - svr_trainer<K>& a, - svr_trainer<K>& b - ) { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_EPSILON_REGRESSION_TRAINER_Hh_ - diff --git a/ml/dlib/dlib/svm/svr_trainer_abstract.h b/ml/dlib/dlib/svm/svr_trainer_abstract.h deleted file mode 100644 index c1dd5f1f3..000000000 --- a/ml/dlib/dlib/svm/svr_trainer_abstract.h +++ /dev/null @@ -1,209 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SVm_EPSILON_REGRESSION_TRAINER_ABSTRACT_ -#ifdef DLIB_SVm_EPSILON_REGRESSION_TRAINER_ABSTRACT_ - -#include <cmath> -#include <limits> -#include "../matrix/matrix_abstract.h" -#include "../algs.h" -#include "function_abstract.h" -#include "kernel_abstract.h" -#include "../optimization/optimization_solve_qp3_using_smo_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename K - > - class svr_trainer - { - /*! - REQUIREMENTS ON K - is a kernel function object as defined in dlib/svm/kernel_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object implements a trainer for performing epsilon-insensitive support - vector regression. It is implemented using the SMO algorithm. - - The implementation of the eps-SVR training algorithm used by this object is based - on the following paper: - - Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector - machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm - !*/ - - public: - typedef K kernel_type; - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - typedef decision_function<kernel_type> trained_function_type; - - svr_trainer ( - ); - /*! - ensures - - This object is properly initialized and ready to be used - to train a support vector machine. - - #get_c() == 1 - - #get_epsilon_insensitivity() == 0.1 - - #get_cache_size() == 200 - - #get_epsilon() == 0.001 - !*/ - - void set_cache_size ( - long cache_size - ); - /*! - requires - - cache_size > 0 - ensures - - #get_cache_size() == cache_size - !*/ - - const long get_cache_size ( - ) const; - /*! - ensures - - returns the number of megabytes of cache this object will use - when it performs training via the this->train() function. - (bigger values of this may make training go faster but won't affect - the result. 
However, too big a value will cause you to run out of - memory, obviously.) - !*/ - - void set_epsilon ( - scalar_type eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - const scalar_type get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Generally a good value for this is 0.001. Smaller values may result - in a more accurate solution but take longer to execute. - !*/ - - void set_epsilon_insensitivity ( - scalar_type eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon_insensitivity() == eps - !*/ - - const scalar_type get_epsilon_insensitivity ( - ) const; - /*! - ensures - - This object tries to find a function which minimizes the - regression error on a training set. This error is measured - in the following way: - - if (abs(predicted_value - true_labeled_value) < eps) then - - The error is 0. That is, any function which gets within - eps of the correct output is good enough. - - else - - The error grows linearly once it gets bigger than eps. - - So epsilon-insensitive regression means we do regression but - stop trying to fit a data point once it is "close enough". - This function returns that eps value which controls what we - mean by "close enough". - !*/ - - void set_kernel ( - const kernel_type& k - ); - /*! - ensures - - #get_kernel() == k - !*/ - - const kernel_type& get_kernel ( - ) const; - /*! - ensures - - returns a copy of the kernel function in use by this object - !*/ - - void set_c ( - scalar_type C - ); - /*! - requires - - C > 0 - ensures - - #get_c() == C - !*/ - - const scalar_type get_c ( - ) const; - /*! - ensures - - returns the SVR regularization parameter. It is the parameter that - determines the trade-off between trying to reduce the training error - and allowing more errors but hopefully improving the generalization - of the resulting decision_function. Larger values encourage exact - fitting while smaller values of C may encourage better generalization. - !*/ - - template < - typename in_sample_vector_type, - typename in_scalar_vector_type - > - const decision_function<kernel_type> train ( - const in_sample_vector_type& x, - const in_scalar_vector_type& y - ) const; - /*! - requires - - is_learning_problem(x,y) == true - - x == a matrix or something convertible to a matrix via mat(). - Also, x should contain sample_type objects. - - y == a matrix or something convertible to a matrix via mat(). - Also, y should contain scalar_type objects. - ensures - - performs support vector regression given the training samples in x and - target values in y. - - returns a decision_function F with the following properties: - - F(new_x) == predicted y value - !*/ - - void swap ( - svr_trainer& item - ); - /*! - ensures - - swaps *this and item - !*/ - }; - - template <typename K> - void swap ( - svr_trainer<K>& a, - svr_trainer<K>& b - ) { a.swap(b); } - /*! - provides a global swap - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SVm_EPSILON_REGRESSION_TRAINER_ABSTRACT_ - - - diff --git a/ml/dlib/dlib/svm/track_association_function.h b/ml/dlib/dlib/svm/track_association_function.h deleted file mode 100644 index bf5ef36c7..000000000 --- a/ml/dlib/dlib/svm/track_association_function.h +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright (C) 2014 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license.
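The implementation below never touches detections or tracks directly; everything flows through the small duck-typed interface (get_similarity_features(), update_track(), propagate_track()) specified precisely in track_association_function_abstract.h later in this diff. A minimal conforming pair might look like the following sketch, where my_detection, my_track, and the single distance feature are invented for illustration:

    #include <dlib/matrix.h>

    struct my_track; // forward declaration so the detection can name its track type

    struct my_detection
    {
        // Required: tells the association machinery which track type this
        // detection is meant to be matched against.
        typedef my_track track_type;

        dlib::matrix<double,2,1> position; // e.g. pixel coordinates
    };

    struct my_track
    {
        // Required: the vector type filled in by get_similarity_features().
        typedef dlib::matrix<double,0,1> feature_vector_type;

        dlib::matrix<double,2,1> last_position;

        void get_similarity_features (const my_detection& det, feature_vector_type& feats) const
        {
            // A single feature: distance between the detection and the spot
            // where this track was last updated.  A real tracker would use
            // richer features (appearance, velocity agreement, etc.).
            feats.set_size(1);
            feats(0) = dlib::length(det.position - last_position);
        }

        void update_track (const my_detection& det) { last_position = det.position; }

        void propagate_track () { /* e.g. apply a motion model; a no-op here */ }
    };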
-#ifndef DLIB_TRACK_ASSOCiATION_FUNCTION_Hh_ -#define DLIB_TRACK_ASSOCiATION_FUNCTION_Hh_ - - -#include "track_association_function_abstract.h" -#include <vector> -#include <iostream> -#include "../algs.h" -#include "../serialize.h" -#include "assignment_function.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename detection_type - > - class feature_extractor_track_association - { - public: - typedef typename detection_type::track_type track_type; - typedef typename track_type::feature_vector_type feature_vector_type; - - typedef detection_type lhs_element; - typedef track_type rhs_element; - - feature_extractor_track_association() : num_dims(0), num_nonnegative(0) {} - - explicit feature_extractor_track_association ( - unsigned long num_dims_, - unsigned long num_nonnegative_ - ) : num_dims(num_dims_), num_nonnegative(num_nonnegative_) {} - - unsigned long num_features( - ) const { return num_dims; } - - unsigned long num_nonnegative_weights ( - ) const { return num_nonnegative; } - - void get_features ( - const detection_type& det, - const track_type& track, - feature_vector_type& feats - ) const - { - track.get_similarity_features(det, feats); - } - - friend void serialize (const feature_extractor_track_association& item, std::ostream& out) - { - serialize(item.num_dims, out); - serialize(item.num_nonnegative, out); - } - - friend void deserialize (feature_extractor_track_association& item, std::istream& in) - { - deserialize(item.num_dims, in); - deserialize(item.num_nonnegative, in); - } - - private: - unsigned long num_dims; - unsigned long num_nonnegative; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename detection_type_ - > - class track_association_function - { - public: - - typedef detection_type_ detection_type; - typedef typename detection_type::track_type track_type; - typedef assignment_function<feature_extractor_track_association<detection_type> > association_function_type; - - track_association_function() {} - - track_association_function ( - const association_function_type& assoc_ - ) : assoc(assoc_) - { - } - - const association_function_type& get_assignment_function ( - ) const - { - return assoc; - } - - void operator() ( - std::vector<track_type>& tracks, - const std::vector<detection_type>& dets - ) const - { - std::vector<long> assignments = assoc(dets, tracks); - std::vector<bool> updated_track(tracks.size(), false); - // now update all the tracks with the detections that associated to them. - for (unsigned long i = 0; i < assignments.size(); ++i) - { - if (assignments[i] != -1) - { - tracks[assignments[i]].update_track(dets[i]); - updated_track[assignments[i]] = true; - } - else - { - track_type new_track; - new_track.update_track(dets[i]); - tracks.push_back(new_track); - } - } - - // Now propagate all the tracks that didn't get any detections. 
- for (unsigned long i = 0; i < updated_track.size(); ++i) - { - if (!updated_track[i]) - tracks[i].propagate_track(); - } - } - - friend void serialize (const track_association_function& item, std::ostream& out) - { - int version = 1; - serialize(version, out); - serialize(item.assoc, out); - } - friend void deserialize (track_association_function& item, std::istream& in) - { - int version = 0; - deserialize(version, in); - if (version != 1) - throw serialization_error("Unexpected version found while deserializing dlib::track_association_function."); - - deserialize(item.assoc, in); - } - - private: - - assignment_function<feature_extractor_track_association<detection_type> > assoc; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_TRACK_ASSOCiATION_FUNCTION_Hh_ - diff --git a/ml/dlib/dlib/svm/track_association_function_abstract.h b/ml/dlib/dlib/svm/track_association_function_abstract.h deleted file mode 100644 index 8a6fe153c..000000000 --- a/ml/dlib/dlib/svm/track_association_function_abstract.h +++ /dev/null @@ -1,271 +0,0 @@ -// Copyright (C) 2014 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_TRACK_ASSOCiATION_FUNCTION_ABSTRACT_Hh_ -#ifdef DLIB_TRACK_ASSOCiATION_FUNCTION_ABSTRACT_Hh_ - -#include <vector> -#include "assignment_function_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class example_detection - { - /*! - WHAT THIS OBJECT REPRESENTS - This object defines the interface a detection must implement if it is to be - used with the track_association_function defined at the bottom of this - file. In this case, the interface is very simple. A detection object is - only required to define the track_type typedef and it must also be possible - to store detection objects in a std::vector. - !*/ - - public: - // Each detection object should be designed to work with a specific track object. - // This typedef lets us determine which track type is meant for use with this - // detection object. - typedef class example_track track_type; - - }; - -// ---------------------------------------------------------------------------------------- - - class example_track - { - /*! - WHAT THIS OBJECT REPRESENTS - This object defines the interface a track must implement if it is to be - used with the track_association_function defined at the bottom of this - file. - !*/ - - public: - // This type should be a dlib::matrix capable of storing column vectors or an - // unsorted sparse vector type as defined in dlib/svm/sparse_vector_abstract.h. - typedef matrix_or_sparse_vector_type feature_vector_type; - - example_track( - ); - /*! - ensures - - this object is properly initialized - !*/ - - void get_similarity_features ( - const example_detection& det, - feature_vector_type& feats - ) const; - /*! - requires - - update_track() has been called on this track at least once. - ensures - - #feats == A feature vector that contains information describing how - likely it is that det is a detection from the object corresponding to - this track. That is, the feature vector should contain information that - lets someone decide if det should be associated to this track. - - #feats.size() must be a constant. That is, every time we call - get_similarity_features() it must output a feature vector of the same - dimensionality. - !*/ - - void update_track ( - const example_detection& det - ); - /*! 
- ensures - - Updates this track with the given detection assuming that det is the most - current observation of the object under track. - !*/ - - void propagate_track ( - ); - /*! - ensures - - propagates this track forward in time one time step. - !*/ - }; - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename detection_type - > - class feature_extractor_track_association - { - /*! - REQUIREMENTS ON detection_type - It must be an object that implements an interface compatible with the - example_detection discussed above. This also means that detection_type::track_type - must be an object that implements an interface compatible with example_track - defined above. - - WHAT THIS OBJECT REPRESENTS - This object is an adapter that converts from the detection/track style - interface defined above to the feature extraction interface required by the - association rule learning tools in dlib. Specifically, it converts the - detection/track interface into a form usable by the assignment_function and - its trainer object structural_assignment_trainer. - !*/ - - public: - typedef typename detection_type::track_type track_type; - typedef typename track_type::feature_vector_type feature_vector_type; - typedef detection_type lhs_element; - typedef track_type rhs_element; - - unsigned long num_features( - ) const; - /*! - ensures - - returns the dimensionality of the feature vectors produced by get_features(). - !*/ - - void get_features ( - const detection_type& det, - const track_type& track, - feature_vector_type& feats - ) const; - /*! - ensures - - performs: track.get_similarity_features(det, feats); - !*/ - }; - - template < - typename detection_type - > - void serialize ( - const feature_extractor_track_association<detection_type>& item, - std::ostream& out - ); - /*! - Provides serialization support. - !*/ - - template < - typename detection_type - > - void deserialize ( - feature_extractor_track_association<detection_type>& item, - std::istream& in - ); - /*! - Provides deserialization support. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename detection_type_ - > - class track_association_function - { - /*! - REQUIREMENTS ON detection_type - It must be an object that implements an interface compatible with the - example_detection discussed above. This also means that detection_type::track_type - must be an object that implements an interface compatible with example_track - defined above. - - WHAT THIS OBJECT REPRESENTS - This object is a tool that helps you implement an object tracker. So for - example, if you wanted to track people moving around in a video then this - object can help. In particular, imagine you have a tool for detecting the - positions of each person in an image. Then you can run this person - detector on the video and at each time step, i.e. at each frame, you get a - set of person detections. However, that by itself doesn't tell you how - many people there are in the video and where they are moving to and from. - To get that information you need to figure out which detections match each - other from frame to frame. This is where the track_association_function - comes in. It performs the detection to track association. 
It will also do - some of the track management tasks like creating a new track when a - detection doesn't match any of the existing tracks. - - Internally, this object is implemented using the assignment_function object. - In fact, it's really just a thin wrapper around assignment_function and - exists just to provide a more convenient interface to users doing detection - to track association. - !*/ - public: - - typedef detection_type_ detection_type; - typedef typename detection_type::track_type track_type; - typedef assignment_function<feature_extractor_track_association<detection_type> > association_function_type; - - track_association_function( - ); - /*! - ensures - - #get_assignment_function() will be default initialized. - !*/ - - track_association_function ( - const association_function_type& assoc - ); - /*! - ensures - - #get_assignment_function() == assoc - !*/ - - const association_function_type& get_assignment_function ( - ) const; - /*! - ensures - - returns the assignment_function used by this object to assign detections - to tracks. - !*/ - - void operator() ( - std::vector<track_type>& tracks, - const std::vector<detection_type>& dets - ) const; - /*! - ensures - - This function uses get_assignment_function() to assign each detection - in dets to its appropriate track in tracks. Then each track which - associates to a detection is updated by calling update_track() with the - associated detection. - - Detections that don't associate with any of the elements of tracks will - spawn new tracks. For each unassociated detection, this is done by - creating a new track_type object, calling update_track() on it with the - new detection, and then adding the new track into tracks. - - Tracks that don't have a detection associated with them are propagated - forward in time by calling propagate_track() on them. That is, we call - propagate_track() only on tracks that do not get associated with a - detection. - !*/ - }; - - template < - typename detection_type - > - void serialize ( - const track_association_function<detection_type>& item, - std::ostream& out - ); - /*! - Provides serialization support. - !*/ - - template < - typename detection_type - > - void deserialize ( - track_association_function<detection_type>& item, - std::istream& in - ); - /*! - Provides deserialization support. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_TRACK_ASSOCiATION_FUNCTION_ABSTRACT_Hh_ - -
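Tying the pieces together, a per-frame tracking driver built on this interface could look roughly like the sketch below. The my_detection/my_track types are hypothetical (like the pair sketched at the top of track_association_function.h above), detect_objects_in_frame() is a stand-in for whatever detector produces per-frame detections, and the association function is assumed to have been learned offline (for example with dlib's structural_track_association_trainer) and saved to "tracker.dat":

    #include <dlib/svm.h>
    #include <vector>

    // Hypothetical per-frame detector; returns all detections found in one frame.
    std::vector<my_detection> detect_objects_in_frame (int frame_number);

    int main()
    {
        // Load an association function that was trained and serialized earlier.
        dlib::track_association_function<my_detection> assoc;
        dlib::deserialize("tracker.dat") >> assoc;

        std::vector<my_track> tracks; // starts empty; assoc() spawns tracks as needed

        for (int frame = 0; frame < 1000; ++frame)
        {
            const std::vector<my_detection> dets = detect_objects_in_frame(frame);

            // One call per frame: assigns detections to tracks, updates the
            // matched tracks, spawns tracks for unmatched detections, and
            // propagates tracks that received no detection.
            assoc(tracks, dets);
        }
        return 0;
    }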