diff options
Diffstat (limited to 'ml/dlib/dlib/svm/active_learning_abstract.h')
-rw-r--r-- | ml/dlib/dlib/svm/active_learning_abstract.h | 75 |
1 files changed, 75 insertions, 0 deletions
diff --git a/ml/dlib/dlib/svm/active_learning_abstract.h b/ml/dlib/dlib/svm/active_learning_abstract.h new file mode 100644 index 000000000..76a5120e3 --- /dev/null +++ b/ml/dlib/dlib/svm/active_learning_abstract.h @@ -0,0 +1,75 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_ACTIVE_LEARnING_ABSTRACT_Hh_ +#ifdef DLIB_ACTIVE_LEARnING_ABSTRACT_Hh_ + +#include "svm_c_linear_dcd_trainer_abstract.h" +#include <vector> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + enum active_learning_mode + { + max_min_margin, + ratio_margin + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type, + typename in_sample_vector_type, + typename in_scalar_vector_type, + typename in_sample_vector_type2 + > + std::vector<unsigned long> rank_unlabeled_training_samples ( + const svm_c_linear_dcd_trainer<kernel_type>& trainer, + const in_sample_vector_type& samples, + const in_scalar_vector_type& labels, + const in_sample_vector_type2& unlabeled_samples, + const active_learning_mode mode = max_min_margin + ); + /*! + requires + - if (samples.size() != 0) then + - it must be legal to call trainer.train(samples, labels) + - is_learning_problem(samples, labels) == true + - unlabeled_samples must contain the same kind of vectors as samples. + - unlabeled_samples, samples, and labels must be matrices or types of + objects convertible to a matrix via mat(). + - is_vector(unlabeled_samples) == true + ensures + - Suppose that we wish to learn a binary classifier by calling + trainer.train(samples, labels) but we are also interested in selecting one of + the elements of unlabeled_samples to add to our training data. 
Since doing + this requires us to find out the label of the sample, a potentially tedious + or expensive process, we would like to select the "best" element from + unlabeled_samples for labeling. The rank_unlabeled_training_samples() + function attempts to find this "best" element. In particular, this function returns a + ranked list of all the elements in unlabeled_samples such that the + "best" elements come first. + - The method used by this function is described in the paper: + Support Vector Machine Active Learning with Applications to Text Classification + by Simon Tong and Daphne Koller + In particular, this function implements the MaxMin Margin and Ratio Margin + selection strategies described in the paper. Moreover, the mode argument + to this function selects which of these strategies is used. + - returns a std::vector V such that: + - V contains a list of all the indices from unlabeled_samples. Moreover, + they are ordered so that the most useful samples come first. + - V.size() == unlabeled_samples.size() + - unlabeled_samples[V[0]] == The best sample to add into the training set. + - unlabeled_samples[V[1]] == The second best sample to add into the training set. + - unlabeled_samples[V[i]] == The i-th best sample to add into the training set. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_ACTIVE_LEARnING_ABSTRACT_Hh_ + + |