summary | refs | log | tree | commit | diff | stats
path: root/ml/dlib/dlib/svm/active_learning_abstract.h
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/svm/active_learning_abstract.h')
-rw-r--r-- ml/dlib/dlib/svm/active_learning_abstract.h 75
1 files changed, 75 insertions, 0 deletions
diff --git a/ml/dlib/dlib/svm/active_learning_abstract.h b/ml/dlib/dlib/svm/active_learning_abstract.h
new file mode 100644
index 000000000..76a5120e3
--- /dev/null
+++ b/ml/dlib/dlib/svm/active_learning_abstract.h
@@ -0,0 +1,75 @@
+// Copyright (C) 2012 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_ACTIVE_LEARnING_ABSTRACT_Hh_
+#ifdef DLIB_ACTIVE_LEARnING_ABSTRACT_Hh_
+
+#include "svm_c_linear_dcd_trainer_abstract.h"
+#include <vector>
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ enum active_learning_mode // strategy selector passed as the mode argument of rank_unlabeled_training_samples()
+ {
+ max_min_margin, // use the MaxMin Margin selection strategy (the default mode)
+ ratio_margin // use the Ratio Margin selection strategy
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename kernel_type,
+ typename in_sample_vector_type,
+ typename in_scalar_vector_type,
+ typename in_sample_vector_type2
+ >
+ std::vector<unsigned long> rank_unlabeled_training_samples (
+ const svm_c_linear_dcd_trainer<kernel_type>& trainer,
+ const in_sample_vector_type& samples,
+ const in_scalar_vector_type& labels,
+ const in_sample_vector_type2& unlabeled_samples,
+ const active_learning_mode mode = max_min_margin // selection strategy; MaxMin Margin unless the caller says otherwise
+ );
+ /*!
+ requires
+ - if (samples.size() != 0) then
+ - it must be legal to call trainer.train(samples, labels)
+ - is_learning_problem(samples, labels) == true
+ - unlabeled_samples must contain the same kind of vectors as samples.
+ - unlabeled_samples, samples, and labels must be matrices or types of
+ objects convertible to a matrix via mat().
+ - is_vector(unlabeled_samples) == true
+ ensures
+ - Suppose that we wish to learn a binary classifier by calling
+ trainer.train(samples, labels) but we are also interested in selecting one of
+ the elements of unlabeled_samples to add to our training data. Since doing
+ this requires us to find out the label of the sample, a potentially tedious
+ or expensive process, we would like to select the "best" element from
+ unlabeled_samples for labeling. The rank_unlabeled_training_samples()
+ function attempts to find this "best" element. In particular, it returns a
+ ranked list of all the elements in unlabeled_samples such that the
+ "best" elements come first.
+ - The method used by this function is described in the paper:
+ Support Vector Machine Active Learning with Applications to Text Classification
+ by Simon Tong and Daphne Koller
+ In particular, this function implements the MaxMin Margin and Ratio Margin
+ selection strategies described in the paper. Moreover, the mode argument
+ to this function selects which of these strategies is used.
+ - returns a std::vector V such that:
+ - V contains a list of all the indices from unlabeled_samples. Moreover,
+ they are ordered so that the most useful samples come first.
+ - V.size() == unlabeled_samples.size()
+ - unlabeled_samples[V[0]] == The best sample to add into the training set.
+ - unlabeled_samples[V[1]] == The second best sample to add into the training set.
+ - unlabeled_samples[V[i]] == The i-th best sample to add into the training set.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_ACTIVE_LEARnING_ABSTRACT_Hh_
+
+