summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/svm/active_learning.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-03-09 13:19:48 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-03-09 13:20:02 +0000
commit58daab21cd043e1dc37024a7f99b396788372918 (patch)
tree96771e43bb69f7c1c2b0b4f7374cb74d7866d0cb /ml/dlib/dlib/svm/active_learning.h
parentReleasing debian version 1.43.2-1. (diff)
downloadnetdata-58daab21cd043e1dc37024a7f99b396788372918.tar.xz
netdata-58daab21cd043e1dc37024a7f99b396788372918.zip
Merging upstream version 1.44.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/dlib/dlib/svm/active_learning.h')
-rw-r--r--ml/dlib/dlib/svm/active_learning.h162
1 file changed, 162 insertions(+), 0 deletions(-)
diff --git a/ml/dlib/dlib/svm/active_learning.h b/ml/dlib/dlib/svm/active_learning.h
new file mode 100644
index 000000000..581540e67
--- /dev/null
+++ b/ml/dlib/dlib/svm/active_learning.h
@@ -0,0 +1,162 @@
+// Copyright (C) 2012 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_ACTIVE_LEARnING_Hh_
+#define DLIB_ACTIVE_LEARnING_Hh_
+
+#include "active_learning_abstract.h"
+
+#include "svm_c_linear_dcd_trainer.h"
+#include <vector>
+
+namespace dlib
+{
+
+    // Strategy used to score unlabeled samples when ranking them for labeling.
+    enum active_learning_mode
+    {
+        max_min_margin, // score = min margin over the two trial labels (prefer samples still free to take either label)
+        ratio_margin    // score = ratio of the two trial-label margins (prefer samples with equal preference for both labels)
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    // Ranks unlabeled_samples by how useful it would be to obtain a label for each
+    // of them, given the already labeled training set (samples, labels).  Returns
+    // the indices into unlabeled_samples, most useful first.
+    // Requires: unlabeled_samples is a vector expression and, when samples is
+    // non-empty, (samples, labels) forms a valid learning problem (asserted below).
+    template <
+        typename kernel_type,
+        typename in_sample_vector_type,
+        typename in_scalar_vector_type,
+        typename in_sample_vector_type2
+        >
+    std::vector<unsigned long> impl_rank_unlabeled_training_samples (
+        const svm_c_linear_dcd_trainer<kernel_type>& trainer,
+        const in_sample_vector_type& samples,
+        const in_scalar_vector_type& labels,
+        const in_sample_vector_type2& unlabeled_samples,
+        const active_learning_mode mode
+    )
+    {
+        DLIB_ASSERT(is_vector(unlabeled_samples) &&
+                    (samples.size() == 0 || is_learning_problem(samples, labels)) ,
+                    "\t std::vector<unsigned long> rank_unlabeled_training_samples()"
+                    << "\n\t Invalid inputs were given to this function"
+                    << "\n\t is_vector(unlabeled_samples): " << is_vector(unlabeled_samples)
+                    << "\n\t is_learning_problem(samples, labels): " << is_learning_problem(samples, labels)
+                    << "\n\t samples.size(): " << samples.size()
+                    << "\n\t labels.size(): " << labels.size()
+                    );
+
+        // If there aren't any training samples then all unlabeled_samples are equally good.
+        // So just report an arbitrary ordering.
+        if (samples.size() == 0 || unlabeled_samples.size() == 0)
+        {
+            // Identity ordering: 0, 1, 2, ...
+            std::vector<unsigned long> ret(unlabeled_samples.size());
+            for (unsigned long i = 0; i < ret.size(); ++i)
+                ret[i] = i;
+
+            return ret;
+        }
+
+        // We are going to score each unlabeled sample and put the score and index into
+        // results.  Then at the end of this function we just sort it and return the indices.
+        std::vector<std::pair<double, unsigned long> > results;
+        results.resize(unlabeled_samples.size());
+
+        // make sure we use this trainer's ability to warm start itself since that will make
+        // this whole function run a lot faster.  But first, we need to find out what the state
+        // we will be warm starting from is.
+        typedef typename svm_c_linear_dcd_trainer<kernel_type>::optimizer_state optimizer_state;
+        optimizer_state state;
+        trainer.train(samples, labels, state); // call train() just to get state
+
+        decision_function<kernel_type> df;
+
+        // Scratch training set: all the labeled samples followed by one trailing
+        // slot that will hold the current candidate sample and its trial label.
+        std::vector<typename kernel_type::sample_type> temp_samples;
+        std::vector<typename kernel_type::scalar_type> temp_labels;
+        temp_samples.reserve(samples.size()+1);
+        temp_labels.reserve(labels.size()+1);
+        temp_samples.assign(samples.begin(), samples.end());
+        temp_labels.assign(labels.begin(), labels.end());
+        temp_samples.resize(temp_samples.size()+1);
+        temp_labels.resize(temp_labels.size()+1);
+
+
+        for (long i = 0; i < unlabeled_samples.size(); ++i)
+        {
+            temp_samples.back() = unlabeled_samples(i);
+            // figure out the margin for each possible labeling of this sample.
+
+            // Each trial run warm starts from a fresh copy of the solution for the
+            // labeled data alone, so the two trials don't contaminate each other.
+            optimizer_state temp(state);
+            temp_labels.back() = +1;
+            df = trainer.train(temp_samples, temp_labels, temp);
+            // Signed functional margin of the candidate under trial label +1.
+            const double margin_p = temp_labels.back()*df(temp_samples.back());
+
+            temp = state;
+            temp_labels.back() = -1;
+            df = trainer.train(temp_samples, temp_labels, temp);
+            // Signed functional margin of the candidate under trial label -1.
+            const double margin_n = temp_labels.back()*df(temp_samples.back());
+
+            if (mode == max_min_margin)
+            {
+                // The score for this sample is its min possible margin over possible labels.
+                // Therefore, this score measures how much flexibility we have to label this
+                // sample however we want.  The intuition being that the most useful points to
+                // label are the ones that are still free to obtain either label.
+                results[i] = std::make_pair(std::min(margin_p, margin_n), i);
+            }
+            else
+            {
+                // In this case, the score for the sample is a ratio that tells how close the
+                // two margin values are to each other.  The closer they are the better.  So in
+                // this case we are saying we are looking for samples that have the same
+                // preference for either class label.
+                // The division below always puts the smaller-magnitude margin on top, so
+                // the score is in [-1,1] and larger means more equal preference.
+                if (std::abs(margin_p) >= std::abs(margin_n))
+                {
+                    if (margin_p != 0)
+                        results[i] = std::make_pair(margin_n/margin_p, i);
+                    else // if both are == 0 then say 0/0 == 1
+                        results[i] = std::make_pair(1, i);
+                }
+                else
+                {
+                    results[i] = std::make_pair(margin_p/margin_n, i);
+                }
+            }
+        }
+
+        // sort the results so the highest scoring samples come first.
+        std::sort(results.rbegin(), results.rend());
+
+        // transfer results into a vector with just sample indices so we can return it.
+        std::vector<unsigned long> ret(results.size());
+        for (unsigned long i = 0; i < ret.size(); ++i)
+            ret[i] = results[i].second;
+        return ret;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename kernel_type,
+        typename in_sample_vector_type,
+        typename in_scalar_vector_type,
+        typename in_sample_vector_type2
+        >
+    std::vector<unsigned long> rank_unlabeled_training_samples (
+        const svm_c_linear_dcd_trainer<kernel_type>& trainer,
+        const in_sample_vector_type& samples,
+        const in_scalar_vector_type& labels,
+        const in_sample_vector_type2& unlabeled_samples,
+        const active_learning_mode mode = max_min_margin
+    )
+    {
+        // Public entry point.  Normalize every input to a dlib matrix view via
+        // mat() so the implementation only deals with one representation, then
+        // delegate the actual scoring and ranking work.
+        return impl_rank_unlabeled_training_samples(
+            trainer, mat(samples), mat(labels), mat(unlabeled_samples), mode);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_ACTIVE_LEARnING_Hh_
+