path: root/ml/dlib/dlib/svm/svm_threaded.h
Diffstat (limited to 'ml/dlib/dlib/svm/svm_threaded.h')
-rw-r--r--  ml/dlib/dlib/svm/svm_threaded.h  253
1 file changed, 253 insertions(+), 0 deletions(-)
diff --git a/ml/dlib/dlib/svm/svm_threaded.h b/ml/dlib/dlib/svm/svm_threaded.h
new file mode 100644
index 000000000..37927456b
--- /dev/null
+++ b/ml/dlib/dlib/svm/svm_threaded.h
@@ -0,0 +1,253 @@
+// Copyright (C) 2008 Davis E. King (davis@dlib.net)
+// License: Boost Software License. See LICENSE.txt for the full license.
+#ifndef DLIB_SVm_THREADED_
+#define DLIB_SVm_THREADED_
+
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <limits>
+#include <sstream>
+#include <vector>
+
+#include "svm_threaded_abstract.h"
+#include "svm.h"
+#include "../matrix.h"
+#include "../algs.h"
+#include "../serialize.h"
+#include "function.h"
+#include "kernel.h"
+#include "../threads.h"
+#include "../pipe.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ namespace cvtti_helpers
+ {
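+        // Everything a single cross-validation fold needs: a copy of the trainer,
+        // index vectors naming the train/test rows, and a non-owning pointer to
+        // the full sample matrix.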
+ template <typename trainer_type, typename in_sample_vector_type>
+ struct job
+ {
+ typedef typename trainer_type::scalar_type scalar_type;
+ typedef typename trainer_type::sample_type sample_type;
+ typedef typename trainer_type::mem_manager_type mem_manager_type;
+ typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type;
+ typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type;
+
+ job() : x(0) {}
+
+ trainer_type trainer;
+ matrix<long,0,1> x_test, x_train;
+ scalar_vector_type y_test, y_train;
+ const in_sample_vector_type* x;
+ };
+
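+        // The callable the thread pool runs for one fold: it trains on the fold's
+        // training rows, evaluates on its testing rows, and writes the per-class
+        // accuracies into the result.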
+ struct task
+ {
+ template <
+ typename trainer_type,
+ typename mem_manager_type,
+ typename in_sample_vector_type
+ >
+ void operator()(
+ job<trainer_type,in_sample_vector_type>& j,
+ matrix<double,1,2,mem_manager_type>& result
+ )
+ {
+ try
+ {
+                    result = test_binary_decision_function(
+                        j.trainer.train(rowm(*j.x, j.x_train), j.y_train),
+                        rowm(*j.x, j.x_test), j.y_test);
+
+                    // Reset j to a default-constructed job so it releases its memory, since
+                    // people might run threaded cross validation on very large datasets.
+                    // Every bit of freed memory helps out.
+ j = job<trainer_type,in_sample_vector_type>();
+ }
+ catch (invalid_nu_error&)
+ {
+                    // If the trainer is an svm_nu_trainer then we might get this exception
+                    // when the nu is invalid. In that case just return a cross validation
+                    // score of 0.
+ result = 0;
+ }
+ catch (std::bad_alloc&)
+ {
+ std::cerr << "\nstd::bad_alloc thrown while running cross_validate_trainer_threaded(). Not enough memory.\n" << std::endl;
+ throw;
+ }
+ }
+ };
+ }
+
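+    // Implementation detail: cross_validate_trainer_threaded() (below) forwards here
+    // after wrapping x and y in matrix expressions with mat().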
+ template <
+ typename trainer_type,
+ typename in_sample_vector_type,
+ typename in_scalar_vector_type
+ >
+ const matrix<double, 1, 2, typename trainer_type::mem_manager_type>
+ cross_validate_trainer_threaded_impl (
+ const trainer_type& trainer,
+ const in_sample_vector_type& x,
+ const in_scalar_vector_type& y,
+ const long folds,
+ const long num_threads
+ )
+ {
+ using namespace dlib::cvtti_helpers;
+ typedef typename trainer_type::mem_manager_type mem_manager_type;
+
+ // make sure requires clause is not broken
+ DLIB_ASSERT(is_binary_classification_problem(x,y) == true &&
+ 1 < folds && folds <= std::min(sum(y>0),sum(y<0)) &&
+ num_threads > 0,
+            "\tmatrix cross_validate_trainer_threaded()"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t std::min(sum(y>0),sum(y<0)): " << std::min(sum(y>0),sum(y<0))
+ << "\n\t folds: " << folds
+ << "\n\t num_threads: " << num_threads
+ << "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))? "true":"false")
+ );
+
+
+ task mytask;
+ thread_pool tp(num_threads);
+
+
+ // count the number of positive and negative examples
+ long num_pos = 0;
+ long num_neg = 0;
+ for (long r = 0; r < y.nr(); ++r)
+ {
+ if (y(r) == +1.0)
+ ++num_pos;
+ else
+ ++num_neg;
+ }
+
+ // figure out how many positive and negative examples we will have in each fold
+ const long num_pos_test_samples = num_pos/folds;
+ const long num_pos_train_samples = num_pos - num_pos_test_samples;
+ const long num_neg_test_samples = num_neg/folds;
+ const long num_neg_train_samples = num_neg - num_neg_test_samples;
+
+
+ long pos_idx = 0;
+ long neg_idx = 0;
+
+
+
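+        // One job and one pending result per fold. The futures let us hand each job
+        // to the thread pool and then block, in the loop at the end, until every
+        // fold has finished.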
+ std::vector<future<job<trainer_type,in_sample_vector_type> > > jobs(folds);
+ std::vector<future<matrix<double, 1, 2, mem_manager_type> > > results(folds);
+
+
+ for (long i = 0; i < folds; ++i)
+ {
+ job<trainer_type,in_sample_vector_type>& j = jobs[i].get();
+
+ j.x = &x;
+ j.x_test.set_size (num_pos_test_samples + num_neg_test_samples);
+ j.y_test.set_size (num_pos_test_samples + num_neg_test_samples);
+ j.x_train.set_size(num_pos_train_samples + num_neg_train_samples);
+ j.y_train.set_size(num_pos_train_samples + num_neg_train_samples);
+ j.trainer = trainer;
+
+ long cur = 0;
+
+ // load up our positive test samples
+ while (cur < num_pos_test_samples)
+ {
+ if (y(pos_idx) == +1.0)
+ {
+ j.x_test(cur) = pos_idx;
+ j.y_test(cur) = +1.0;
+ ++cur;
+ }
+ pos_idx = (pos_idx+1)%x.nr();
+ }
+
+ // load up our negative test samples
+ while (cur < j.x_test.nr())
+ {
+ if (y(neg_idx) == -1.0)
+ {
+ j.x_test(cur) = neg_idx;
+ j.y_test(cur) = -1.0;
+ ++cur;
+ }
+ neg_idx = (neg_idx+1)%x.nr();
+ }
+
+            // Load the training data from the samples that follow the ones we just
+            // loaded as testing data.
+ long train_pos_idx = pos_idx;
+ long train_neg_idx = neg_idx;
+ cur = 0;
+
+ // load up our positive train samples
+ while (cur < num_pos_train_samples)
+ {
+ if (y(train_pos_idx) == +1.0)
+ {
+ j.x_train(cur) = train_pos_idx;
+ j.y_train(cur) = +1.0;
+ ++cur;
+ }
+ train_pos_idx = (train_pos_idx+1)%x.nr();
+ }
+
+ // load up our negative train samples
+ while (cur < j.x_train.nr())
+ {
+ if (y(train_neg_idx) == -1.0)
+ {
+ j.x_train(cur) = train_neg_idx;
+ j.y_train(cur) = -1.0;
+ ++cur;
+ }
+ train_neg_idx = (train_neg_idx+1)%x.nr();
+ }
+
+ // finally spawn a task to process this job
+ tp.add_task(mytask, jobs[i], results[i]);
+
+ } // for (long i = 0; i < folds; ++i)
+
+ matrix<double, 1, 2, mem_manager_type> res;
+ set_all_elements(res,0);
+
+ // now compute the total results
+ for (long i = 0; i < folds; ++i)
+ {
+ res += results[i].get();
+ }
+
+ return res/(double)folds;
+ }
+
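+    // The public entry point. See svm_threaded_abstract.h for the formal contract.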
+ template <
+ typename trainer_type,
+ typename in_sample_vector_type,
+ typename in_scalar_vector_type
+ >
+ const matrix<double, 1, 2, typename trainer_type::mem_manager_type>
+ cross_validate_trainer_threaded (
+ const trainer_type& trainer,
+ const in_sample_vector_type& x,
+ const in_scalar_vector_type& y,
+ const long folds,
+ const long num_threads
+ )
+ {
+ return cross_validate_trainer_threaded_impl(trainer,
+ mat(x),
+ mat(y),
+ folds,
+ num_threads);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SVm_THREADED_
+
+
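For context, a minimal usage sketch of the function this patch adds might look like the
following. The example is not part of the patch: the toy dataset, kernel width, and nu
value are illustrative assumptions, mirroring dlib's stock SVM examples.

#include <iostream>
#include <vector>
#include <dlib/svm_threaded.h>

using namespace dlib;

int main()
{
    typedef matrix<double,2,1> sample_type;
    typedef radial_basis_kernel<sample_type> kernel_type;

    std::vector<sample_type> samples;
    std::vector<double> labels;

    // Toy data: points inside the unit circle get the label +1, the rest -1.
    for (int i = -5; i <= 5; ++i)
    {
        for (int j = -5; j <= 5; ++j)
        {
            sample_type samp;
            samp(0) = i*0.4;
            samp(1) = j*0.4;
            samples.push_back(samp);
            labels.push_back(samp(0)*samp(0) + samp(1)*samp(1) <= 1 ? +1.0 : -1.0);
        }
    }

    svm_nu_trainer<kernel_type> trainer;
    trainer.set_kernel(kernel_type(0.5));
    trainer.set_nu(0.1);

    // 4-fold cross validation on 4 threads. The returned 1x2 matrix holds the
    // fraction of +1 and of -1 test samples that were classified correctly,
    // averaged over the folds.
    std::cout << "cv accuracy: "
              << cross_validate_trainer_threaded(trainer, samples, labels, 4, 4);
    return 0;
}

Passing std::vector works because cross_validate_trainer_threaded() wraps its arguments
with mat() before calling the implementation above.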