Diffstat (limited to 'ml/dlib/dlib/svm/svm_multiclass_linear_trainer.h')
-rw-r--r--  ml/dlib/dlib/svm/svm_multiclass_linear_trainer.h  432
1 file changed, 432 insertions, 0 deletions
diff --git a/ml/dlib/dlib/svm/svm_multiclass_linear_trainer.h b/ml/dlib/dlib/svm/svm_multiclass_linear_trainer.h
new file mode 100644
index 000000000..4727f7226
--- /dev/null
+++ b/ml/dlib/dlib/svm/svm_multiclass_linear_trainer.h
@@ -0,0 +1,432 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License. See LICENSE.txt for the full license.
+#ifndef DLIB_SVm_MULTICLASS_LINEAR_TRAINER_Hh_
+#define DLIB_SVm_MULTICLASS_LINEAR_TRAINER_Hh_
+
+#include "svm_multiclass_linear_trainer_abstract.h"
+#include "structural_svm_problem_threaded.h"
+#include <vector>
+#include "../optimization/optimization_oca.h"
+#include "../matrix.h"
+#include "sparse_vector.h"
+#include "function.h"
+#include <algorithm>
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename matrix_type,
+ typename sample_type,
+ typename label_type
+ >
+ class multiclass_svm_problem : public structural_svm_problem_threaded<matrix_type,
+ std::vector<std::pair<unsigned long,typename matrix_type::type> > >
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object defines the optimization problem for the multiclass SVM trainer
+ object at the bottom of this file.
+
+ The joint feature vectors used by this object, the PSI(x,y) vectors, are
+ defined as follows:
+ PSI(x,0) = [x,0,0,0,0, ...,0]
+ PSI(x,1) = [0,x,0,0,0, ...,0]
+ PSI(x,2) = [0,0,x,0,0, ...,0]
+            That is, each x is first augmented with a -1 value to account for the
+            bias term, and if there are N labels then the joint feature vector has
+            a dimension that is N times the dimension of an augmented sample.
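+
+            For example, with N = 3 labels and 2-dimensional samples, each
+            augmented sample [x1 x2 -1] occupies a block of dimension 3, so:
+                PSI([x1 x2], 1) = [0 0 0   x1 x2 -1   0 0 0]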
+ !*/
+
+ public:
+ typedef typename matrix_type::type scalar_type;
+ typedef std::vector<std::pair<unsigned long,scalar_type> > feature_vector_type;
+
+ multiclass_svm_problem (
+ const std::vector<sample_type>& samples_,
+ const std::vector<label_type>& labels_,
+ const std::vector<label_type>& distinct_labels_,
+ const unsigned long dims_,
+ const unsigned long num_threads
+ ) :
+ structural_svm_problem_threaded<matrix_type, std::vector<std::pair<unsigned long,typename matrix_type::type> > >(num_threads),
+ samples(samples_),
+ labels(labels_),
+ distinct_labels(distinct_labels_),
+ dims(dims_+1) // +1 for the bias
+ {}
+
+ virtual long get_num_dimensions (
+ ) const
+ {
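+            // Note that dims already includes the bias slot, so this is
+            // (sample dimensionality + 1) * (number of labels).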
+ return dims*distinct_labels.size();
+ }
+
+ virtual long get_num_samples (
+ ) const
+ {
+ return static_cast<long>(samples.size());
+ }
+
+ virtual void get_truth_joint_feature_vector (
+ long idx,
+ feature_vector_type& psi
+ ) const
+ {
+ assign(psi, samples[idx]);
+ // Add a constant -1 to account for the bias term.
+ psi.push_back(std::make_pair(dims-1,static_cast<scalar_type>(-1)));
+
+ // Find which distinct label goes with this psi.
+ long label_idx = 0;
+ for (unsigned long i = 0; i < distinct_labels.size(); ++i)
+ {
+ if (distinct_labels[i] == labels[idx])
+ {
+ label_idx = i;
+ break;
+ }
+ }
+
+ offset_feature_vector(psi, dims*label_idx);
+ }
+
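+        // The separation oracle solves the loss-augmented inference problem: find
+        // the label y maximizing LOSS(idx,y) + dot(current_solution, PSI(x,y)),
+        // where LOSS is the 0-1 loss. Since PSI(x,y) is block-structured, the dot
+        // product reduces to a dot product with label y's block of the current
+        // solution, minus that label's bias term.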
+ virtual void separation_oracle (
+ const long idx,
+ const matrix_type& current_solution,
+ scalar_type& loss,
+ feature_vector_type& psi
+ ) const
+ {
+ scalar_type best_val = -std::numeric_limits<scalar_type>::infinity();
+ unsigned long best_idx = 0;
+
+ // Figure out which label is the best. That is, what label maximizes
+ // LOSS(idx,y) + F(x,y). Note that y in this case is given by distinct_labels[i].
+ for (unsigned long i = 0; i < distinct_labels.size(); ++i)
+ {
+ // Compute the F(x,y) part:
+                // temp = dot(weight block for label i, samples[idx]) - bias for label i
+ scalar_type temp = dot(mat(&current_solution(i*dims),dims-1), samples[idx]) - current_solution((i+1)*dims-1);
+
+ // Add the LOSS(idx,y) part:
+ if (labels[idx] != distinct_labels[i])
+ temp += 1;
+
+ // Now temp == LOSS(idx,y) + F(x,y). Check if it is the biggest we have seen.
+ if (temp > best_val)
+ {
+ best_val = temp;
+ best_idx = i;
+ }
+ }
+
+ assign(psi, samples[idx]);
+ // add a constant -1 to account for the bias term
+ psi.push_back(std::make_pair(dims-1,static_cast<scalar_type>(-1)));
+
+ offset_feature_vector(psi, dims*best_idx);
+
+ if (distinct_labels[best_idx] == labels[idx])
+ loss = 0;
+ else
+ loss = 1;
+ }
+
+ private:
+
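+        // Shift every index in the sparse vector 'sample' up by val. This is what
+        // places an augmented sample into the block of the joint feature vector
+        // PSI belonging to a particular label (val = dims * label index).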
+ void offset_feature_vector (
+ feature_vector_type& sample,
+ const unsigned long val
+ ) const
+ {
+ if (val != 0)
+ {
+ for (typename feature_vector_type::iterator i = sample.begin(); i != sample.end(); ++i)
+ {
+ i->first += val;
+ }
+ }
+ }
+
+
+ const std::vector<sample_type>& samples;
+ const std::vector<label_type>& labels;
+ const std::vector<label_type>& distinct_labels;
+ const long dims;
+ };
+
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename K,
+ typename label_type_ = typename K::scalar_type
+ >
+ class svm_multiclass_linear_trainer
+ {
+ public:
+ typedef label_type_ label_type;
+ typedef K kernel_type;
+ typedef typename kernel_type::scalar_type scalar_type;
+ typedef typename kernel_type::sample_type sample_type;
+ typedef typename kernel_type::mem_manager_type mem_manager_type;
+
+ typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type;
+
+
+        // You are getting a compiler error on this line because you supplied a non-linear
+        // kernel to the svm_multiclass_linear_trainer object. You have to use one of the
+        // linear kernels with this trainer (e.g. linear_kernel or sparse_linear_kernel).
+ COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value ||
+ is_same_type<K, sparse_linear_kernel<sample_type> >::value ));
+
+ svm_multiclass_linear_trainer (
+ ) :
+ num_threads(4),
+ C(1),
+ eps(0.001),
+ max_iterations(10000),
+ verbose(false),
+ learn_nonnegative_weights(false)
+ {
+ }
+
+ void set_num_threads (
+ unsigned long num
+ )
+ {
+ num_threads = num;
+ }
+
+ unsigned long get_num_threads (
+ ) const
+ {
+ return num_threads;
+ }
+
+ void set_epsilon (
+ scalar_type eps_
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(eps_ > 0,
+ "\t void svm_multiclass_linear_trainer::set_epsilon()"
+ << "\n\t eps_ must be greater than 0"
+ << "\n\t eps_: " << eps_
+ << "\n\t this: " << this
+ );
+
+ eps = eps_;
+ }
+
+ const scalar_type get_epsilon (
+ ) const { return eps; }
+
+ unsigned long get_max_iterations (
+ ) const { return max_iterations; }
+
+ void set_max_iterations (
+ unsigned long max_iter
+ )
+ {
+ max_iterations = max_iter;
+ }
+
+ void be_verbose (
+ )
+ {
+ verbose = true;
+ }
+
+ void be_quiet (
+ )
+ {
+ verbose = false;
+ }
+
+ void set_oca (
+ const oca& item
+ )
+ {
+ solver = item;
+ }
+
+ const oca get_oca (
+ ) const
+ {
+ return solver;
+ }
+
+ const kernel_type get_kernel (
+ ) const
+ {
+ return kernel_type();
+ }
+
+ bool learns_nonnegative_weights (
+ ) const { return learn_nonnegative_weights; }
+
+ void set_learns_nonnegative_weights (
+ bool value
+ )
+ {
+ learn_nonnegative_weights = value;
+ if (learn_nonnegative_weights)
+ prior = trained_function_type();
+ }
+
+ void set_c (
+ scalar_type C_
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(C_ > 0,
+ "\t void svm_multiclass_linear_trainer::set_c()"
+ << "\n\t C must be greater than 0"
+ << "\n\t C_: " << C_
+ << "\n\t this: " << this
+ );
+
+ C = C_;
+ }
+
+ const scalar_type get_c (
+ ) const
+ {
+ return C;
+ }
+
+ void set_prior (
+ const trained_function_type& prior_
+ )
+ {
+ prior = prior_;
+ learn_nonnegative_weights = false;
+ }
+
+ bool has_prior (
+ ) const
+ {
+ return prior.labels.size() != 0;
+ }
+
+ trained_function_type train (
+ const std::vector<sample_type>& all_samples,
+ const std::vector<label_type>& all_labels
+ ) const
+ {
+ scalar_type svm_objective = 0;
+ return train(all_samples, all_labels, svm_objective);
+ }
+
+ trained_function_type train (
+ const std::vector<sample_type>& all_samples,
+ const std::vector<label_type>& all_labels,
+ scalar_type& svm_objective
+ ) const
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(is_learning_problem(all_samples,all_labels),
+ "\t trained_function_type svm_multiclass_linear_trainer::train(all_samples,all_labels)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t all_samples.size(): " << all_samples.size()
+ << "\n\t all_labels.size(): " << all_labels.size()
+ );
+
+ trained_function_type df;
+ df.labels = select_all_distinct_labels(all_labels);
+ if (has_prior())
+ {
+ df.labels.insert(df.labels.end(), prior.labels.begin(), prior.labels.end());
+ df.labels = select_all_distinct_labels(df.labels);
+ }
+ const long input_sample_dimensionality = max_index_plus_one(all_samples);
+ // If the samples are sparse then the right thing to do is to take the max
+ // dimensionality between the prior and the new samples. But if the samples
+ // are dense vectors then they definitely all have to have exactly the same
+ // dimensionality.
+ const long dims = std::max(df.weights.nc(),input_sample_dimensionality);
+ if (is_matrix<sample_type>::value && has_prior())
+ {
+ DLIB_ASSERT(input_sample_dimensionality == prior.weights.nc(),
+ "\t trained_function_type svm_multiclass_linear_trainer::train(all_samples,all_labels)"
+ << "\n\t The training samples given to this function are not the same kind of training "
+ << "\n\t samples used to create the prior."
+ << "\n\t input_sample_dimensionality: " << input_sample_dimensionality
+ << "\n\t prior.weights.nc(): " << prior.weights.nc()
+ );
+ }
+
+ typedef matrix<scalar_type,0,1> w_type;
+ w_type weights;
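+            // Note: multiclass_svm_problem appends the bias slot internally
+            // (dims+1 per label), so we pass the raw sample dimensionality here.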
+ multiclass_svm_problem<w_type, sample_type, label_type> problem(all_samples, all_labels, df.labels, dims, num_threads);
+ if (verbose)
+ problem.be_verbose();
+
+ problem.set_max_cache_size(0);
+ problem.set_c(C);
+ problem.set_epsilon(eps);
+ problem.set_max_iterations(max_iterations);
+
+ unsigned long num_nonnegative = 0;
+ if (learn_nonnegative_weights)
+ {
+ num_nonnegative = problem.get_num_dimensions();
+ }
+
+ if (!has_prior())
+ {
+ svm_objective = solver(problem, weights, num_nonnegative);
+ }
+ else
+ {
+ matrix<scalar_type> temp(df.labels.size(),dims);
+ w_type b(df.labels.size());
+ temp = 0;
+ b = 0;
+
+ const long pad_size = dims-prior.weights.nc();
+                // Copy the prior into the temp and b matrices. We have to copy row by
+                // row because the new training data might contain labels we haven't
+                // seen before, so these matrices can differ in size from the prior's.
+ for (unsigned long i = 0; i < prior.labels.size(); ++i)
+ {
+ const long r = std::find(df.labels.begin(), df.labels.end(), prior.labels[i])-df.labels.begin();
+ set_rowm(temp,r) = join_rows(rowm(prior.weights,i), zeros_matrix<scalar_type>(1,pad_size));
+ b(r) = prior.b(i);
+ }
+
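+                // Pack the prior into one long vector using the same per-label
+                // [weights, bias] layout as the solver's solution vector. The OCA
+                // solver's prior overload then pulls the new solution toward it.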
+ const w_type prior_vect = reshape_to_column_vector(join_rows(temp,b));
+ svm_objective = solver(problem, weights, prior_vect);
+ }
+
+
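+            // The solution vector holds, for each label in df.labels order, that
+            // label's weight vector followed by its bias. Reshape it into a
+            // (number of labels) x (dims+1) matrix, then split off the last column
+            // as the bias terms.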
+ df.weights = colm(reshape(weights, df.labels.size(), dims+1), range(0,dims-1));
+ df.b = colm(reshape(weights, df.labels.size(), dims+1), dims);
+ return df;
+ }
+
+ private:
+
+ unsigned long num_threads;
+ scalar_type C;
+ scalar_type eps;
+ unsigned long max_iterations;
+ bool verbose;
+ oca solver;
+ bool learn_nonnegative_weights;
+
+ trained_function_type prior;
+ };
+
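+    /*
+        A minimal usage sketch (the data setup here is hypothetical; see
+        svm_multiclass_linear_trainer_abstract.h for the full interface):
+
+            typedef matrix<double,0,1> sample_type;
+            typedef linear_kernel<sample_type> kernel_type;
+
+            std::vector<sample_type> samples;
+            std::vector<int> labels;
+            // ... fill samples and labels with training data ...
+
+            svm_multiclass_linear_trainer<kernel_type,int> trainer;
+            trainer.set_c(10);
+            trainer.set_num_threads(4);
+
+            multiclass_linear_decision_function<kernel_type,int> df = trainer.train(samples, labels);
+            int predicted_label = df(samples[0]);
+    */
+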
+// ----------------------------------------------------------------------------------------
+
+}
+
+
+#endif // DLIB_SVm_MULTICLASS_LINEAR_TRAINER_Hh_
+