summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/svm/svm_rank_trainer.h
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/svm/svm_rank_trainer.h')
-rw-r--r--ml/dlib/dlib/svm/svm_rank_trainer.h495
1 files changed, 495 insertions, 0 deletions
diff --git a/ml/dlib/dlib/svm/svm_rank_trainer.h b/ml/dlib/dlib/svm/svm_rank_trainer.h
new file mode 100644
index 000000000..0be737f48
--- /dev/null
+++ b/ml/dlib/dlib/svm/svm_rank_trainer.h
@@ -0,0 +1,495 @@
+// Copyright (C) 2012 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_SVM_RANK_TrAINER_Hh_
+#define DLIB_SVM_RANK_TrAINER_Hh_
+
+#include "svm_rank_trainer_abstract.h"
+
+#include "ranking_tools.h"
+#include "../algs.h"
+#include "../optimization.h"
+#include "function.h"
+#include "kernel.h"
+#include "sparse_vector.h"
+#include <iostream>
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename matrix_type,
+ typename sample_type
+ >
+ class oca_problem_ranking_svm : public oca_problem<matrix_type >
+ {
+ public:
+ /*
+ This class is used as part of the implementation of the svm_rank_trainer
+ defined towards the end of this file.
+ */
+
+ typedef typename matrix_type::type scalar_type;
+
+ oca_problem_ranking_svm(
+ const scalar_type C_,
+ const std::vector<ranking_pair<sample_type> >& samples_,
+ const bool be_verbose_,
+ const scalar_type eps_,
+ const unsigned long max_iter,
+ const unsigned long dims_
+ ) :
+ samples(samples_),
+ C(C_),
+ be_verbose(be_verbose_),
+ eps(eps_),
+ max_iterations(max_iter),
+ dims(dims_)
+ {
+ }
+
+ virtual scalar_type get_c (
+ ) const
+ {
+ return C;
+ }
+
+ virtual long get_num_dimensions (
+ ) const
+ {
+ return dims;
+ }
+
+ virtual bool optimization_status (
+ scalar_type current_objective_value,
+ scalar_type current_error_gap,
+ scalar_type current_risk_value,
+ scalar_type current_risk_gap,
+ unsigned long num_cutting_planes,
+ unsigned long num_iterations
+ ) const
+ {
+ if (be_verbose)
+ {
+ using namespace std;
+ cout << "objective: " << current_objective_value << endl;
+ cout << "objective gap: " << current_error_gap << endl;
+ cout << "risk: " << current_risk_value << endl;
+ cout << "risk gap: " << current_risk_gap << endl;
+ cout << "num planes: " << num_cutting_planes << endl;
+ cout << "iter: " << num_iterations << endl;
+ cout << endl;
+ }
+
+ if (num_iterations >= max_iterations)
+ return true;
+
+ if (current_risk_gap < eps)
+ return true;
+
+ return false;
+ }
+
+ virtual bool risk_has_lower_bound (
+ scalar_type& lower_bound
+ ) const
+ {
+ lower_bound = 0;
+ return true;
+ }
+
+ virtual void get_risk (
+ matrix_type& w,
+ scalar_type& risk,
+ matrix_type& subgradient
+ ) const
+ {
+ subgradient.set_size(w.size(),1);
+ subgradient = 0;
+ risk = 0;
+
+ // Note that we want the risk value to be in terms of the fraction of overall
+ // rank flips. So a risk of 0.1 would mean that rank flips happen < 10% of the
+ // time.
+
+
+ std::vector<double> rel_scores;
+ std::vector<double> nonrel_scores;
+ std::vector<unsigned long> rel_counts;
+ std::vector<unsigned long> nonrel_counts;
+
+ unsigned long total_pairs = 0;
+
+ // loop over all the samples and compute the risk and its subgradient at the current solution point w
+ for (unsigned long i = 0; i < samples.size(); ++i)
+ {
+ rel_scores.resize(samples[i].relevant.size());
+ nonrel_scores.resize(samples[i].nonrelevant.size());
+
+ for (unsigned long k = 0; k < rel_scores.size(); ++k)
+ rel_scores[k] = dot(samples[i].relevant[k], w);
+
+ for (unsigned long k = 0; k < nonrel_scores.size(); ++k)
+ nonrel_scores[k] = dot(samples[i].nonrelevant[k], w) + 1;
+
+ count_ranking_inversions(rel_scores, nonrel_scores, rel_counts, nonrel_counts);
+
+ total_pairs += rel_scores.size()*nonrel_scores.size();
+
+ for (unsigned long k = 0; k < rel_counts.size(); ++k)
+ {
+ if (rel_counts[k] != 0)
+ {
+ risk -= rel_counts[k]*rel_scores[k];
+ subtract_from(subgradient, samples[i].relevant[k], rel_counts[k]);
+ }
+ }
+
+ for (unsigned long k = 0; k < nonrel_counts.size(); ++k)
+ {
+ if (nonrel_counts[k] != 0)
+ {
+ risk += nonrel_counts[k]*nonrel_scores[k];
+ add_to(subgradient, samples[i].nonrelevant[k], nonrel_counts[k]);
+ }
+ }
+
+ }
+
+ const scalar_type scale = 1.0/total_pairs;
+
+ risk *= scale;
+ subgradient = scale*subgradient;
+ }
+
+ private:
+
+ // -----------------------------------------------------
+ // -----------------------------------------------------
+
+
+ const std::vector<ranking_pair<sample_type> >& samples;
+ const scalar_type C;
+
+ const bool be_verbose;
+ const scalar_type eps;
+ const unsigned long max_iterations;
+ const unsigned long dims;
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename matrix_type,
+ typename sample_type,
+ typename scalar_type
+ >
+ oca_problem_ranking_svm<matrix_type, sample_type> make_oca_problem_ranking_svm (
+ const scalar_type C,
+ const std::vector<ranking_pair<sample_type> >& samples,
+ const bool be_verbose,
+ const scalar_type eps,
+ const unsigned long max_iterations,
+ const unsigned long dims
+ )
+ {
+ return oca_problem_ranking_svm<matrix_type, sample_type>(
+ C, samples, be_verbose, eps, max_iterations, dims);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename K
+ >
+ class svm_rank_trainer
+ {
+
+ public:
+ typedef K kernel_type;
+ typedef typename kernel_type::scalar_type scalar_type;
+ typedef typename kernel_type::sample_type sample_type;
+ typedef typename kernel_type::mem_manager_type mem_manager_type;
+ typedef decision_function<kernel_type> trained_function_type;
+
+ // You are getting a compiler error on this line because you supplied a non-linear kernel
+ // to the svm_rank_trainer object. You have to use one of the linear kernels with this
+ // trainer.
+ COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value ||
+ is_same_type<K, sparse_linear_kernel<sample_type> >::value ));
+
+ svm_rank_trainer (
+ )
+ {
+ C = 1;
+ verbose = false;
+ eps = 0.001;
+ max_iterations = 10000;
+ learn_nonnegative_weights = false;
+ last_weight_1 = false;
+ }
+
+ explicit svm_rank_trainer (
+ const scalar_type& C_
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(C_ > 0,
+ "\t svm_rank_trainer::svm_rank_trainer()"
+ << "\n\t C_ must be greater than 0"
+ << "\n\t C_: " << C_
+ << "\n\t this: " << this
+ );
+
+ C = C_;
+ verbose = false;
+ eps = 0.001;
+ max_iterations = 10000;
+ learn_nonnegative_weights = false;
+ last_weight_1 = false;
+ }
+
+ void set_epsilon (
+ scalar_type eps_
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(eps_ > 0,
+ "\t void svm_rank_trainer::set_epsilon()"
+ << "\n\t eps_ must be greater than 0"
+ << "\n\t eps_: " << eps_
+ << "\n\t this: " << this
+ );
+
+ eps = eps_;
+ }
+
+ const scalar_type get_epsilon (
+ ) const { return eps; }
+
+ unsigned long get_max_iterations (
+ ) const { return max_iterations; }
+
+ void set_max_iterations (
+ unsigned long max_iter
+ )
+ {
+ max_iterations = max_iter;
+ }
+
+ void be_verbose (
+ )
+ {
+ verbose = true;
+ }
+
+ void be_quiet (
+ )
+ {
+ verbose = false;
+ }
+
+ bool forces_last_weight_to_1 (
+ ) const
+ {
+ return last_weight_1;
+ }
+
+ void force_last_weight_to_1 (
+ bool should_last_weight_be_1
+ )
+ {
+ last_weight_1 = should_last_weight_be_1;
+ if (last_weight_1)
+ prior.set_size(0);
+ }
+
+ void set_oca (
+ const oca& item
+ )
+ {
+ solver = item;
+ }
+
+ const oca get_oca (
+ ) const
+ {
+ return solver;
+ }
+
+ const kernel_type get_kernel (
+ ) const
+ {
+ return kernel_type();
+ }
+
+ bool learns_nonnegative_weights (
+ ) const { return learn_nonnegative_weights; }
+
+ void set_learns_nonnegative_weights (
+ bool value
+ )
+ {
+ learn_nonnegative_weights = value;
+ if (learn_nonnegative_weights)
+ prior.set_size(0);
+ }
+
+ void set_prior (
+ const trained_function_type& prior_
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(prior_.basis_vectors.size() == 1 &&
+ prior_.alpha(0) == 1,
+ "\t void svm_rank_trainer::set_prior()"
+ << "\n\t The supplied prior could not have been created by this object's train() method."
+ << "\n\t prior_.basis_vectors.size(): " << prior_.basis_vectors.size()
+ << "\n\t prior_.alpha(0): " << prior_.alpha(0)
+ << "\n\t this: " << this
+ );
+
+ prior = sparse_to_dense(prior_.basis_vectors(0));
+ learn_nonnegative_weights = false;
+ last_weight_1 = false;
+ }
+
+ bool has_prior (
+ ) const
+ {
+ return prior.size() != 0;
+ }
+
+ void set_c (
+ scalar_type C_
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(C_ > 0,
+ "\t void svm_rank_trainer::set_c()"
+ << "\n\t C_ must be greater than 0"
+ << "\n\t C_: " << C_
+ << "\n\t this: " << this
+ );
+
+ C = C_;
+ }
+
+ const scalar_type get_c (
+ ) const
+ {
+ return C;
+ }
+
+ const decision_function<kernel_type> train (
+ const std::vector<ranking_pair<sample_type> >& samples
+ ) const
+ {
+ // make sure requires clause is not broken
+ DLIB_CASSERT(is_ranking_problem(samples) == true,
+ "\t decision_function svm_rank_trainer::train(samples)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t samples.size(): " << samples.size()
+ << "\n\t is_ranking_problem(samples): " << is_ranking_problem(samples)
+ );
+
+
+ typedef matrix<scalar_type,0,1> w_type;
+ w_type w;
+
+ const unsigned long num_dims = max_index_plus_one(samples);
+
+ unsigned long num_nonnegative = 0;
+ if (learn_nonnegative_weights)
+ {
+ num_nonnegative = num_dims;
+ }
+
+ unsigned long force_weight_1_idx = std::numeric_limits<unsigned long>::max();
+ if (last_weight_1)
+ {
+ force_weight_1_idx = num_dims-1;
+ }
+
+ if (has_prior())
+ {
+ if (is_matrix<sample_type>::value)
+ {
+ // make sure requires clause is not broken
+ DLIB_CASSERT(num_dims == (unsigned long)prior.size(),
+ "\t decision_function svm_rank_trainer::train(samples)"
+ << "\n\t The dimension of the training vectors must match the dimension of\n"
+ << "\n\t those used to create the prior."
+ << "\n\t num_dims: " << num_dims
+ << "\n\t prior.size(): " << prior.size()
+ );
+ }
+ const unsigned long dims = std::max(num_dims, (unsigned long)prior.size());
+ // In the case of sparse sample vectors, it is possible that the input
+ // vector dimensionality is larger than the prior vector dimensionality.
+ // We need to check for this case and pad prior with zeros if it is the
+ // case.
+ if ((unsigned long)prior.size() < dims)
+ {
+ matrix<scalar_type,0,1> prior_temp = join_cols(prior, zeros_matrix<scalar_type>(dims-prior.size(),1));
+ solver( make_oca_problem_ranking_svm<w_type>(C, samples, verbose, eps, max_iterations, dims),
+ w,
+ prior_temp);
+ }
+ else
+ {
+ solver( make_oca_problem_ranking_svm<w_type>(C, samples, verbose, eps, max_iterations, dims),
+ w,
+ prior);
+ }
+
+ }
+ else
+ {
+ solver( make_oca_problem_ranking_svm<w_type>(C, samples, verbose, eps, max_iterations, num_dims),
+ w,
+ num_nonnegative,
+ force_weight_1_idx);
+ }
+
+
+ // put the solution into a decision function and then return it
+ decision_function<kernel_type> df;
+ df.b = 0;
+ df.basis_vectors.set_size(1);
+ // Copy the results into the output basis vector. The output vector might be a
+ // sparse vector container so we need to use this special kind of copy to
+ // handle that case.
+ assign(df.basis_vectors(0), matrix_cast<scalar_type>(w));
+ df.alpha.set_size(1);
+ df.alpha(0) = 1;
+
+ return df;
+ }
+
+ const decision_function<kernel_type> train (
+ const ranking_pair<sample_type>& sample
+ ) const
+ {
+ return train(std::vector<ranking_pair<sample_type> >(1, sample));
+ }
+
+ private:
+
+ scalar_type C;
+ oca solver;
+ scalar_type eps;
+ bool verbose;
+ unsigned long max_iterations;
+ bool learn_nonnegative_weights;
+ bool last_weight_1;
+ matrix<scalar_type,0,1> prior;
+ };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SVM_RANK_TrAINER_Hh_
+