Diffstat (limited to 'ml/dlib/dlib/svm/rr_trainer.h')
-rw-r--r-- | ml/dlib/dlib/svm/rr_trainer.h | 456 |
1 file changed, 0 insertions, 456 deletions
diff --git a/ml/dlib/dlib/svm/rr_trainer.h b/ml/dlib/dlib/svm/rr_trainer.h
deleted file mode 100644
index 09177217e..000000000
--- a/ml/dlib/dlib/svm/rr_trainer.h
+++ /dev/null
@@ -1,456 +0,0 @@
-// Copyright (C) 2010  Davis E. King (davis@dlib.net)
-// License: Boost Software License   See LICENSE.txt for the full license.
-#ifndef DLIB_RR_TRAInER_Hh_
-#define DLIB_RR_TRAInER_Hh_
-
-#include "../algs.h"
-#include "function.h"
-#include "kernel.h"
-#include "empirical_kernel_map.h"
-#include "linearly_independent_subset_finder.h"
-#include "../statistics.h"
-#include "rr_trainer_abstract.h"
-#include <vector>
-#include <iostream>
-
-namespace dlib
-{
-    template <
-        typename K
-        >
-    class rr_trainer
-    {
-
-    public:
-        typedef K kernel_type;
-        typedef typename kernel_type::scalar_type scalar_type;
-        typedef typename kernel_type::sample_type sample_type;
-        typedef typename kernel_type::mem_manager_type mem_manager_type;
-        typedef decision_function<kernel_type> trained_function_type;
-
-        // You are getting a compiler error on this line because you supplied a non-linear or
-        // sparse kernel to the rr_trainer object.  You have to use dlib::linear_kernel with this trainer.
-        COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value));
-
-        rr_trainer (
-        ) :
-            verbose(false),
-            use_regression_loss(true),
-            lambda(0)
-        {
-            // default lambda search list
-            lams = matrix_cast<scalar_type>(logspace(-9, 2, 50));
-        }
-
-        void be_verbose (
-        )
-        {
-            verbose = true;
-        }
-
-        void be_quiet (
-        )
-        {
-            verbose = false;
-        }
-
-        void use_regression_loss_for_loo_cv (
-        )
-        {
-            use_regression_loss = true;
-        }
-
-        void use_classification_loss_for_loo_cv (
-        )
-        {
-            use_regression_loss = false;
-        }
-
-        bool will_use_regression_loss_for_loo_cv (
-        ) const
-        {
-            return use_regression_loss;
-        }
-
-        const kernel_type get_kernel (
-        ) const
-        {
-            return kernel_type();
-        }
-
-        void set_lambda (
-            scalar_type lambda_
-        )
-        {
-            // make sure requires clause is not broken
-            DLIB_ASSERT(lambda_ >= 0,
-                "\t void rr_trainer::set_lambda()"
-                << "\n\t lambda must be greater than or equal to 0"
-                << "\n\t lambda_: " << lambda_
-                << "\n\t this:    " << this
-                );
-
-            lambda = lambda_;
-        }
-
-        const scalar_type get_lambda (
-        ) const
-        {
-            return lambda;
-        }
-
-        template <typename EXP>
-        void set_search_lambdas (
-            const matrix_exp<EXP>& lambdas
-        )
-        {
-            // make sure requires clause is not broken
-            DLIB_ASSERT(is_vector(lambdas) && lambdas.size() > 0 && min(lambdas) > 0,
-                "\t void rr_trainer::set_search_lambdas()"
-                << "\n\t lambdas must be a non-empty vector of values"
-                << "\n\t is_vector(lambdas): " << is_vector(lambdas)
-                << "\n\t lambdas.size():     " << lambdas.size()
-                << "\n\t min(lambdas):       " << min(lambdas)
-                << "\n\t this:               " << this
-                );
-
-
-            lams = matrix_cast<scalar_type>(lambdas);
-        }
-
-        const matrix<scalar_type,0,0,mem_manager_type>& get_search_lambdas (
-        ) const
-        {
-            return lams;
-        }
-
-        template <
-            typename in_sample_vector_type,
-            typename in_scalar_vector_type
-            >
-        const decision_function<kernel_type> train (
-            const in_sample_vector_type& x,
-            const in_scalar_vector_type& y
-        ) const
-        {
-            std::vector<scalar_type> temp;
-            scalar_type temp2;
-            return do_train(mat(x), mat(y), false, temp, temp2);
-        }
-
-        template <
-            typename in_sample_vector_type,
-            typename in_scalar_vector_type
-            >
-        const decision_function<kernel_type> train (
-            const in_sample_vector_type& x,
-            const in_scalar_vector_type& y,
-            std::vector<scalar_type>& loo_values
-        ) const
-        {
-            scalar_type temp;
-            return do_train(mat(x), mat(y), true, loo_values, temp);
-        }
-
-        template <
-            typename in_sample_vector_type,
-            typename in_scalar_vector_type
-            >
-        const decision_function<kernel_type> train (
-            const in_sample_vector_type& x,
-            const in_scalar_vector_type& y,
-            std::vector<scalar_type>& loo_values,
-            scalar_type& lambda_used
-        ) const
-        {
-            return do_train(mat(x), mat(y), true, loo_values, lambda_used);
-        }
-
-
-    private:
-
-        template <
-            typename in_sample_vector_type,
-            typename in_scalar_vector_type
-            >
-        const decision_function<kernel_type> do_train (
-            const in_sample_vector_type& x,
-            const in_scalar_vector_type& y,
-            const bool output_loo_values,
-            std::vector<scalar_type>& loo_values,
-            scalar_type& the_lambda
-        ) const
-        {
-            // make sure requires clause is not broken
-            DLIB_ASSERT(is_learning_problem(x,y),
-                "\t decision_function rr_trainer::train(x,y)"
-                << "\n\t invalid inputs were given to this function"
-                << "\n\t is_vector(x): " << is_vector(x)
-                << "\n\t is_vector(y): " << is_vector(y)
-                << "\n\t x.size():     " << x.size()
-                << "\n\t y.size():     " << y.size()
-                );
-
-#ifdef ENABLE_ASSERTS
-            if (get_lambda() == 0 && will_use_regression_loss_for_loo_cv() == false)
-            {
-                // make sure requires clause is not broken
-                DLIB_ASSERT(is_binary_classification_problem(x,y),
-                    "\t decision_function rr_trainer::train(x,y)"
-                    << "\n\t invalid inputs were given to this function"
-                    );
-            }
-#endif
-
-            typedef matrix<scalar_type,0,1,mem_manager_type> column_matrix_type;
-            typedef matrix<scalar_type,0,0,mem_manager_type> general_matrix_type;
-
-            const long dims = x(0).size();
-
-            /*
-                Notes on the solution of ridge regression
-
-                Let A = an x.size() by dims matrix which contains all the data samples.
-
-                Let I = an identity matrix
-
-                Let C = trans(A)*A
-                Let L = trans(A)*y
-
-                Then the optimal w is given by:
-                    w = inv(C + lambda*I) * L
-
-
-                There is a trick to compute leave one out cross validation results for many different
-                lambda values quickly.  The following paper has a detailed discussion of various
-                approaches:
-
-                    Notes on Regularized Least Squares by Ryan M. Rifkin and Ross A. Lippert.
-
-                In the implementation of the rr_trainer I'm only using two simple equations
-                from the above paper.
-
-
-                First note that inv(C + lambda*I) can be computed for many different lambda
-                values in an efficient way by using an eigen decomposition of C.  So we use
-                the fact that:
-                    inv(C + lambda*I) == V*inv(D + lambda*I)*trans(V)
-                    where V*D*trans(V) == C
-
-                Also, via some simple linear algebra the above paper works out that the leave one out
-                value for a sample x(i) is equal to the following:
-                    Let G = inv(C + lambda*I)
-                    let val = trans(x(i))*G*x(i);
-
-                    leave one out value for sample x(i):
-                        LOOV = (trans(w)*x(i) - y(i)*val) / (1 - val)
-
-                    leave one out error for sample x(i):
-                        LOOE = loss(y(i), LOOV)
-
-
-                Finally, note that we will pretend there was a 1 appended to the end of each
-                vector in x.  We won't actually do that though because we don't want to
-                have to make a copy of all the samples.  So throughout the following code
-                I have explicitly dealt with this.
-            */
-
-            general_matrix_type C, tempm, G;
-            column_matrix_type L, tempv, w;
-
-            // compute C and L
-            for (long i = 0; i < x.size(); ++i)
-            {
-                C += x(i)*trans(x(i));
-                L += y(i)*x(i);
-                tempv += x(i);
-            }
-
-            // Account for the extra 1 that we pretend is appended to x
-            // Make C = [C      tempv
-            //           tempv' x.size()]
-            C = join_cols(join_rows(C, tempv),
-                          join_rows(trans(tempv), uniform_matrix<scalar_type>(1,1, x.size())));
-            L = join_cols(L, uniform_matrix<scalar_type>(1,1, sum(y)));
-
-            eigenvalue_decomposition<general_matrix_type> eig(make_symmetric(C));
-            const general_matrix_type V = eig.get_pseudo_v();
-            const column_matrix_type  D = eig.get_real_eigenvalues();
-
-            // We can save some work by pre-multiplying the x vectors by trans(V)
-            // and saving the result so we don't have to recompute it over and over later.
-            matrix<column_matrix_type,0,1,mem_manager_type > Vx;
-            if (lambda == 0 || output_loo_values)
-            {
-                // Save the transpose of V into a temporary because the subsequent matrix
-                // vector multiplies will be faster (because of better cache locality).
-                const general_matrix_type transV( colm(trans(V),range(0,dims-1)) );
-                // Remember the pretend 1 at the end of x(*).  We want to multiply trans(V)*x(*)
-                // so to do this we pull the last column off trans(V) and store it separately.
-                const column_matrix_type lastV = colm(trans(V), dims);
-                Vx.set_size(x.size());
-                for (long i = 0; i < x.size(); ++i)
-                {
-                    Vx(i) = transV*x(i);
-                    Vx(i) = squared(Vx(i) + lastV);
-                }
-            }
-
-            the_lambda = lambda;
-
-            // If we need to automatically select a lambda then do so using the LOOE trick described
-            // above.
-            bool did_loov = false;
-            scalar_type best_looe = std::numeric_limits<scalar_type>::max();
-            if (lambda == 0)
-            {
-                did_loov = true;
-
-                // Compute leave one out errors for a bunch of different lambdas and pick the best one.
-                for (long idx = 0; idx < lams.size(); ++idx)
-                {
-                    // first compute G
-                    tempv = 1.0/(D + lams(idx));
-                    tempm = scale_columns(V,tempv);
-                    G = tempm*trans(V);
-
-                    // compute the solution w for the current lambda
-                    w = G*L;
-
-                    // make w have the same length as the x vectors.
-                    const scalar_type b = w(dims);
-                    w = colm(w,0,dims);
-
-                    scalar_type looe = 0;
-                    for (long i = 0; i < x.size(); ++i)
-                    {
-                        // perform equivalent of: val = trans(x(i))*G*x(i);
-                        const scalar_type val = dot(tempv, Vx(i));
-                        const scalar_type temp = (1 - val);
-                        scalar_type loov;
-                        if (temp != 0)
-                            loov = (trans(w)*x(i) + b - y(i)*val) / temp;
-                        else
-                            loov = 0;
-
-                        looe += loss(loov, y(i));
-                    }
-
-                    // Keep track of the lambda which gave the lowest looe.  If two lambdas
-                    // have the same looe then pick the biggest lambda.
-                    if (looe < best_looe || (looe == best_looe && lams(idx) > the_lambda))
-                    {
-                        best_looe = looe;
-                        the_lambda = lams(idx);
-                    }
-                }
-
-                best_looe /= x.size();
-            }
-
-
-
-            // Now perform the main training.  That is, find w.
-            // first, compute G = inv(C + the_lambda*I)
-            tempv = 1.0/(D + the_lambda);
-            tempm = scale_columns(V,tempv);
-            G = tempm*trans(V);
-            w = G*L;
-
-            // make w have the same length as the x vectors.
-            const scalar_type b = w(dims);
-            w = colm(w,0,dims);
-
-
-            // If we haven't done this already and we are supposed to then compute the LOO error rate for
-            // the current lambda and store the result in best_looe.
-            if (output_loo_values)
-            {
-                loo_values.resize(x.size());
-                did_loov = true;
-                best_looe = 0;
-                for (long i = 0; i < x.size(); ++i)
-                {
-                    // perform equivalent of: val = trans(x(i))*G*x(i);
-                    const scalar_type val = dot(tempv, Vx(i));
-                    const scalar_type temp = (1 - val);
-                    scalar_type loov;
-                    if (temp != 0)
-                        loov = (trans(w)*x(i) + b - y(i)*val) / temp;
-                    else
-                        loov = 0;
-
-                    best_looe += loss(loov, y(i));
-                    loo_values[i] = loov;
-                }
-
-                best_looe /= x.size();
-
-            }
-            else
-            {
-                loo_values.clear();
-            }
-
-            if (verbose && did_loov)
-            {
-                using namespace std;
-                cout << "Using lambda: " << the_lambda << endl;
-                if (use_regression_loss)
-                    cout << "LOO Mean Squared Error:   " << best_looe << endl;
-                else
-                    cout << "LOO Classification Error: " << best_looe << endl;
-            }
-
-            // convert w into a proper decision function
-            decision_function<kernel_type> df;
-            df.alpha.set_size(1);
-            df.alpha = 1;
-            df.basis_vectors.set_size(1);
-            df.basis_vectors(0) = w;
-            df.b = -b; // don't forget about the bias we stuck onto all the vectors
-
-            return df;
-        }
-
-        inline scalar_type loss (
-            const scalar_type& a,
-            const scalar_type& b
-        ) const
-        {
-            if (use_regression_loss)
-            {
-                return (a-b)*(a-b);
-            }
-            else
-            {
-                // if a and b have the same sign then no loss
-                if (a*b >= 0)
-                    return 0;
-                else
-                    return 1;
-            }
-        }
-
-
-        /*!
-            CONVENTION
-                - get_lambda() == lambda
-                - get_kernel() == kernel_type()
-                - will_use_regression_loss_for_loo_cv() == use_regression_loss
-                - get_search_lambdas() == lams
-        !*/
-
-        bool verbose;
-        bool use_regression_loss;
-
-        scalar_type lambda;
-
-        matrix<scalar_type,0,0,mem_manager_type> lams;
-    };
-
-}
-
-#endif // DLIB_RR_TRAInER_Hh_
-
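
For reference, the leave-one-out shortcut described in do_train's comment block can be written out as follows. This is a sketch restating the equations from the Rifkin and Lippert notes cited in the deleted file; A, C, L, V, D, and loss match the comment's definitions, with the implicit trailing 1 on each sample folded into A.

% Ridge regression solution: one eigendecomposition C = V D V^T is
% reused to solve for every candidate lambda at the cost of inverting
% a diagonal matrix.
\[
  w_\lambda = (C + \lambda I)^{-1} L = V (D + \lambda I)^{-1} V^{T} L
\]
% Leave-one-out prediction and error, where
% val_i = x_i^T (C + lambda I)^{-1} x_i is what the code computes as
% dot(tempv, Vx(i)):
\[
  \mathrm{LOOV}_i = \frac{w_\lambda^{T} x_i - y_i \, \mathrm{val}_i}{1 - \mathrm{val}_i},
  \qquad
  \mathrm{LOOE} = \frac{1}{n} \sum_{i=1}^{n} \mathrm{loss}\big(y_i, \mathrm{LOOV}_i\big)
\]

The code averages the loss over the n samples (the best_looe /= x.size() step), so LOOE above is written as a mean.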
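Since this page only records the deletion, here is a minimal usage sketch of the removed trainer for anyone digging through the history. It assumes a dlib checkout that still ships rr_trainer via <dlib/svm.h>; the toy data and variable names are illustrative, but the three train() overloads are exactly those shown in the diff above.

#include <dlib/svm.h>
#include <iostream>
#include <vector>

int main()
{
    using namespace dlib;
    typedef matrix<double,2,1> sample_type;
    // rr_trainer's COMPILE_TIME_ASSERT requires a linear kernel.
    typedef linear_kernel<sample_type> kernel_type;

    // Toy regression problem: y = 2*a - b, noise free.
    std::vector<sample_type> samples;
    std::vector<double> targets;
    for (double a = 0; a < 5; ++a)
    {
        for (double b = 0; b < 5; ++b)
        {
            sample_type s;
            s = a, b;
            samples.push_back(s);
            targets.push_back(2*a - b);
        }
    }

    rr_trainer<kernel_type> trainer;
    // Leaving lambda at its default of 0 makes train() search
    // get_search_lambdas() and keep the value with the best LOO error.
    std::vector<double> loo_values;
    double lambda_used;
    decision_function<kernel_type> df = trainer.train(samples, targets, loo_values, lambda_used);

    std::cout << "lambda used: " << lambda_used << std::endl;
    sample_type q;
    q = 3, 1;
    std::cout << "prediction for (3,1): " << df(q) << std::endl;  // roughly 5
    return 0;
}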