Diffstat (limited to 'ml/dlib/dlib/svm/reduced.h')
-rw-r--r-- ml/dlib/dlib/svm/reduced.h | 613
1 file changed, 613 insertions, 0 deletions
diff --git a/ml/dlib/dlib/svm/reduced.h b/ml/dlib/dlib/svm/reduced.h
new file mode 100644
index 000000000..b4c5b63ca
--- /dev/null
+++ b/ml/dlib/dlib/svm/reduced.h
@@ -0,0 +1,613 @@
+// Copyright (C) 2008 Davis E. King (davis@dlib.net)
+// License: Boost Software License. See LICENSE.txt for the full license.
+#ifndef DLIB_REDUCEd_TRAINERS_
+#define DLIB_REDUCEd_TRAINERS_
+
+#include "reduced_abstract.h"
+#include "../matrix.h"
+#include "../algs.h"
+#include "function.h"
+#include "kernel.h"
+#include "kcentroid.h"
+#include "linearly_independent_subset_finder.h"
+#include "../optimization.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename trainer_type
+ >
+ class reduced_decision_function_trainer
+ {
+ public:
+ typedef typename trainer_type::kernel_type kernel_type;
+ typedef typename trainer_type::scalar_type scalar_type;
+ typedef typename trainer_type::sample_type sample_type;
+ typedef typename trainer_type::mem_manager_type mem_manager_type;
+ typedef typename trainer_type::trained_function_type trained_function_type;
+
+ reduced_decision_function_trainer (
+ ) :num_bv(0) {}
+
+        reduced_decision_function_trainer (
+            const trainer_type& trainer_,
+            const unsigned long num_bv_
+        ) :
+            trainer(trainer_),
+            num_bv(num_bv_)
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(num_bv > 0,
+ "\t reduced_decision_function_trainer()"
+ << "\n\t you have given invalid arguments to this function"
+ << "\n\t num_bv: " << num_bv
+ );
+ }
+
+ template <
+ typename in_sample_vector_type,
+ typename in_scalar_vector_type
+ >
+ const decision_function<kernel_type> train (
+ const in_sample_vector_type& x,
+ const in_scalar_vector_type& y
+ ) const
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(num_bv > 0,
+ "\t reduced_decision_function_trainer::train(x,y)"
+ << "\n\t You have tried to use an uninitialized version of this object"
+ << "\n\t num_bv: " << num_bv );
+ return do_train(mat(x), mat(y));
+ }
+
+ private:
+
+ // ------------------------------------------------------------------------------------
+
+ template <
+ typename in_sample_vector_type,
+ typename in_scalar_vector_type
+ >
+ const decision_function<kernel_type> do_train (
+ const in_sample_vector_type& x,
+ const in_scalar_vector_type& y
+ ) const
+ {
+            // get the decision function object we are going to try to approximate
+ const decision_function<kernel_type>& dec_funct = trainer.train(x,y);
+
+            // now find a linearly independent subset of at most num_bv of the training points.
+ linearly_independent_subset_finder<kernel_type> lisf(dec_funct.kernel_function, num_bv);
+ fill_lisf(lisf, x);
+
+ // The next few statements just find the best weights with which to approximate
+ // the dec_funct object with the smaller set of vectors in the lisf dictionary. This
+ // is really just a simple application of some linear algebra. For the details
+            // see page 554 of Learning with Kernels by Scholkopf and Smola where they talk
+ // about "Optimal Expansion Coefficients."
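+            //
+            // In matrix form (a sketch of the math implemented just below), with K_zz the
+            // kernel matrix of the lisf dictionary vectors and K_zx the kernel matrix
+            // between the dictionary and dec_funct.basis_vectors, the optimal weights are:
+            //    alpha = inv(K_zz) * K_zx * dec_funct.alpha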
+
+ const kernel_type kern(dec_funct.kernel_function);
+
+ matrix<scalar_type,0,1,mem_manager_type> alpha;
+
+ alpha = lisf.get_inv_kernel_marix()*(kernel_matrix(kern,lisf,dec_funct.basis_vectors)*dec_funct.alpha);
+
+ decision_function<kernel_type> new_df(alpha,
+ 0,
+ kern,
+ lisf.get_dictionary());
+
+ // now we have to figure out what the new bias should be. It might be a little
+ // different since we just messed with all the weights and vectors.
+ double bias = 0;
+ for (long i = 0; i < x.nr(); ++i)
+ {
+ bias += new_df(x(i)) - dec_funct(x(i));
+ }
+
+ new_df.b = bias/x.nr();
+
+ return new_df;
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ trainer_type trainer;
+ unsigned long num_bv;
+
+
+ }; // end of class reduced_decision_function_trainer
+
+ template <typename trainer_type>
+ const reduced_decision_function_trainer<trainer_type> reduced (
+ const trainer_type& trainer,
+ const unsigned long num_bv
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(num_bv > 0,
+ "\tconst reduced_decision_function_trainer reduced()"
+ << "\n\t you have given invalid arguments to this function"
+ << "\n\t num_bv: " << num_bv
+ );
+
+ return reduced_decision_function_trainer<trainer_type>(trainer, num_bv);
+ }
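+
+    // Example usage (an illustrative sketch, not part of the original file; the
+    // samples/labels variables and kernel parameter are assumptions, while
+    // svm_nu_trainer and radial_basis_kernel come from elsewhere in dlib):
+    //
+    //    typedef matrix<double,0,1> sample_type;
+    //    typedef radial_basis_kernel<sample_type> kernel_type;
+    //    svm_nu_trainer<kernel_type> trainer;
+    //    trainer.set_kernel(kernel_type(0.1));
+    //    // train as usual, but reduce the result to at most 10 basis vectors
+    //    decision_function<kernel_type> df = reduced(trainer,10).train(samples, labels);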
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ namespace red_impl
+ {
+
+ // ------------------------------------------------------------------------------------
+
+ template <typename kernel_type>
+ class objective
+ {
+ /*
+ This object represents the objective function we will try to
+ minimize in approximate_distance_function().
+
+                The objective is the squared distance, in kernel-induced feature space,
+                between the original distance function and the approximated version.
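+
+                Written out (this matches operator() below), with z = out_vectors,
+                b the expansion weights, and x/alpha the basis vectors and weights
+                of the target distance function:
+                    F(b,z) = b'*K_zz*b - 2*b'*K_zx*alpha + alpha'*K_xx*alpha
+                where K_zz, K_zx, and K_xx are kernel matrices and the last term
+                equals dist_funct.get_squared_norm().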
+
+ */
+ typedef typename kernel_type::scalar_type scalar_type;
+ typedef typename kernel_type::sample_type sample_type;
+ typedef typename kernel_type::mem_manager_type mem_manager_type;
+ public:
+ objective(
+ const distance_function<kernel_type>& dist_funct_,
+ matrix<scalar_type,0,1,mem_manager_type>& b_,
+ matrix<sample_type,0,1,mem_manager_type>& out_vectors_
+ ) :
+ dist_funct(dist_funct_),
+ b(b_),
+ out_vectors(out_vectors_)
+ {
+ }
+
+ const matrix<scalar_type, 0, 1, mem_manager_type> state_to_vector (
+ ) const
+ /*!
+ ensures
+ - returns a vector that contains all the information necessary to
+ reproduce the current state of the approximated distance function
+ !*/
+ {
+ matrix<scalar_type, 0, 1, mem_manager_type> z(b.nr() + out_vectors.size()*out_vectors(0).nr());
+ long i = 0;
+ for (long j = 0; j < b.nr(); ++j)
+ {
+ z(i) = b(j);
+ ++i;
+ }
+
+ for (long j = 0; j < out_vectors.size(); ++j)
+ {
+ for (long k = 0; k < out_vectors(j).size(); ++k)
+ {
+ z(i) = out_vectors(j)(k);
+ ++i;
+ }
+ }
+ return z;
+ }
+
+
+ void vector_to_state (
+ const matrix<scalar_type, 0, 1, mem_manager_type>& z
+ ) const
+ /*!
+ requires
+ - z came from the state_to_vector() function or has a compatible format
+ ensures
+ - loads the vector z into the state variables of the approximate
+ distance function (i.e. b and out_vectors)
+ !*/
+ {
+ long i = 0;
+ for (long j = 0; j < b.nr(); ++j)
+ {
+ b(j) = z(i);
+ ++i;
+ }
+
+ for (long j = 0; j < out_vectors.size(); ++j)
+ {
+ for (long k = 0; k < out_vectors(j).size(); ++k)
+ {
+ out_vectors(j)(k) = z(i);
+ ++i;
+ }
+ }
+ }
+
+ double operator() (
+ const matrix<scalar_type, 0, 1, mem_manager_type>& z
+ ) const
+ /*!
+ ensures
+ - loads the current approximate distance function with z
+                    - returns the squared distance between the original distance
+                      function and the approximate one.
+ !*/
+ {
+ vector_to_state(z);
+ const kernel_type k(dist_funct.get_kernel());
+
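+                // The loops below evaluate the objective term by term: first the
+                // cross term -2*b'*K_zx*alpha, then the quadratic term b'*K_zz*b,
+                // and finally the constant alpha'*K_xx*alpha from the target.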
+ double temp = 0;
+ for (long i = 0; i < out_vectors.size(); ++i)
+ {
+ for (long j = 0; j < dist_funct.get_basis_vectors().nr(); ++j)
+ {
+ temp -= b(i)*dist_funct.get_alpha()(j)*k(out_vectors(i), dist_funct.get_basis_vectors()(j));
+ }
+ }
+
+ temp *= 2;
+
+ for (long i = 0; i < out_vectors.size(); ++i)
+ {
+ for (long j = 0; j < out_vectors.size(); ++j)
+ {
+ temp += b(i)*b(j)*k(out_vectors(i), out_vectors(j));
+ }
+ }
+
+ return temp + dist_funct.get_squared_norm();
+ }
+
+ private:
+
+ const distance_function<kernel_type>& dist_funct;
+ matrix<scalar_type,0,1,mem_manager_type>& b;
+ matrix<sample_type,0,1,mem_manager_type>& out_vectors;
+
+ };
+
+ // ------------------------------------------------------------------------------------
+
+ template <typename kernel_type>
+ class objective_derivative
+ {
+ /*!
+ This object represents the derivative of the objective object
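+
+                For the objective F(b,z) = b'*K_zz*b - 2*b'*K_zx*alpha + const, the
+                gradient computed below is (a sketch of the math):
+                    dF/db(i) = 2*( sum_j b(j)*k(z_i,z_j) - sum_j alpha(j)*k(z_i,x_j) )
+                    dF/dz_i  = 2*b(i)*( sum_j b(j)*dk(z_j,z_i) - sum_j alpha(j)*dk(x_j,z_i) )
+                where dk is the derivative of the kernel with respect to its second
+                argument, as computed by kernel_derivative.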
+ !*/
+ typedef typename kernel_type::scalar_type scalar_type;
+ typedef typename kernel_type::sample_type sample_type;
+ typedef typename kernel_type::mem_manager_type mem_manager_type;
+ public:
+
+
+ objective_derivative(
+ const distance_function<kernel_type>& dist_funct_,
+ matrix<scalar_type,0,1,mem_manager_type>& b_,
+ matrix<sample_type,0,1,mem_manager_type>& out_vectors_
+ ) :
+ dist_funct(dist_funct_),
+ b(b_),
+ out_vectors(out_vectors_)
+ {
+ }
+
+ void vector_to_state (
+ const matrix<scalar_type, 0, 1, mem_manager_type>& z
+ ) const
+ /*!
+ requires
+ - z came from the state_to_vector() function or has a compatible format
+ ensures
+ - loads the vector z into the state variables of the approximate
+ distance function (i.e. b and out_vectors)
+ !*/
+ {
+ long i = 0;
+ for (long j = 0; j < b.nr(); ++j)
+ {
+ b(j) = z(i);
+ ++i;
+ }
+
+ for (long j = 0; j < out_vectors.size(); ++j)
+ {
+ for (long k = 0; k < out_vectors(j).size(); ++k)
+ {
+ out_vectors(j)(k) = z(i);
+ ++i;
+ }
+ }
+ }
+
+ const matrix<scalar_type,0,1,mem_manager_type>& operator() (
+ const matrix<scalar_type, 0, 1, mem_manager_type>& z
+ ) const
+ /*!
+ ensures
+ - loads the current approximate distance function with z
+                    - returns the gradient of the squared distance between the
+                      original distance function and the approximate one.
+ !*/
+ {
+ vector_to_state(z);
+ res.set_size(z.nr());
+ set_all_elements(res,0);
+ const kernel_type k(dist_funct.get_kernel());
+ const kernel_derivative<kernel_type> K_der(k);
+
+ // first compute the gradient for the beta weights
+ for (long i = 0; i < out_vectors.size(); ++i)
+ {
+ for (long j = 0; j < out_vectors.size(); ++j)
+ {
+ res(i) += b(j)*k(out_vectors(i), out_vectors(j));
+ }
+ }
+ for (long i = 0; i < out_vectors.size(); ++i)
+ {
+ for (long j = 0; j < dist_funct.get_basis_vectors().size(); ++j)
+ {
+ res(i) -= dist_funct.get_alpha()(j)*k(out_vectors(i), dist_funct.get_basis_vectors()(j));
+ }
+ }
+
+
+                // now compute the gradient with respect to the vectors that go into the kernel functions
+ long pos = out_vectors.size();
+ const long num = out_vectors(0).nr();
+ temp.set_size(num,1);
+ for (long i = 0; i < out_vectors.size(); ++i)
+ {
+ set_all_elements(temp,0);
+ for (long j = 0; j < out_vectors.size(); ++j)
+ {
+ temp += b(j)*K_der(out_vectors(j), out_vectors(i));
+ }
+ for (long j = 0; j < dist_funct.get_basis_vectors().nr(); ++j)
+ {
+ temp -= dist_funct.get_alpha()(j)*K_der(dist_funct.get_basis_vectors()(j), out_vectors(i) );
+ }
+
+ // store the gradient for out_vectors(i) into result in the proper spot
+ set_subm(res,pos,0,num,1) = b(i)*temp;
+ pos += num;
+ }
+
+
+ res *= 2;
+ return res;
+ }
+
+ private:
+
+ mutable matrix<scalar_type, 0, 1, mem_manager_type> res;
+ mutable sample_type temp;
+
+ const distance_function<kernel_type>& dist_funct;
+ matrix<scalar_type,0,1,mem_manager_type>& b;
+ matrix<sample_type,0,1,mem_manager_type>& out_vectors;
+
+ };
+
+ // ------------------------------------------------------------------------------------
+
+ }
+
+ template <
+ typename K,
+ typename stop_strategy_type,
+ typename T
+ >
+ distance_function<K> approximate_distance_function (
+ stop_strategy_type stop_strategy,
+ const distance_function<K>& target,
+ const T& starting_basis
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(target.get_basis_vectors().size() > 0 &&
+ starting_basis.size() > 0,
+ "\t distance_function approximate_distance_function()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t target.get_basis_vectors().size(): " << target.get_basis_vectors().size()
+ << "\n\t starting_basis.size(): " << starting_basis.size()
+ );
+
+ using namespace red_impl;
+ // The next few statements just find the best weights with which to approximate
+ // the target object with the set of basis vectors in starting_basis. This
+ // is really just a simple application of some linear algebra. For the details
+        // see page 554 of Learning with Kernels by Scholkopf and Smola where they talk
+ // about "Optimal Expansion Coefficients."
+
+ const K kern(target.get_kernel());
+ typedef typename K::scalar_type scalar_type;
+ typedef typename K::sample_type sample_type;
+ typedef typename K::mem_manager_type mem_manager_type;
+
+ matrix<scalar_type,0,1,mem_manager_type> beta;
+
+        // Now we compute the first approximate distance function.
+ beta = pinv(kernel_matrix(kern,starting_basis)) *
+ (kernel_matrix(kern,starting_basis,target.get_basis_vectors())*target.get_alpha());
+ matrix<sample_type,0,1,mem_manager_type> out_vectors(mat(starting_basis));
+
+
+        // Now set up to do a global optimization of all the parameters in the approximate
+ // distance function.
+ const objective<K> obj(target, beta, out_vectors);
+ const objective_derivative<K> obj_der(target, beta, out_vectors);
+ matrix<scalar_type,0,1,mem_manager_type> opt_starting_point(obj.state_to_vector());
+
+
+ // perform a full optimization of all the parameters (i.e. both beta and the basis vectors together)
+ find_min(lbfgs_search_strategy(20),
+ stop_strategy,
+ obj, obj_der, opt_starting_point, 0);
+
+ // now make sure that the final optimized value is loaded into the beta and
+ // out_vectors matrices
+ obj.vector_to_state(opt_starting_point);
+
+ // Do a final reoptimization of beta just to make sure it is optimal given the new
+ // set of basis vectors.
+ beta = pinv(kernel_matrix(kern,out_vectors))*(kernel_matrix(kern,out_vectors,target.get_basis_vectors())*target.get_alpha());
+
+        // It is possible that some of the beta weights will be very close to zero. Let's remove
+        // the basis vectors with these essentially zero weights.
+ const scalar_type eps = max(abs(beta))*std::numeric_limits<scalar_type>::epsilon();
+ for (long i = 0; i < beta.size(); ++i)
+ {
+ // if beta(i) is zero (but leave at least one beta no matter what)
+ if (std::abs(beta(i)) < eps && beta.size() > 1)
+ {
+ beta = remove_row(beta, i);
+ out_vectors = remove_row(out_vectors, i);
+ --i;
+ }
+ }
+
+ return distance_function<K>(beta, kern, out_vectors);
+ }
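+
+    // Example usage (an illustrative sketch, not part of the original file; df,
+    // kernel_type, and samples are assumed to exist already): approximate an
+    // existing distance_function df using at most 10 basis vectors drawn from
+    // a set of samples:
+    //
+    //    linearly_independent_subset_finder<kernel_type> lisf(df.get_kernel(), 10);
+    //    fill_lisf(lisf, samples);
+    //    distance_function<kernel_type> approx = approximate_distance_function(
+    //                             objective_delta_stop_strategy(1e-7), df, lisf);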
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename trainer_type
+ >
+ class reduced_decision_function_trainer2
+ {
+ public:
+ typedef typename trainer_type::kernel_type kernel_type;
+ typedef typename trainer_type::scalar_type scalar_type;
+ typedef typename trainer_type::sample_type sample_type;
+ typedef typename trainer_type::mem_manager_type mem_manager_type;
+ typedef typename trainer_type::trained_function_type trained_function_type;
+
+ reduced_decision_function_trainer2 () : num_bv(0) {}
+        reduced_decision_function_trainer2 (
+            const trainer_type& trainer_,
+            const long num_bv_,
+            const double eps_ = 1e-3
+        ) :
+            trainer(trainer_),
+            num_bv(num_bv_),
+ eps(eps_)
+ {
+ COMPILE_TIME_ASSERT(is_matrix<sample_type>::value);
+
+ // make sure requires clause is not broken
+ DLIB_ASSERT(num_bv > 0 && eps > 0,
+ "\t reduced_decision_function_trainer2()"
+ << "\n\t you have given invalid arguments to this function"
+ << "\n\t num_bv: " << num_bv
+ << "\n\t eps: " << eps
+ );
+ }
+
+ template <
+ typename in_sample_vector_type,
+ typename in_scalar_vector_type
+ >
+ const decision_function<kernel_type> train (
+ const in_sample_vector_type& x,
+ const in_scalar_vector_type& y
+ ) const
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(num_bv > 0,
+ "\t reduced_decision_function_trainer2::train(x,y)"
+ << "\n\t You have tried to use an uninitialized version of this object"
+ << "\n\t num_bv: " << num_bv );
+ return do_train(mat(x), mat(y));
+ }
+
+ private:
+
+ template <
+ typename in_sample_vector_type,
+ typename in_scalar_vector_type
+ >
+ const decision_function<kernel_type> do_train (
+ const in_sample_vector_type& x,
+ const in_scalar_vector_type& y
+ ) const
+ {
+            // get the decision function object we are going to try to approximate
+ const decision_function<kernel_type>& dec_funct = trainer.train(x,y);
+ const kernel_type kern(dec_funct.kernel_function);
+
+            // now find a linearly independent subset of at most num_bv of the training points.
+ linearly_independent_subset_finder<kernel_type> lisf(kern, num_bv);
+ fill_lisf(lisf,x);
+
+ distance_function<kernel_type> approx, target;
+ target = dec_funct;
+ approx = approximate_distance_function(objective_delta_stop_strategy(eps), target, lisf);
+
+ decision_function<kernel_type> new_df(approx.get_alpha(),
+ 0,
+ kern,
+ approx.get_basis_vectors());
+
+ // now we have to figure out what the new bias should be. It might be a little
+ // different since we just messed with all the weights and vectors.
+ double bias = 0;
+ for (long i = 0; i < x.nr(); ++i)
+ {
+ bias += new_df(x(i)) - dec_funct(x(i));
+ }
+
+ new_df.b = bias/x.nr();
+
+ return new_df;
+
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ trainer_type trainer;
+ long num_bv;
+ double eps;
+
+
+ }; // end of class reduced_decision_function_trainer2
+
+ template <typename trainer_type>
+ const reduced_decision_function_trainer2<trainer_type> reduced2 (
+ const trainer_type& trainer,
+ const long num_bv,
+ double eps = 1e-3
+ )
+ {
+ COMPILE_TIME_ASSERT(is_matrix<typename trainer_type::sample_type>::value);
+
+ // make sure requires clause is not broken
+ DLIB_ASSERT(num_bv > 0 && eps > 0,
+ "\tconst reduced_decision_function_trainer2 reduced2()"
+ << "\n\t you have given invalid arguments to this function"
+ << "\n\t num_bv: " << num_bv
+ << "\n\t eps: " << eps
+ );
+
+ return reduced_decision_function_trainer2<trainer_type>(trainer, num_bv, eps);
+ }
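+
+    // Example usage (an illustrative sketch, reusing the hypothetical trainer,
+    // samples, and labels from the reduced() example above): identical in spirit
+    // to reduced(), but the basis vectors are globally optimized and eps controls
+    // when that optimization stops:
+    //
+    //    decision_function<kernel_type> df = reduced2(trainer,10,1e-3).train(samples, labels);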
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_REDUCEd_TRAINERS_
+