diff options
Diffstat (limited to 'ml/dlib/dlib/optimization/optimization_solve_qp2_using_smo.h')
-rw-r--r-- | ml/dlib/dlib/optimization/optimization_solve_qp2_using_smo.h | 468 |
1 files changed, 468 insertions, 0 deletions
diff --git a/ml/dlib/dlib/optimization/optimization_solve_qp2_using_smo.h b/ml/dlib/dlib/optimization/optimization_solve_qp2_using_smo.h new file mode 100644 index 000000000..88cad0cf3 --- /dev/null +++ b/ml/dlib/dlib/optimization/optimization_solve_qp2_using_smo.h @@ -0,0 +1,468 @@ +// Copyright (C) 2007 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_SOLVE_QP2_USING_SMo_Hh_ +#define DLIB_SOLVE_QP2_USING_SMo_Hh_ + +#include "optimization_solve_qp2_using_smo_abstract.h" +#include <cmath> +#include <limits> +#include <sstream> +#include "../matrix.h" +#include "../algs.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class invalid_nu_error : public dlib::error + { + public: + invalid_nu_error(const std::string& msg, double nu_) : dlib::error(msg), nu(nu_) {}; + const double nu; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + typename T::type maximum_nu_impl ( + const T& y + ) + { + typedef typename T::type scalar_type; + // make sure requires clause is not broken + DLIB_ASSERT(y.size() > 1 && is_col_vector(y), + "\ttypedef T::type maximum_nu(y)" + << "\n\ty should be a column vector with more than one entry" + << "\n\ty.nr(): " << y.nr() + << "\n\ty.nc(): " << y.nc() + ); + + long pos_count = 0; + long neg_count = 0; + for (long r = 0; r < y.nr(); ++r) + { + if (y(r) == 1.0) + { + ++pos_count; + } + else if (y(r) == -1.0) + { + ++neg_count; + } + else + { + // make sure requires clause is not broken + DLIB_ASSERT(y(r) == -1.0 || y(r) == 1.0, + "\ttypedef T::type maximum_nu(y)" + << "\n\ty should contain only 1 and 0 entries" + << "\n\tr: " << r + << "\n\ty(r): " << y(r) + ); + } + } + return static_cast<scalar_type>(2.0*(scalar_type)std::min(pos_count,neg_count)/(scalar_type)y.nr()); + } + + template < + typename T + > + typename T::type maximum_nu ( + const T& y + ) + { + return maximum_nu_impl(mat(y)); + } + + template < + typename T + > + typename T::value_type maximum_nu ( + const T& y + ) + { + return maximum_nu_impl(mat(y)); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + class solve_qp2_using_smo + { + public: + typedef typename matrix_type::mem_manager_type mem_manager_type; + typedef typename matrix_type::type scalar_type; + typedef typename matrix_type::layout_type layout_type; + typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix; + typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix; + + + template < + typename EXP1, + typename EXP2, + long NR + > + unsigned long operator() ( + const matrix_exp<EXP1>& Q, + const matrix_exp<EXP2>& y, + const scalar_type nu, + matrix<scalar_type,NR,1,mem_manager_type, layout_type>& alpha, + scalar_type eps + ) + { + DLIB_ASSERT(Q.nr() == Q.nc() && y.size() == Q.nr() && y.size() > 1 && is_col_vector(y) && + sum((y == +1) + (y == -1)) == y.size() && + 0 < nu && nu <= 1 && + eps > 0, + "\t void solve_qp2_using_smo::operator()" + << "\n\t invalid arguments were given to this function" + << "\n\t Q.nr(): " << Q.nr() + << "\n\t Q.nc(): " << Q.nc() + << "\n\t is_col_vector(y): " << is_col_vector(y) + << "\n\t y.size(): " << y.size() + << "\n\t sum((y == +1) + (y == -1)): " << sum((y == +1) + (y == -1)) + << "\n\t nu: " << nu + << "\n\t eps: " << eps + ); + + alpha.set_size(Q.nr(),1); + df.set_size(Q.nr()); + + // now initialize alpha + set_initial_alpha(y, nu, alpha); + + const scalar_type tau = 1e-12; + + typedef typename colm_exp<EXP1>::type col_type; + + set_all_elements(df, 0); + // initialize df. Compute df = Q*alpha + for (long r = 0; r < df.nr(); ++r) + { + if (alpha(r) != 0) + { + df += alpha(r)*matrix_cast<scalar_type>(colm(Q,r)); + } + } + + unsigned long count = 0; + + // now perform the actual optimization of alpha + long i=0, j=0; + while (find_working_group(y,alpha,Q,df,tau,eps,i,j)) + { + ++count; + const scalar_type old_alpha_i = alpha(i); + const scalar_type old_alpha_j = alpha(j); + + optimize_working_pair(alpha,Q,df,tau,i,j); + + // update the df vector now that we have modified alpha(i) and alpha(j) + scalar_type delta_alpha_i = alpha(i) - old_alpha_i; + scalar_type delta_alpha_j = alpha(j) - old_alpha_j; + + col_type Q_i = colm(Q,i); + col_type Q_j = colm(Q,j); + for(long k = 0; k < df.nr(); ++k) + df(k) += Q_i(k)*delta_alpha_i + Q_j(k)*delta_alpha_j; + } + + return count; + } + + const column_matrix& get_gradient ( + ) const { return df; } + + private: + + // ------------------------------------------------------------------------------------- + + template < + typename scalar_type, + typename scalar_vector_type, + typename scalar_vector_type2 + > + inline void set_initial_alpha ( + const scalar_vector_type& y, + const scalar_type nu, + scalar_vector_type2& alpha + ) const + { + set_all_elements(alpha,0); + const scalar_type l = y.nr(); + scalar_type temp = nu*l/2; + long num = (long)std::floor(temp); + long num_total = (long)std::ceil(temp); + + bool has_slack = false; + int count = 0; + for (int i = 0; i < alpha.nr(); ++i) + { + if (y(i) == 1) + { + if (count < num) + { + ++count; + alpha(i) = 1; + } + else + { + has_slack = true; + if (num_total > num) + { + ++count; + alpha(i) = temp - std::floor(temp); + } + break; + } + } + } + + if (count != num_total || has_slack == false) + { + std::ostringstream sout; + sout << "Invalid nu of " << nu << ". It is required that: 0 < nu < " << 2*(scalar_type)count/y.nr(); + throw invalid_nu_error(sout.str(),nu); + } + + has_slack = false; + count = 0; + for (int i = 0; i < alpha.nr(); ++i) + { + if (y(i) == -1) + { + if (count < num) + { + ++count; + alpha(i) = 1; + } + else + { + has_slack = true; + if (num_total > num) + { + ++count; + alpha(i) = temp - std::floor(temp); + } + break; + } + } + } + + if (count != num_total || has_slack == false) + { + std::ostringstream sout; + sout << "Invalid nu of " << nu << ". It is required that: 0 < nu < " << 2*(scalar_type)count/y.nr(); + throw invalid_nu_error(sout.str(),nu); + } + } + + // ------------------------------------------------------------------------------------ + + template < + typename scalar_vector_type, + typename scalar_type, + typename EXP, + typename U, typename V + > + inline bool find_working_group ( + const V& y, + const U& alpha, + const matrix_exp<EXP>& Q, + const scalar_vector_type& df, + const scalar_type tau, + const scalar_type eps, + long& i_out, + long& j_out + ) const + { + using namespace std; + long ip = 0; + long jp = 0; + long in = 0; + long jn = 0; + + + typedef typename colm_exp<EXP>::type col_type; + typedef typename diag_exp<EXP>::type diag_type; + + scalar_type ip_val = -numeric_limits<scalar_type>::infinity(); + scalar_type jp_val = numeric_limits<scalar_type>::infinity(); + scalar_type in_val = -numeric_limits<scalar_type>::infinity(); + scalar_type jn_val = numeric_limits<scalar_type>::infinity(); + + // loop over the alphas and find the maximum ip and in indices. + for (long i = 0; i < alpha.nr(); ++i) + { + if (y(i) == 1) + { + if (alpha(i) < 1.0) + { + if (-df(i) > ip_val) + { + ip_val = -df(i); + ip = i; + } + } + } + else + { + if (alpha(i) > 0.0) + { + if (df(i) > in_val) + { + in_val = df(i); + in = i; + } + } + } + } + + scalar_type Mp = numeric_limits<scalar_type>::infinity(); + scalar_type Mn = numeric_limits<scalar_type>::infinity(); + + // Pick out the columns and diagonal of Q we need below. Doing + // it this way is faster if Q is actually a symmetric_matrix_cache + // object. + col_type Q_ip = colm(Q,ip); + col_type Q_in = colm(Q,in); + diag_type Q_diag = diag(Q); + + + + // now we need to find the minimum jp and jn indices + for (long j = 0; j < alpha.nr(); ++j) + { + if (y(j) == 1) + { + if (alpha(j) > 0.0) + { + scalar_type b = ip_val + df(j); + if (-df(j) < Mp) + Mp = -df(j); + + if (b > 0) + { + scalar_type a = Q_ip(ip) + Q_diag(j) - 2*Q_ip(j); + if (a <= 0) + a = tau; + scalar_type temp = -b*b/a; + if (temp < jp_val) + { + jp_val = temp; + jp = j; + } + } + } + } + else + { + if (alpha(j) < 1.0) + { + scalar_type b = in_val - df(j); + if (df(j) < Mn) + Mn = df(j); + + if (b > 0) + { + scalar_type a = Q_in(in) + Q_diag(j) - 2*Q_in(j); + if (a <= 0) + a = tau; + scalar_type temp = -b*b/a; + if (temp < jn_val) + { + jn_val = temp; + jn = j; + } + } + } + } + } + + // if we are at the optimal point then return false so the caller knows + // to stop optimizing + if (std::max(ip_val - Mp, in_val - Mn) < eps) + return false; + + if (jp_val < jn_val) + { + i_out = ip; + j_out = jp; + } + else + { + i_out = in; + j_out = jn; + } + + return true; + } + + // ------------------------------------------------------------------------------------ + + template < + typename EXP, + typename T, typename U + > + inline void optimize_working_pair ( + T& alpha, + const matrix_exp<EXP>& Q, + const U& df, + const scalar_type tau, + const long i, + const long j + ) const + { + scalar_type quad_coef = Q(i,i)+Q(j,j)-2*Q(j,i); + if (quad_coef <= 0) + quad_coef = tau; + scalar_type delta = (df(i)-df(j))/quad_coef; + scalar_type sum = alpha(i) + alpha(j); + alpha(i) -= delta; + alpha(j) += delta; + + if(sum > 1) + { + if(alpha(i) > 1) + { + alpha(i) = 1; + alpha(j) = sum - 1; + } + else if(alpha(j) > 1) + { + alpha(j) = 1; + alpha(i) = sum - 1; + } + } + else + { + if(alpha(j) < 0) + { + alpha(j) = 0; + alpha(i) = sum; + } + else if(alpha(i) < 0) + { + alpha(i) = 0; + alpha(j) = sum; + } + } + } + + // ------------------------------------------------------------------------------------ + + column_matrix df; // gradient of f(alpha) + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SOLVE_QP2_USING_SMo_Hh_ + + |