summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/optimization/optimization_solve_qp2_using_smo.h
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/optimization/optimization_solve_qp2_using_smo.h')
-rw-r--r--ml/dlib/dlib/optimization/optimization_solve_qp2_using_smo.h468
1 files changed, 468 insertions, 0 deletions
diff --git a/ml/dlib/dlib/optimization/optimization_solve_qp2_using_smo.h b/ml/dlib/dlib/optimization/optimization_solve_qp2_using_smo.h
new file mode 100644
index 000000000..88cad0cf3
--- /dev/null
+++ b/ml/dlib/dlib/optimization/optimization_solve_qp2_using_smo.h
@@ -0,0 +1,468 @@
+// Copyright (C) 2007 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_SOLVE_QP2_USING_SMo_Hh_
+#define DLIB_SOLVE_QP2_USING_SMo_Hh_
+
+#include "optimization_solve_qp2_using_smo_abstract.h"
+#include <cmath>
+#include <limits>
+#include <sstream>
+#include "../matrix.h"
+#include "../algs.h"
+
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ class invalid_nu_error : public dlib::error
+ {
+ public:
+ invalid_nu_error(const std::string& msg, double nu_) : dlib::error(msg), nu(nu_) {};
+ const double nu;
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename T
+ >
+ typename T::type maximum_nu_impl (
+ const T& y
+ )
+ {
+ typedef typename T::type scalar_type;
+ // make sure requires clause is not broken
+ DLIB_ASSERT(y.size() > 1 && is_col_vector(y),
+ "\ttypedef T::type maximum_nu(y)"
+ << "\n\ty should be a column vector with more than one entry"
+ << "\n\ty.nr(): " << y.nr()
+ << "\n\ty.nc(): " << y.nc()
+ );
+
+ long pos_count = 0;
+ long neg_count = 0;
+ for (long r = 0; r < y.nr(); ++r)
+ {
+ if (y(r) == 1.0)
+ {
+ ++pos_count;
+ }
+ else if (y(r) == -1.0)
+ {
+ ++neg_count;
+ }
+ else
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(y(r) == -1.0 || y(r) == 1.0,
+ "\ttypedef T::type maximum_nu(y)"
+ << "\n\ty should contain only 1 and 0 entries"
+ << "\n\tr: " << r
+ << "\n\ty(r): " << y(r)
+ );
+ }
+ }
+ return static_cast<scalar_type>(2.0*(scalar_type)std::min(pos_count,neg_count)/(scalar_type)y.nr());
+ }
+
+ template <
+ typename T
+ >
+ typename T::type maximum_nu (
+ const T& y
+ )
+ {
+ return maximum_nu_impl(mat(y));
+ }
+
+ template <
+ typename T
+ >
+ typename T::value_type maximum_nu (
+ const T& y
+ )
+ {
+ return maximum_nu_impl(mat(y));
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename matrix_type
+ >
+ class solve_qp2_using_smo
+ {
+ public:
+ typedef typename matrix_type::mem_manager_type mem_manager_type;
+ typedef typename matrix_type::type scalar_type;
+ typedef typename matrix_type::layout_type layout_type;
+ typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
+ typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
+
+
+ template <
+ typename EXP1,
+ typename EXP2,
+ long NR
+ >
+ unsigned long operator() (
+ const matrix_exp<EXP1>& Q,
+ const matrix_exp<EXP2>& y,
+ const scalar_type nu,
+ matrix<scalar_type,NR,1,mem_manager_type, layout_type>& alpha,
+ scalar_type eps
+ )
+ {
+ DLIB_ASSERT(Q.nr() == Q.nc() && y.size() == Q.nr() && y.size() > 1 && is_col_vector(y) &&
+ sum((y == +1) + (y == -1)) == y.size() &&
+ 0 < nu && nu <= 1 &&
+ eps > 0,
+ "\t void solve_qp2_using_smo::operator()"
+ << "\n\t invalid arguments were given to this function"
+ << "\n\t Q.nr(): " << Q.nr()
+ << "\n\t Q.nc(): " << Q.nc()
+ << "\n\t is_col_vector(y): " << is_col_vector(y)
+ << "\n\t y.size(): " << y.size()
+ << "\n\t sum((y == +1) + (y == -1)): " << sum((y == +1) + (y == -1))
+ << "\n\t nu: " << nu
+ << "\n\t eps: " << eps
+ );
+
+ alpha.set_size(Q.nr(),1);
+ df.set_size(Q.nr());
+
+ // now initialize alpha
+ set_initial_alpha(y, nu, alpha);
+
+ const scalar_type tau = 1e-12;
+
+ typedef typename colm_exp<EXP1>::type col_type;
+
+ set_all_elements(df, 0);
+ // initialize df. Compute df = Q*alpha
+ for (long r = 0; r < df.nr(); ++r)
+ {
+ if (alpha(r) != 0)
+ {
+ df += alpha(r)*matrix_cast<scalar_type>(colm(Q,r));
+ }
+ }
+
+ unsigned long count = 0;
+
+ // now perform the actual optimization of alpha
+ long i=0, j=0;
+ while (find_working_group(y,alpha,Q,df,tau,eps,i,j))
+ {
+ ++count;
+ const scalar_type old_alpha_i = alpha(i);
+ const scalar_type old_alpha_j = alpha(j);
+
+ optimize_working_pair(alpha,Q,df,tau,i,j);
+
+ // update the df vector now that we have modified alpha(i) and alpha(j)
+ scalar_type delta_alpha_i = alpha(i) - old_alpha_i;
+ scalar_type delta_alpha_j = alpha(j) - old_alpha_j;
+
+ col_type Q_i = colm(Q,i);
+ col_type Q_j = colm(Q,j);
+ for(long k = 0; k < df.nr(); ++k)
+ df(k) += Q_i(k)*delta_alpha_i + Q_j(k)*delta_alpha_j;
+ }
+
+ return count;
+ }
+
+ const column_matrix& get_gradient (
+ ) const { return df; }
+
+ private:
+
+ // -------------------------------------------------------------------------------------
+
+ template <
+ typename scalar_type,
+ typename scalar_vector_type,
+ typename scalar_vector_type2
+ >
+ inline void set_initial_alpha (
+ const scalar_vector_type& y,
+ const scalar_type nu,
+ scalar_vector_type2& alpha
+ ) const
+ {
+ set_all_elements(alpha,0);
+ const scalar_type l = y.nr();
+ scalar_type temp = nu*l/2;
+ long num = (long)std::floor(temp);
+ long num_total = (long)std::ceil(temp);
+
+ bool has_slack = false;
+ int count = 0;
+ for (int i = 0; i < alpha.nr(); ++i)
+ {
+ if (y(i) == 1)
+ {
+ if (count < num)
+ {
+ ++count;
+ alpha(i) = 1;
+ }
+ else
+ {
+ has_slack = true;
+ if (num_total > num)
+ {
+ ++count;
+ alpha(i) = temp - std::floor(temp);
+ }
+ break;
+ }
+ }
+ }
+
+ if (count != num_total || has_slack == false)
+ {
+ std::ostringstream sout;
+ sout << "Invalid nu of " << nu << ". It is required that: 0 < nu < " << 2*(scalar_type)count/y.nr();
+ throw invalid_nu_error(sout.str(),nu);
+ }
+
+ has_slack = false;
+ count = 0;
+ for (int i = 0; i < alpha.nr(); ++i)
+ {
+ if (y(i) == -1)
+ {
+ if (count < num)
+ {
+ ++count;
+ alpha(i) = 1;
+ }
+ else
+ {
+ has_slack = true;
+ if (num_total > num)
+ {
+ ++count;
+ alpha(i) = temp - std::floor(temp);
+ }
+ break;
+ }
+ }
+ }
+
+ if (count != num_total || has_slack == false)
+ {
+ std::ostringstream sout;
+ sout << "Invalid nu of " << nu << ". It is required that: 0 < nu < " << 2*(scalar_type)count/y.nr();
+ throw invalid_nu_error(sout.str(),nu);
+ }
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ template <
+ typename scalar_vector_type,
+ typename scalar_type,
+ typename EXP,
+ typename U, typename V
+ >
+ inline bool find_working_group (
+ const V& y,
+ const U& alpha,
+ const matrix_exp<EXP>& Q,
+ const scalar_vector_type& df,
+ const scalar_type tau,
+ const scalar_type eps,
+ long& i_out,
+ long& j_out
+ ) const
+ {
+ using namespace std;
+ long ip = 0;
+ long jp = 0;
+ long in = 0;
+ long jn = 0;
+
+
+ typedef typename colm_exp<EXP>::type col_type;
+ typedef typename diag_exp<EXP>::type diag_type;
+
+ scalar_type ip_val = -numeric_limits<scalar_type>::infinity();
+ scalar_type jp_val = numeric_limits<scalar_type>::infinity();
+ scalar_type in_val = -numeric_limits<scalar_type>::infinity();
+ scalar_type jn_val = numeric_limits<scalar_type>::infinity();
+
+ // loop over the alphas and find the maximum ip and in indices.
+ for (long i = 0; i < alpha.nr(); ++i)
+ {
+ if (y(i) == 1)
+ {
+ if (alpha(i) < 1.0)
+ {
+ if (-df(i) > ip_val)
+ {
+ ip_val = -df(i);
+ ip = i;
+ }
+ }
+ }
+ else
+ {
+ if (alpha(i) > 0.0)
+ {
+ if (df(i) > in_val)
+ {
+ in_val = df(i);
+ in = i;
+ }
+ }
+ }
+ }
+
+ scalar_type Mp = numeric_limits<scalar_type>::infinity();
+ scalar_type Mn = numeric_limits<scalar_type>::infinity();
+
+ // Pick out the columns and diagonal of Q we need below. Doing
+ // it this way is faster if Q is actually a symmetric_matrix_cache
+ // object.
+ col_type Q_ip = colm(Q,ip);
+ col_type Q_in = colm(Q,in);
+ diag_type Q_diag = diag(Q);
+
+
+
+ // now we need to find the minimum jp and jn indices
+ for (long j = 0; j < alpha.nr(); ++j)
+ {
+ if (y(j) == 1)
+ {
+ if (alpha(j) > 0.0)
+ {
+ scalar_type b = ip_val + df(j);
+ if (-df(j) < Mp)
+ Mp = -df(j);
+
+ if (b > 0)
+ {
+ scalar_type a = Q_ip(ip) + Q_diag(j) - 2*Q_ip(j);
+ if (a <= 0)
+ a = tau;
+ scalar_type temp = -b*b/a;
+ if (temp < jp_val)
+ {
+ jp_val = temp;
+ jp = j;
+ }
+ }
+ }
+ }
+ else
+ {
+ if (alpha(j) < 1.0)
+ {
+ scalar_type b = in_val - df(j);
+ if (df(j) < Mn)
+ Mn = df(j);
+
+ if (b > 0)
+ {
+ scalar_type a = Q_in(in) + Q_diag(j) - 2*Q_in(j);
+ if (a <= 0)
+ a = tau;
+ scalar_type temp = -b*b/a;
+ if (temp < jn_val)
+ {
+ jn_val = temp;
+ jn = j;
+ }
+ }
+ }
+ }
+ }
+
+ // if we are at the optimal point then return false so the caller knows
+ // to stop optimizing
+ if (std::max(ip_val - Mp, in_val - Mn) < eps)
+ return false;
+
+ if (jp_val < jn_val)
+ {
+ i_out = ip;
+ j_out = jp;
+ }
+ else
+ {
+ i_out = in;
+ j_out = jn;
+ }
+
+ return true;
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ template <
+ typename EXP,
+ typename T, typename U
+ >
+ inline void optimize_working_pair (
+ T& alpha,
+ const matrix_exp<EXP>& Q,
+ const U& df,
+ const scalar_type tau,
+ const long i,
+ const long j
+ ) const
+ {
+ scalar_type quad_coef = Q(i,i)+Q(j,j)-2*Q(j,i);
+ if (quad_coef <= 0)
+ quad_coef = tau;
+ scalar_type delta = (df(i)-df(j))/quad_coef;
+ scalar_type sum = alpha(i) + alpha(j);
+ alpha(i) -= delta;
+ alpha(j) += delta;
+
+ if(sum > 1)
+ {
+ if(alpha(i) > 1)
+ {
+ alpha(i) = 1;
+ alpha(j) = sum - 1;
+ }
+ else if(alpha(j) > 1)
+ {
+ alpha(j) = 1;
+ alpha(i) = sum - 1;
+ }
+ }
+ else
+ {
+ if(alpha(j) < 0)
+ {
+ alpha(j) = 0;
+ alpha(i) = sum;
+ }
+ else if(alpha(i) < 0)
+ {
+ alpha(i) = 0;
+ alpha(j) = sum;
+ }
+ }
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ column_matrix df; // gradient of f(alpha)
+ };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SOLVE_QP2_USING_SMo_Hh_
+
+