Diffstat (limited to 'src/ml/dlib/dlib/optimization/optimization.h')
-rw-r--r--  src/ml/dlib/dlib/optimization/optimization.h  714
1 file changed, 714 insertions(+), 0 deletions(-)
diff --git a/src/ml/dlib/dlib/optimization/optimization.h b/src/ml/dlib/dlib/optimization/optimization.h
new file mode 100644
index 000000000..561d64376
--- /dev/null
+++ b/src/ml/dlib/dlib/optimization/optimization.h
@@ -0,0 +1,714 @@
+// Copyright (C) 2008 Davis E. King (davis@dlib.net)
+// License: Boost Software License.  See LICENSE.txt for the full license.
+#ifndef DLIB_OPTIMIZATIOn_H_
+#define DLIB_OPTIMIZATIOn_H_
+
+#include <cmath>
+#include <limits>
+#include "optimization_abstract.h"
+#include "optimization_search_strategies.h"
+#include "optimization_stop_strategies.h"
+#include "optimization_line_search.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// Functions that transform other functions
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ template <typename funct>
+ class central_differences
+ {
+ public:
+ central_differences(const funct& f_, double eps_ = 1e-7) : f(f_), eps(eps_){}
+
+ template <typename T>
+ typename T::matrix_type operator()(const T& x) const
+ {
+ // T must be some sort of dlib matrix
+ COMPILE_TIME_ASSERT(is_matrix<T>::value);
+
+ typename T::matrix_type der(x.size());
+ typename T::matrix_type e(x);
+ for (long i = 0; i < x.size(); ++i)
+ {
+ const double old_val = e(i);
+
+ e(i) += eps;
+ const double delta_plus = f(e);
+ e(i) = old_val - eps;
+ const double delta_minus = f(e);
+
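+                // Divide by the step actually taken between the two evaluation
+                // points, (old_val+eps)-(old_val-eps), rather than the nominal
+                // 2*eps; this cancels the floating point rounding error incurred
+                // when eps is added to and subtracted from old_val.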
+ der(i) = (delta_plus - delta_minus)/((old_val+eps)-(old_val-eps));
+
+ // and finally restore the old value of this element
+ e(i) = old_val;
+ }
+
+ return der;
+ }
+
+ template <typename T, typename U>
+ typename U::matrix_type operator()(const T& item, const U& x) const
+ {
+ // U must be some sort of dlib matrix
+ COMPILE_TIME_ASSERT(is_matrix<U>::value);
+
+ typename U::matrix_type der(x.size());
+ typename U::matrix_type e(x);
+ for (long i = 0; i < x.size(); ++i)
+ {
+ const double old_val = e(i);
+
+ e(i) += eps;
+ const double delta_plus = f(item,e);
+ e(i) = old_val - eps;
+ const double delta_minus = f(item,e);
+
+ der(i) = (delta_plus - delta_minus)/((old_val+eps)-(old_val-eps));
+
+ // and finally restore the old value of this element
+ e(i) = old_val;
+ }
+
+ return der;
+ }
+
+
+ double operator()(const double& x) const
+ {
+ return (f(x+eps)-f(x-eps))/((x+eps)-(x-eps));
+ }
+
+ private:
+ const funct& f;
+ const double eps;
+ };
+
+ template <typename funct>
+ const central_differences<funct> derivative(const funct& f) { return central_differences<funct>(f); }
+ template <typename funct>
+ const central_differences<funct> derivative(const funct& f, double eps)
+ {
+ DLIB_ASSERT (
+ eps > 0,
+ "\tcentral_differences derivative(f,eps)"
+ << "\n\tYou must give an epsilon > 0"
+ << "\n\teps: " << eps
+ );
+ return central_differences<funct>(f,eps);
+ }
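+
+    // A minimal usage sketch (not part of the original header; my_func is a
+    // hypothetical user-supplied callable taking a dlib column vector):
+    //
+    //     auto my_func = [](const matrix<double,0,1>& m) { return dot(m,m); };
+    //     matrix<double,0,1> x(3);
+    //     x = 1, 2, 3;  // dlib comma initializer
+    //     matrix<double,0,1> grad = derivative(my_func)(x);
+    //     // grad is approximately [2, 4, 6], the gradient of dot(m,m) at x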
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename funct, typename EXP1, typename EXP2>
+ struct clamped_function_object
+ {
+ clamped_function_object(
+ const funct& f_,
+ const matrix_exp<EXP1>& x_lower_,
+ const matrix_exp<EXP2>& x_upper_
+ ) : f(f_), x_lower(x_lower_), x_upper(x_upper_)
+ {
+ }
+
+ template <typename T>
+ double operator() (
+ const T& x
+ ) const
+ {
+ return f(clamp(x,x_lower,x_upper));
+ }
+
+ const funct& f;
+ const matrix_exp<EXP1>& x_lower;
+ const matrix_exp<EXP2>& x_upper;
+ };
+
+ template <typename funct, typename EXP1, typename EXP2>
+ clamped_function_object<funct,EXP1,EXP2> clamp_function(
+ const funct& f,
+ const matrix_exp<EXP1>& x_lower,
+ const matrix_exp<EXP2>& x_upper
+ ) { return clamped_function_object<funct,EXP1,EXP2>(f,x_lower,x_upper); }
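+
+    // Usage note: clamp_function(f, lo, hi) returns a callable g with
+    // g(x) == f(clamp(x, lo, hi)), so f is never evaluated outside the box
+    // [lo, hi].  find_min_box_constrained() below uses this to keep line
+    // search probes feasible.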
+
+// ----------------------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// Functions that perform unconstrained optimization
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename search_strategy_type,
+ typename stop_strategy_type,
+ typename funct,
+ typename funct_der,
+ typename T
+ >
+ double find_min (
+ search_strategy_type search_strategy,
+ stop_strategy_type stop_strategy,
+ const funct& f,
+ const funct_der& der,
+ T& x,
+ double min_f
+ )
+ {
+ COMPILE_TIME_ASSERT(is_matrix<T>::value);
+ // The starting point (i.e. x) must be a column vector.
+ COMPILE_TIME_ASSERT(T::NC <= 1);
+
+ DLIB_CASSERT (
+ is_col_vector(x),
+ "\tdouble find_min()"
+ << "\n\tYou have to supply column vectors to this function"
+ << "\n\tx.nc(): " << x.nc()
+ );
+
+
+ T g, s;
+
+ double f_value = f(x);
+ g = der(x);
+
+ if (!is_finite(f_value))
+ throw error("The objective function generated non-finite outputs");
+ if (!is_finite(g))
+ throw error("The objective function generated non-finite outputs");
+
+ while(stop_strategy.should_continue_search(x, f_value, g) && f_value > min_f)
+ {
+ s = search_strategy.get_next_direction(x, f_value, g);
+
+ double alpha = line_search(
+ make_line_search_function(f,x,s, f_value),
+ f_value,
+ make_line_search_function(der,x,s, g),
+ dot(g,s), // compute initial gradient for the line search
+ search_strategy.get_wolfe_rho(), search_strategy.get_wolfe_sigma(), min_f,
+ search_strategy.get_max_line_search_iterations());
+
+ // Take the search step indicated by the above line search
+ x += alpha*s;
+
+ if (!is_finite(f_value))
+ throw error("The objective function generated non-finite outputs");
+ if (!is_finite(g))
+ throw error("The objective function generated non-finite outputs");
+ }
+
+ return f_value;
+ }
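+
+    // A minimal usage sketch.  rosen and rosen_derivative are hypothetical
+    // user-supplied functions (objective and gradient, both taking a column
+    // vector); the search and stop strategies come from the headers included
+    // above:
+    //
+    //     matrix<double,0,1> starting_point(2);
+    //     starting_point = -1, 1;
+    //     find_min(bfgs_search_strategy(),
+    //              objective_delta_stop_strategy(1e-7),
+    //              rosen, rosen_derivative,
+    //              starting_point,
+    //              -1);   // min_f: also stop if the objective ever drops below -1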
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename search_strategy_type,
+ typename stop_strategy_type,
+ typename funct,
+ typename funct_der,
+ typename T
+ >
+ double find_max (
+ search_strategy_type search_strategy,
+ stop_strategy_type stop_strategy,
+ const funct& f,
+ const funct_der& der,
+ T& x,
+ double max_f
+ )
+ {
+ COMPILE_TIME_ASSERT(is_matrix<T>::value);
+ // The starting point (i.e. x) must be a column vector.
+ COMPILE_TIME_ASSERT(T::NC <= 1);
+
+ DLIB_CASSERT (
+ is_col_vector(x),
+ "\tdouble find_max()"
+ << "\n\tYou have to supply column vectors to this function"
+ << "\n\tx.nc(): " << x.nc()
+ );
+
+ T g, s;
+
+        // This function is basically a copy of find_min(), but with negations put in
+        // the right places to flip things around so that it ends up looking for the
+        // max rather than the min.
+
+ double f_value = -f(x);
+ g = -der(x);
+
+ if (!is_finite(f_value))
+ throw error("The objective function generated non-finite outputs");
+ if (!is_finite(g))
+ throw error("The objective function generated non-finite outputs");
+
+ while(stop_strategy.should_continue_search(x, f_value, g) && f_value > -max_f)
+ {
+ s = search_strategy.get_next_direction(x, f_value, g);
+
+ double alpha = line_search(
+ negate_function(make_line_search_function(f,x,s, f_value)),
+ f_value,
+ negate_function(make_line_search_function(der,x,s, g)),
+ dot(g,s), // compute initial gradient for the line search
+ search_strategy.get_wolfe_rho(), search_strategy.get_wolfe_sigma(), -max_f,
+ search_strategy.get_max_line_search_iterations()
+ );
+
+ // Take the search step indicated by the above line search
+ x += alpha*s;
+
+ // Don't forget to negate these outputs from the line search since they are
+ // from the unnegated versions of f() and der()
+ g *= -1;
+ f_value *= -1;
+
+ if (!is_finite(f_value))
+ throw error("The objective function generated non-finite outputs");
+ if (!is_finite(g))
+ throw error("The objective function generated non-finite outputs");
+ }
+
+ return -f_value;
+ }
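+
+    // Usage mirrors find_min(); e.g. with a hypothetical objective g and its
+    // gradient g_der:
+    //
+    //     find_max(bfgs_search_strategy(),
+    //              objective_delta_stop_strategy(1e-7),
+    //              g, g_der, starting_point,
+    //              100);  // max_f: also stop once f(x) reaches 100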
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename search_strategy_type,
+ typename stop_strategy_type,
+ typename funct,
+ typename T
+ >
+ double find_min_using_approximate_derivatives (
+ search_strategy_type search_strategy,
+ stop_strategy_type stop_strategy,
+ const funct& f,
+ T& x,
+ double min_f,
+ double derivative_eps = 1e-7
+ )
+ {
+ COMPILE_TIME_ASSERT(is_matrix<T>::value);
+ // The starting point (i.e. x) must be a column vector.
+ COMPILE_TIME_ASSERT(T::NC <= 1);
+
+ DLIB_CASSERT (
+ is_col_vector(x) && derivative_eps > 0,
+ "\tdouble find_min_using_approximate_derivatives()"
+ << "\n\tYou have to supply column vectors to this function"
+ << "\n\tx.nc(): " << x.nc()
+ << "\n\tderivative_eps: " << derivative_eps
+ );
+
+ T g, s;
+
+ double f_value = f(x);
+ g = derivative(f,derivative_eps)(x);
+
+ if (!is_finite(f_value))
+ throw error("The objective function generated non-finite outputs");
+ if (!is_finite(g))
+ throw error("The objective function generated non-finite outputs");
+
+ while(stop_strategy.should_continue_search(x, f_value, g) && f_value > min_f)
+ {
+ s = search_strategy.get_next_direction(x, f_value, g);
+
+ double alpha = line_search(
+ make_line_search_function(f,x,s,f_value),
+ f_value,
+ derivative(make_line_search_function(f,x,s),derivative_eps),
+ dot(g,s), // Sometimes the following line is a better way of determining the initial gradient.
+ //derivative(make_line_search_function(f,x,s),derivative_eps)(0),
+ search_strategy.get_wolfe_rho(), search_strategy.get_wolfe_sigma(), min_f,
+ search_strategy.get_max_line_search_iterations()
+ );
+
+ // Take the search step indicated by the above line search
+ x += alpha*s;
+
+ g = derivative(f,derivative_eps)(x);
+
+ if (!is_finite(f_value))
+ throw error("The objective function generated non-finite outputs");
+ if (!is_finite(g))
+ throw error("The objective function generated non-finite outputs");
+ }
+
+ return f_value;
+ }
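+
+    // Usage sketch: like find_min() but no analytic gradient is supplied; it is
+    // approximated by central differences with step derivative_eps (rosen is the
+    // same hypothetical objective as above):
+    //
+    //     find_min_using_approximate_derivatives(bfgs_search_strategy(),
+    //                                            objective_delta_stop_strategy(1e-7),
+    //                                            rosen, starting_point, -1);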
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename search_strategy_type,
+ typename stop_strategy_type,
+ typename funct,
+ typename T
+ >
+ double find_max_using_approximate_derivatives (
+ search_strategy_type search_strategy,
+ stop_strategy_type stop_strategy,
+ const funct& f,
+ T& x,
+ double max_f,
+ double derivative_eps = 1e-7
+ )
+ {
+ COMPILE_TIME_ASSERT(is_matrix<T>::value);
+ // The starting point (i.e. x) must be a column vector.
+ COMPILE_TIME_ASSERT(T::NC <= 1);
+
+ DLIB_CASSERT (
+ is_col_vector(x) && derivative_eps > 0,
+ "\tdouble find_max_using_approximate_derivatives()"
+ << "\n\tYou have to supply column vectors to this function"
+ << "\n\tx.nc(): " << x.nc()
+ << "\n\tderivative_eps: " << derivative_eps
+ );
+
+ // Just negate the necessary things and call the find_min version of this function.
+ return -find_min_using_approximate_derivatives(
+ search_strategy,
+ stop_strategy,
+ negate_function(f),
+ x,
+ -max_f,
+ derivative_eps
+ );
+ }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// Functions for box constrained optimization
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ template <typename T, typename U, typename V>
+ T zero_bounded_variables (
+ const double eps,
+ T vect,
+ const T& x,
+ const T& gradient,
+ const U& x_lower,
+ const V& x_upper
+ )
+ {
+ for (long i = 0; i < gradient.size(); ++i)
+ {
+ const double tol = eps*std::abs(x(i));
+ // if x(i) is an active bound constraint
+ if (x_lower(i)+tol >= x(i) && gradient(i) > 0)
+ vect(i) = 0;
+ else if (x_upper(i)-tol <= x(i) && gradient(i) < 0)
+ vect(i) = 0;
+ }
+ return vect;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename T, typename U, typename V>
+ T gap_step_assign_bounded_variables (
+ const double eps,
+ T vect,
+ const T& x,
+ const T& gradient,
+ const U& x_lower,
+ const V& x_upper
+ )
+ {
+ for (long i = 0; i < gradient.size(); ++i)
+ {
+ const double tol = eps*std::abs(x(i));
+ // If x(i) is an active bound constraint then we should set its search
+ // direction such that a single step along the direction either does nothing or
+ // closes the gap of size tol before hitting the bound exactly.
+ if (x_lower(i)+tol >= x(i) && gradient(i) > 0)
+ vect(i) = x_lower(i)-x(i);
+ else if (x_upper(i)-tol <= x(i) && gradient(i) < 0)
+ vect(i) = x_upper(i)-x(i);
+ }
+ return vect;
+ }
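+
+    // Worked micro-example (illustrative numbers): suppose x(i) == 1, x_upper(i) == 1,
+    // and gradient(i) < 0, so the optimizer wants to increase x(i) but it already sits
+    // on the upper bound.  zero_bounded_variables() zeroes that coordinate of the
+    // gradient so the search strategy ignores it, and gap_step_assign_bounded_variables()
+    // sets s(i) = x_upper(i) - x(i) = 0 so a unit step along s leaves x(i) exactly on
+    // the bound.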
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename search_strategy_type,
+ typename stop_strategy_type,
+ typename funct,
+ typename funct_der,
+ typename T,
+ typename EXP1,
+ typename EXP2
+ >
+ double find_min_box_constrained (
+ search_strategy_type search_strategy,
+ stop_strategy_type stop_strategy,
+ const funct& f,
+ const funct_der& der,
+ T& x,
+ const matrix_exp<EXP1>& x_lower,
+ const matrix_exp<EXP2>& x_upper
+ )
+ {
+ /*
+ The implementation of this function is more or less based on the discussion in
+ the paper Projected Newton-type Methods in Machine Learning by Mark Schmidt, et al.
+ */
+
+ // make sure the requires clause is not violated
+ COMPILE_TIME_ASSERT(is_matrix<T>::value);
+ // The starting point (i.e. x) must be a column vector.
+ COMPILE_TIME_ASSERT(T::NC <= 1);
+
+ DLIB_CASSERT (
+ is_col_vector(x) && is_col_vector(x_lower) && is_col_vector(x_upper) &&
+ x.size() == x_lower.size() && x.size() == x_upper.size(),
+ "\tdouble find_min_box_constrained()"
+ << "\n\t The inputs to this function must be equal length column vectors."
+ << "\n\t is_col_vector(x): " << is_col_vector(x)
+ << "\n\t is_col_vector(x_upper): " << is_col_vector(x_upper)
+ << "\n\t is_col_vector(x_upper): " << is_col_vector(x_upper)
+ << "\n\t x.size(): " << x.size()
+ << "\n\t x_lower.size(): " << x_lower.size()
+ << "\n\t x_upper.size(): " << x_upper.size()
+ );
+ DLIB_ASSERT (
+ min(x_upper-x_lower) >= 0,
+ "\tdouble find_min_box_constrained()"
+ << "\n\t You have to supply proper box constraints to this function."
+ << "\n\r min(x_upper-x_lower): " << min(x_upper-x_lower)
+ );
+
+
+ T g, s;
+ double f_value = f(x);
+ g = der(x);
+
+ if (!is_finite(f_value))
+ throw error("The objective function generated non-finite outputs");
+ if (!is_finite(g))
+ throw error("The objective function generated non-finite outputs");
+
+        // gap_eps determines how close we have to get to a bound constraint before we
+        // basically drop it from the optimization and treat it as an active
+        // constraint.
+ const double gap_eps = 1e-8;
+
+ double last_alpha = 1;
+ while(stop_strategy.should_continue_search(x, f_value, g))
+ {
+ s = search_strategy.get_next_direction(x, f_value, zero_bounded_variables(gap_eps, g, x, g, x_lower, x_upper));
+ s = gap_step_assign_bounded_variables(gap_eps, s, x, g, x_lower, x_upper);
+
+ double alpha = backtracking_line_search(
+ make_line_search_function(clamp_function(f,x_lower,x_upper), x, s, f_value),
+ f_value,
+ dot(g,s), // compute gradient for the line search
+ last_alpha,
+ search_strategy.get_wolfe_rho(),
+ search_strategy.get_max_line_search_iterations());
+
+            // Do a trust region style thing for alpha.  The idea is that if we take a
+            // small step then we are likely to take another small step.  So we reuse
+            // the alpha from the last iteration, unless the line search didn't shrink
+            // alpha at all; in that case we start with a bigger alpha (capped at 1)
+            // next time.
+ if (alpha == last_alpha)
+ last_alpha = std::min(last_alpha*10,1.0);
+ else
+ last_alpha = alpha;
+
+ // Take the search step indicated by the above line search
+ x = dlib::clamp(x + alpha*s, x_lower, x_upper);
+ g = der(x);
+
+ if (!is_finite(f_value))
+ throw error("The objective function generated non-finite outputs");
+ if (!is_finite(g))
+ throw error("The objective function generated non-finite outputs");
+ }
+
+ return f_value;
+ }
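+
+    // A minimal usage sketch (rosen and rosen_derivative as above; the bounds are
+    // equal length column vectors, and a scalar-bound overload follows below):
+    //
+    //     matrix<double,0,1> lo(2), hi(2);
+    //     lo = 0, 0;
+    //     hi = 2, 2;
+    //     find_min_box_constrained(lbfgs_search_strategy(10),
+    //                              objective_delta_stop_strategy(1e-9),
+    //                              rosen, rosen_derivative,
+    //                              starting_point, lo, hi);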
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename search_strategy_type,
+ typename stop_strategy_type,
+ typename funct,
+ typename funct_der,
+ typename T
+ >
+ double find_min_box_constrained (
+ search_strategy_type search_strategy,
+ stop_strategy_type stop_strategy,
+ const funct& f,
+ const funct_der& der,
+ T& x,
+ double x_lower,
+ double x_upper
+ )
+ {
+ // The starting point (i.e. x) must be a column vector.
+ COMPILE_TIME_ASSERT(T::NC <= 1);
+
+ typedef typename T::type scalar_type;
+ return find_min_box_constrained(search_strategy,
+ stop_strategy,
+ f,
+ der,
+ x,
+ uniform_matrix<scalar_type>(x.size(),1,x_lower),
+ uniform_matrix<scalar_type>(x.size(),1,x_upper) );
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename search_strategy_type,
+ typename stop_strategy_type,
+ typename funct,
+ typename funct_der,
+ typename T,
+ typename EXP1,
+ typename EXP2
+ >
+ double find_max_box_constrained (
+ search_strategy_type search_strategy,
+ stop_strategy_type stop_strategy,
+ const funct& f,
+ const funct_der& der,
+ T& x,
+ const matrix_exp<EXP1>& x_lower,
+ const matrix_exp<EXP2>& x_upper
+ )
+ {
+ // make sure the requires clause is not violated
+ COMPILE_TIME_ASSERT(is_matrix<T>::value);
+ // The starting point (i.e. x) must be a column vector.
+ COMPILE_TIME_ASSERT(T::NC <= 1);
+
+ DLIB_CASSERT (
+ is_col_vector(x) && is_col_vector(x_lower) && is_col_vector(x_upper) &&
+ x.size() == x_lower.size() && x.size() == x_upper.size(),
+ "\tdouble find_max_box_constrained()"
+ << "\n\t The inputs to this function must be equal length column vectors."
+ << "\n\t is_col_vector(x): " << is_col_vector(x)
+ << "\n\t is_col_vector(x_upper): " << is_col_vector(x_upper)
+ << "\n\t is_col_vector(x_upper): " << is_col_vector(x_upper)
+ << "\n\t x.size(): " << x.size()
+ << "\n\t x_lower.size(): " << x_lower.size()
+ << "\n\t x_upper.size(): " << x_upper.size()
+ );
+ DLIB_ASSERT (
+ min(x_upper-x_lower) >= 0,
+ "\tdouble find_max_box_constrained()"
+ << "\n\t You have to supply proper box constraints to this function."
+ << "\n\r min(x_upper-x_lower): " << min(x_upper-x_lower)
+ );
+
+        // This function is basically a copy of find_min_box_constrained(), but with
+        // negations put in the right places to flip things around so that it ends up
+        // looking for the max rather than the min.
+
+ T g, s;
+ double f_value = -f(x);
+ g = -der(x);
+
+ if (!is_finite(f_value))
+ throw error("The objective function generated non-finite outputs");
+ if (!is_finite(g))
+ throw error("The objective function generated non-finite outputs");
+
+        // gap_eps determines how close we have to get to a bound constraint before we
+        // basically drop it from the optimization and treat it as an active
+        // constraint.
+ const double gap_eps = 1e-8;
+
+ double last_alpha = 1;
+ while(stop_strategy.should_continue_search(x, f_value, g))
+ {
+ s = search_strategy.get_next_direction(x, f_value, zero_bounded_variables(gap_eps, g, x, g, x_lower, x_upper));
+ s = gap_step_assign_bounded_variables(gap_eps, s, x, g, x_lower, x_upper);
+
+ double alpha = backtracking_line_search(
+ negate_function(make_line_search_function(clamp_function(f,x_lower,x_upper), x, s, f_value)),
+ f_value,
+ dot(g,s), // compute gradient for the line search
+ last_alpha,
+ search_strategy.get_wolfe_rho(),
+ search_strategy.get_max_line_search_iterations());
+
+            // Do a trust region style thing for alpha.  The idea is that if we take a
+            // small step then we are likely to take another small step.  So we reuse
+            // the alpha from the last iteration, unless the line search didn't shrink
+            // alpha at all; in that case we start with a bigger alpha (capped at 1)
+            // next time.
+ if (alpha == last_alpha)
+ last_alpha = std::min(last_alpha*10,1.0);
+ else
+ last_alpha = alpha;
+
+ // Take the search step indicated by the above line search
+ x = dlib::clamp(x + alpha*s, x_lower, x_upper);
+ g = -der(x);
+
+ // Don't forget to negate the output from the line search since it is from the
+ // unnegated version of f()
+ f_value *= -1;
+
+ if (!is_finite(f_value))
+ throw error("The objective function generated non-finite outputs");
+ if (!is_finite(g))
+ throw error("The objective function generated non-finite outputs");
+ }
+
+ return -f_value;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename search_strategy_type,
+ typename stop_strategy_type,
+ typename funct,
+ typename funct_der,
+ typename T
+ >
+ double find_max_box_constrained (
+ search_strategy_type search_strategy,
+ stop_strategy_type stop_strategy,
+ const funct& f,
+ const funct_der& der,
+ T& x,
+ double x_lower,
+ double x_upper
+ )
+ {
+ // The starting point (i.e. x) must be a column vector.
+ COMPILE_TIME_ASSERT(T::NC <= 1);
+
+ typedef typename T::type scalar_type;
+ return find_max_box_constrained(search_strategy,
+ stop_strategy,
+ f,
+ der,
+ x,
+ uniform_matrix<scalar_type>(x.size(),1,x_lower),
+ uniform_matrix<scalar_type>(x.size(),1,x_upper) );
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_OPTIMIZATIOn_H_
+