diff options
Diffstat (limited to 'ml/dlib/dlib/optimization/optimization.h')
-rw-r--r-- | ml/dlib/dlib/optimization/optimization.h | 714 |
1 files changed, 0 insertions, 714 deletions
diff --git a/ml/dlib/dlib/optimization/optimization.h b/ml/dlib/dlib/optimization/optimization.h deleted file mode 100644 index 561d64376..000000000 --- a/ml/dlib/dlib/optimization/optimization.h +++ /dev/null @@ -1,714 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_OPTIMIZATIOn_H_ -#define DLIB_OPTIMIZATIOn_H_ - -#include <cmath> -#include <limits> -#include "optimization_abstract.h" -#include "optimization_search_strategies.h" -#include "optimization_stop_strategies.h" -#include "optimization_line_search.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// Functions that transform other functions -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template <typename funct> - class central_differences - { - public: - central_differences(const funct& f_, double eps_ = 1e-7) : f(f_), eps(eps_){} - - template <typename T> - typename T::matrix_type operator()(const T& x) const - { - // T must be some sort of dlib matrix - COMPILE_TIME_ASSERT(is_matrix<T>::value); - - typename T::matrix_type der(x.size()); - typename T::matrix_type e(x); - for (long i = 0; i < x.size(); ++i) - { - const double old_val = e(i); - - e(i) += eps; - const double delta_plus = f(e); - e(i) = old_val - eps; - const double delta_minus = f(e); - - der(i) = (delta_plus - delta_minus)/((old_val+eps)-(old_val-eps)); - - // and finally restore the old value of this element - e(i) = old_val; - } - - return der; - } - - template <typename T, typename U> - typename U::matrix_type operator()(const T& item, const U& x) const - { - // U must be some sort of dlib matrix - COMPILE_TIME_ASSERT(is_matrix<U>::value); - - typename U::matrix_type der(x.size()); - typename U::matrix_type e(x); - for (long i = 0; i < x.size(); ++i) - { - const double old_val = e(i); - - e(i) += eps; - const double delta_plus = f(item,e); - e(i) = old_val - eps; - const double delta_minus = f(item,e); - - der(i) = (delta_plus - delta_minus)/((old_val+eps)-(old_val-eps)); - - // and finally restore the old value of this element - e(i) = old_val; - } - - return der; - } - - - double operator()(const double& x) const - { - return (f(x+eps)-f(x-eps))/((x+eps)-(x-eps)); - } - - private: - const funct& f; - const double eps; - }; - - template <typename funct> - const central_differences<funct> derivative(const funct& f) { return central_differences<funct>(f); } - template <typename funct> - const central_differences<funct> derivative(const funct& f, double eps) - { - DLIB_ASSERT ( - eps > 0, - "\tcentral_differences derivative(f,eps)" - << "\n\tYou must give an epsilon > 0" - << "\n\teps: " << eps - ); - return central_differences<funct>(f,eps); - } - -// ---------------------------------------------------------------------------------------- - - template <typename funct, typename EXP1, typename EXP2> - struct clamped_function_object - { - clamped_function_object( - const funct& f_, - const matrix_exp<EXP1>& x_lower_, - const matrix_exp<EXP2>& x_upper_ - ) : f(f_), x_lower(x_lower_), x_upper(x_upper_) - { - } - - template <typename T> - double operator() ( - const T& x - ) const - { - return f(clamp(x,x_lower,x_upper)); - } - - const funct& f; - const matrix_exp<EXP1>& x_lower; - const matrix_exp<EXP2>& x_upper; - }; - - template <typename funct, typename EXP1, typename EXP2> - clamped_function_object<funct,EXP1,EXP2> clamp_function( - const funct& f, - const matrix_exp<EXP1>& x_lower, - const matrix_exp<EXP2>& x_upper - ) { return clamped_function_object<funct,EXP1,EXP2>(f,x_lower,x_upper); } - -// ---------------------------------------------------------------------------------------- - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// Functions that perform unconstrained optimization -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename search_strategy_type, - typename stop_strategy_type, - typename funct, - typename funct_der, - typename T - > - double find_min ( - search_strategy_type search_strategy, - stop_strategy_type stop_strategy, - const funct& f, - const funct_der& der, - T& x, - double min_f - ) - { - COMPILE_TIME_ASSERT(is_matrix<T>::value); - // The starting point (i.e. x) must be a column vector. - COMPILE_TIME_ASSERT(T::NC <= 1); - - DLIB_CASSERT ( - is_col_vector(x), - "\tdouble find_min()" - << "\n\tYou have to supply column vectors to this function" - << "\n\tx.nc(): " << x.nc() - ); - - - T g, s; - - double f_value = f(x); - g = der(x); - - if (!is_finite(f_value)) - throw error("The objective function generated non-finite outputs"); - if (!is_finite(g)) - throw error("The objective function generated non-finite outputs"); - - while(stop_strategy.should_continue_search(x, f_value, g) && f_value > min_f) - { - s = search_strategy.get_next_direction(x, f_value, g); - - double alpha = line_search( - make_line_search_function(f,x,s, f_value), - f_value, - make_line_search_function(der,x,s, g), - dot(g,s), // compute initial gradient for the line search - search_strategy.get_wolfe_rho(), search_strategy.get_wolfe_sigma(), min_f, - search_strategy.get_max_line_search_iterations()); - - // Take the search step indicated by the above line search - x += alpha*s; - - if (!is_finite(f_value)) - throw error("The objective function generated non-finite outputs"); - if (!is_finite(g)) - throw error("The objective function generated non-finite outputs"); - } - - return f_value; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename search_strategy_type, - typename stop_strategy_type, - typename funct, - typename funct_der, - typename T - > - double find_max ( - search_strategy_type search_strategy, - stop_strategy_type stop_strategy, - const funct& f, - const funct_der& der, - T& x, - double max_f - ) - { - COMPILE_TIME_ASSERT(is_matrix<T>::value); - // The starting point (i.e. x) must be a column vector. - COMPILE_TIME_ASSERT(T::NC <= 1); - - DLIB_CASSERT ( - is_col_vector(x), - "\tdouble find_max()" - << "\n\tYou have to supply column vectors to this function" - << "\n\tx.nc(): " << x.nc() - ); - - T g, s; - - // This function is basically just a copy of find_min() but with - put in the right places - // to flip things around so that it ends up looking for the max rather than the min. - - double f_value = -f(x); - g = -der(x); - - if (!is_finite(f_value)) - throw error("The objective function generated non-finite outputs"); - if (!is_finite(g)) - throw error("The objective function generated non-finite outputs"); - - while(stop_strategy.should_continue_search(x, f_value, g) && f_value > -max_f) - { - s = search_strategy.get_next_direction(x, f_value, g); - - double alpha = line_search( - negate_function(make_line_search_function(f,x,s, f_value)), - f_value, - negate_function(make_line_search_function(der,x,s, g)), - dot(g,s), // compute initial gradient for the line search - search_strategy.get_wolfe_rho(), search_strategy.get_wolfe_sigma(), -max_f, - search_strategy.get_max_line_search_iterations() - ); - - // Take the search step indicated by the above line search - x += alpha*s; - - // Don't forget to negate these outputs from the line search since they are - // from the unnegated versions of f() and der() - g *= -1; - f_value *= -1; - - if (!is_finite(f_value)) - throw error("The objective function generated non-finite outputs"); - if (!is_finite(g)) - throw error("The objective function generated non-finite outputs"); - } - - return -f_value; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename search_strategy_type, - typename stop_strategy_type, - typename funct, - typename T - > - double find_min_using_approximate_derivatives ( - search_strategy_type search_strategy, - stop_strategy_type stop_strategy, - const funct& f, - T& x, - double min_f, - double derivative_eps = 1e-7 - ) - { - COMPILE_TIME_ASSERT(is_matrix<T>::value); - // The starting point (i.e. x) must be a column vector. - COMPILE_TIME_ASSERT(T::NC <= 1); - - DLIB_CASSERT ( - is_col_vector(x) && derivative_eps > 0, - "\tdouble find_min_using_approximate_derivatives()" - << "\n\tYou have to supply column vectors to this function" - << "\n\tx.nc(): " << x.nc() - << "\n\tderivative_eps: " << derivative_eps - ); - - T g, s; - - double f_value = f(x); - g = derivative(f,derivative_eps)(x); - - if (!is_finite(f_value)) - throw error("The objective function generated non-finite outputs"); - if (!is_finite(g)) - throw error("The objective function generated non-finite outputs"); - - while(stop_strategy.should_continue_search(x, f_value, g) && f_value > min_f) - { - s = search_strategy.get_next_direction(x, f_value, g); - - double alpha = line_search( - make_line_search_function(f,x,s,f_value), - f_value, - derivative(make_line_search_function(f,x,s),derivative_eps), - dot(g,s), // Sometimes the following line is a better way of determining the initial gradient. - //derivative(make_line_search_function(f,x,s),derivative_eps)(0), - search_strategy.get_wolfe_rho(), search_strategy.get_wolfe_sigma(), min_f, - search_strategy.get_max_line_search_iterations() - ); - - // Take the search step indicated by the above line search - x += alpha*s; - - g = derivative(f,derivative_eps)(x); - - if (!is_finite(f_value)) - throw error("The objective function generated non-finite outputs"); - if (!is_finite(g)) - throw error("The objective function generated non-finite outputs"); - } - - return f_value; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename search_strategy_type, - typename stop_strategy_type, - typename funct, - typename T - > - double find_max_using_approximate_derivatives ( - search_strategy_type search_strategy, - stop_strategy_type stop_strategy, - const funct& f, - T& x, - double max_f, - double derivative_eps = 1e-7 - ) - { - COMPILE_TIME_ASSERT(is_matrix<T>::value); - // The starting point (i.e. x) must be a column vector. - COMPILE_TIME_ASSERT(T::NC <= 1); - - DLIB_CASSERT ( - is_col_vector(x) && derivative_eps > 0, - "\tdouble find_max_using_approximate_derivatives()" - << "\n\tYou have to supply column vectors to this function" - << "\n\tx.nc(): " << x.nc() - << "\n\tderivative_eps: " << derivative_eps - ); - - // Just negate the necessary things and call the find_min version of this function. - return -find_min_using_approximate_derivatives( - search_strategy, - stop_strategy, - negate_function(f), - x, - -max_f, - derivative_eps - ); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// Functions for box constrained optimization -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template <typename T, typename U, typename V> - T zero_bounded_variables ( - const double eps, - T vect, - const T& x, - const T& gradient, - const U& x_lower, - const V& x_upper - ) - { - for (long i = 0; i < gradient.size(); ++i) - { - const double tol = eps*std::abs(x(i)); - // if x(i) is an active bound constraint - if (x_lower(i)+tol >= x(i) && gradient(i) > 0) - vect(i) = 0; - else if (x_upper(i)-tol <= x(i) && gradient(i) < 0) - vect(i) = 0; - } - return vect; - } - -// ---------------------------------------------------------------------------------------- - - template <typename T, typename U, typename V> - T gap_step_assign_bounded_variables ( - const double eps, - T vect, - const T& x, - const T& gradient, - const U& x_lower, - const V& x_upper - ) - { - for (long i = 0; i < gradient.size(); ++i) - { - const double tol = eps*std::abs(x(i)); - // If x(i) is an active bound constraint then we should set its search - // direction such that a single step along the direction either does nothing or - // closes the gap of size tol before hitting the bound exactly. - if (x_lower(i)+tol >= x(i) && gradient(i) > 0) - vect(i) = x_lower(i)-x(i); - else if (x_upper(i)-tol <= x(i) && gradient(i) < 0) - vect(i) = x_upper(i)-x(i); - } - return vect; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename search_strategy_type, - typename stop_strategy_type, - typename funct, - typename funct_der, - typename T, - typename EXP1, - typename EXP2 - > - double find_min_box_constrained ( - search_strategy_type search_strategy, - stop_strategy_type stop_strategy, - const funct& f, - const funct_der& der, - T& x, - const matrix_exp<EXP1>& x_lower, - const matrix_exp<EXP2>& x_upper - ) - { - /* - The implementation of this function is more or less based on the discussion in - the paper Projected Newton-type Methods in Machine Learning by Mark Schmidt, et al. - */ - - // make sure the requires clause is not violated - COMPILE_TIME_ASSERT(is_matrix<T>::value); - // The starting point (i.e. x) must be a column vector. - COMPILE_TIME_ASSERT(T::NC <= 1); - - DLIB_CASSERT ( - is_col_vector(x) && is_col_vector(x_lower) && is_col_vector(x_upper) && - x.size() == x_lower.size() && x.size() == x_upper.size(), - "\tdouble find_min_box_constrained()" - << "\n\t The inputs to this function must be equal length column vectors." - << "\n\t is_col_vector(x): " << is_col_vector(x) - << "\n\t is_col_vector(x_upper): " << is_col_vector(x_upper) - << "\n\t is_col_vector(x_upper): " << is_col_vector(x_upper) - << "\n\t x.size(): " << x.size() - << "\n\t x_lower.size(): " << x_lower.size() - << "\n\t x_upper.size(): " << x_upper.size() - ); - DLIB_ASSERT ( - min(x_upper-x_lower) >= 0, - "\tdouble find_min_box_constrained()" - << "\n\t You have to supply proper box constraints to this function." - << "\n\r min(x_upper-x_lower): " << min(x_upper-x_lower) - ); - - - T g, s; - double f_value = f(x); - g = der(x); - - if (!is_finite(f_value)) - throw error("The objective function generated non-finite outputs"); - if (!is_finite(g)) - throw error("The objective function generated non-finite outputs"); - - // gap_eps determines how close we have to get to a bound constraint before we - // start basically dropping it from the optimization and consider it to be an - // active constraint. - const double gap_eps = 1e-8; - - double last_alpha = 1; - while(stop_strategy.should_continue_search(x, f_value, g)) - { - s = search_strategy.get_next_direction(x, f_value, zero_bounded_variables(gap_eps, g, x, g, x_lower, x_upper)); - s = gap_step_assign_bounded_variables(gap_eps, s, x, g, x_lower, x_upper); - - double alpha = backtracking_line_search( - make_line_search_function(clamp_function(f,x_lower,x_upper), x, s, f_value), - f_value, - dot(g,s), // compute gradient for the line search - last_alpha, - search_strategy.get_wolfe_rho(), - search_strategy.get_max_line_search_iterations()); - - // Do a trust region style thing for alpha. The idea is that if we take a - // small step then we are likely to take another small step. So we reuse the - // alpha from the last iteration unless the line search didn't shrink alpha at - // all, in that case, we start with a bigger alpha next time. - if (alpha == last_alpha) - last_alpha = std::min(last_alpha*10,1.0); - else - last_alpha = alpha; - - // Take the search step indicated by the above line search - x = dlib::clamp(x + alpha*s, x_lower, x_upper); - g = der(x); - - if (!is_finite(f_value)) - throw error("The objective function generated non-finite outputs"); - if (!is_finite(g)) - throw error("The objective function generated non-finite outputs"); - } - - return f_value; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename search_strategy_type, - typename stop_strategy_type, - typename funct, - typename funct_der, - typename T - > - double find_min_box_constrained ( - search_strategy_type search_strategy, - stop_strategy_type stop_strategy, - const funct& f, - const funct_der& der, - T& x, - double x_lower, - double x_upper - ) - { - // The starting point (i.e. x) must be a column vector. - COMPILE_TIME_ASSERT(T::NC <= 1); - - typedef typename T::type scalar_type; - return find_min_box_constrained(search_strategy, - stop_strategy, - f, - der, - x, - uniform_matrix<scalar_type>(x.size(),1,x_lower), - uniform_matrix<scalar_type>(x.size(),1,x_upper) ); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename search_strategy_type, - typename stop_strategy_type, - typename funct, - typename funct_der, - typename T, - typename EXP1, - typename EXP2 - > - double find_max_box_constrained ( - search_strategy_type search_strategy, - stop_strategy_type stop_strategy, - const funct& f, - const funct_der& der, - T& x, - const matrix_exp<EXP1>& x_lower, - const matrix_exp<EXP2>& x_upper - ) - { - // make sure the requires clause is not violated - COMPILE_TIME_ASSERT(is_matrix<T>::value); - // The starting point (i.e. x) must be a column vector. - COMPILE_TIME_ASSERT(T::NC <= 1); - - DLIB_CASSERT ( - is_col_vector(x) && is_col_vector(x_lower) && is_col_vector(x_upper) && - x.size() == x_lower.size() && x.size() == x_upper.size(), - "\tdouble find_max_box_constrained()" - << "\n\t The inputs to this function must be equal length column vectors." - << "\n\t is_col_vector(x): " << is_col_vector(x) - << "\n\t is_col_vector(x_upper): " << is_col_vector(x_upper) - << "\n\t is_col_vector(x_upper): " << is_col_vector(x_upper) - << "\n\t x.size(): " << x.size() - << "\n\t x_lower.size(): " << x_lower.size() - << "\n\t x_upper.size(): " << x_upper.size() - ); - DLIB_ASSERT ( - min(x_upper-x_lower) >= 0, - "\tdouble find_max_box_constrained()" - << "\n\t You have to supply proper box constraints to this function." - << "\n\r min(x_upper-x_lower): " << min(x_upper-x_lower) - ); - - // This function is basically just a copy of find_min_box_constrained() but with - put - // in the right places to flip things around so that it ends up looking for the max - // rather than the min. - - T g, s; - double f_value = -f(x); - g = -der(x); - - if (!is_finite(f_value)) - throw error("The objective function generated non-finite outputs"); - if (!is_finite(g)) - throw error("The objective function generated non-finite outputs"); - - // gap_eps determines how close we have to get to a bound constraint before we - // start basically dropping it from the optimization and consider it to be an - // active constraint. - const double gap_eps = 1e-8; - - double last_alpha = 1; - while(stop_strategy.should_continue_search(x, f_value, g)) - { - s = search_strategy.get_next_direction(x, f_value, zero_bounded_variables(gap_eps, g, x, g, x_lower, x_upper)); - s = gap_step_assign_bounded_variables(gap_eps, s, x, g, x_lower, x_upper); - - double alpha = backtracking_line_search( - negate_function(make_line_search_function(clamp_function(f,x_lower,x_upper), x, s, f_value)), - f_value, - dot(g,s), // compute gradient for the line search - last_alpha, - search_strategy.get_wolfe_rho(), - search_strategy.get_max_line_search_iterations()); - - // Do a trust region style thing for alpha. The idea is that if we take a - // small step then we are likely to take another small step. So we reuse the - // alpha from the last iteration unless the line search didn't shrink alpha at - // all, in that case, we start with a bigger alpha next time. - if (alpha == last_alpha) - last_alpha = std::min(last_alpha*10,1.0); - else - last_alpha = alpha; - - // Take the search step indicated by the above line search - x = dlib::clamp(x + alpha*s, x_lower, x_upper); - g = -der(x); - - // Don't forget to negate the output from the line search since it is from the - // unnegated version of f() - f_value *= -1; - - if (!is_finite(f_value)) - throw error("The objective function generated non-finite outputs"); - if (!is_finite(g)) - throw error("The objective function generated non-finite outputs"); - } - - return -f_value; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename search_strategy_type, - typename stop_strategy_type, - typename funct, - typename funct_der, - typename T - > - double find_max_box_constrained ( - search_strategy_type search_strategy, - stop_strategy_type stop_strategy, - const funct& f, - const funct_der& der, - T& x, - double x_lower, - double x_upper - ) - { - // The starting point (i.e. x) must be a column vector. - COMPILE_TIME_ASSERT(T::NC <= 1); - - typedef typename T::type scalar_type; - return find_max_box_constrained(search_strategy, - stop_strategy, - f, - der, - x, - uniform_matrix<scalar_type>(x.size(),1,x_lower), - uniform_matrix<scalar_type>(x.size(),1,x_upper) ); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_OPTIMIZATIOn_H_ - |