diff options
Diffstat (limited to 'ml/dlib/dlib/optimization/optimization_oca_abstract.h')
-rw-r--r-- | ml/dlib/dlib/optimization/optimization_oca_abstract.h | 334 |
1 files changed, 0 insertions, 334 deletions
diff --git a/ml/dlib/dlib/optimization/optimization_oca_abstract.h b/ml/dlib/dlib/optimization/optimization_oca_abstract.h deleted file mode 100644 index 859dbdcfc..000000000 --- a/ml/dlib/dlib/optimization/optimization_oca_abstract.h +++ /dev/null @@ -1,334 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_OPTIMIZATION_OCA_ABsTRACT_Hh_ -#ifdef DLIB_OPTIMIZATION_OCA_ABsTRACT_Hh_ - -// ---------------------------------------------------------------------------------------- - -namespace dlib -{ - template <typename matrix_type> - class oca_problem - { - /*! - REQUIREMENTS ON matrix_type - - matrix_type == a dlib::matrix capable of storing column vectors - - WHAT THIS OBJECT REPRESENTS - This object is the interface used to define the optimization - problems solved by the oca optimizer defined later in this file. - - OCA solves optimization problems with the following form: - Minimize: f(w) == 0.5*length_squared(w) + C*R(w) - - Where R(w) is a user-supplied convex function and C > 0. Optionally, - there can also be non-negativity constraints on some or all of the - elements of w. - - Or it can alternatively solve: - Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w) - - Where prior is a user supplied vector and R(w) has the same - interpretation as above. - - Or it can use the elastic net regularizer: - Minimize: f(w) == 0.5*(1-lasso_lambda)*length_squared(w) + lasso_lambda*sum(abs(w)) + C*R(w) - - Where lasso_lambda is a number in the range [0, 1) and controls - trade-off between doing L1 and L2 regularization. R(w) has the same - interpretation as above. - - - Note that the stopping condition must be provided by the user - in the form of the optimization_status() function. - !*/ - - public: - - typedef typename matrix_type::type scalar_type; - - virtual ~oca_problem() {} - - virtual bool risk_has_lower_bound ( - scalar_type& lower_bound - ) const { return false; } - /*! - ensures - - if (R(w) >= a constant for all values of w) then - - returns true - - #lower_bound == the constant that lower bounds R(w) - - else - - returns false - !*/ - - virtual bool optimization_status ( - scalar_type current_objective_value, - scalar_type current_error_gap, - scalar_type current_risk_value, - scalar_type current_risk_gap, - unsigned long num_cutting_planes, - unsigned long num_iterations - ) const = 0; - /*! - requires - - This function is called by the OCA optimizer each iteration. - - current_objective_value == the current value of the objective function f(w) - - current_error_gap == The bound on how much lower the objective function - can drop before we reach the optimal point. At the optimal solution the - error gap is equal to 0. - - current_risk_value == the current value of the R(w) term of the objective function. - - current_risk_gap == the bound on how much lower the risk term can go. At the optimal - solution the risk gap is zero. - - num_cutting_planes == the number of cutting planes the algorithm is currently using. - - num_iterations == A count of the total number of iterations that have executed - since we started running the optimization. - ensures - - If it is appropriate to terminate the optimization then this function returns true - and false otherwise. - !*/ - - virtual scalar_type get_c ( - ) const = 0; - /*! - ensures - - returns the C parameter - !*/ - - virtual long get_num_dimensions ( - ) const = 0; - /*! - ensures - - returns the number of free variables in this optimization problem - !*/ - - virtual void get_risk ( - matrix_type& current_solution, - scalar_type& risk_value, - matrix_type& risk_subgradient - ) const = 0; - /*! - requires - - is_col_vector(current_solution) == true - - current_solution.size() == get_num_dimensions() - ensures - - #current_solution will be set to one of the following: - - current_solution (i.e. it won't be modified at all) - - The result of a line search passing through current_solution. - - #risk_value == R(#current_solution) - - #risk_subgradient == an element of the subgradient of R() at the - point #current_solution - - Note that #risk_value and #risk_subgradient are NOT multiplied by get_c() - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - class oca - { - /*! - INITIAL VALUE - - get_subproblem_epsilon() == 1e-2 - - get_subproblem_max_iterations() == 50000 - - get_inactive_plane_threshold() == 20 - - WHAT THIS OBJECT REPRESENTS - This object is a tool for solving the optimization problem defined above - by the oca_problem abstract class. - - For reference, OCA solves optimization problems with the following form: - Minimize: f(w) == 0.5*length_squared(w) + C*R(w) - - Where R(w) is a user-supplied convex function and C > 0. Optionally, - this object can also add non-negativity constraints to some or all - of the elements of w. - - Or it can alternatively solve: - Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w) - - Where prior is a user supplied vector and R(w) has the same - interpretation as above. - - Or it can use the elastic net regularizer: - Minimize: f(w) == 0.5*(1-lasso_lambda)*length_squared(w) + lasso_lambda*sum(abs(w)) + C*R(w) - - Where lasso_lambda is a number in the range [0, 1) and controls - trade-off between doing L1 and L2 regularization. R(w) has the same - interpretation as above. - - - For a detailed discussion you should consult the following papers - from the Journal of Machine Learning Research: - Optimized Cutting Plane Algorithm for Large-Scale Risk Minimization - Vojtech Franc, Soren Sonnenburg; 10(Oct):2157--2192, 2009. - - Bundle Methods for Regularized Risk Minimization - Choon Hui Teo, S.V.N. Vishwanthan, Alex J. Smola, Quoc V. Le; 11(Jan):311-365, 2010. - !*/ - public: - - oca ( - ); - /*! - ensures - - this object is properly initialized - !*/ - - template < - typename matrix_type - > - typename matrix_type::type operator() ( - const oca_problem<matrix_type>& problem, - matrix_type& w, - unsigned long num_nonnegative = 0, - unsigned long force_weight_to_1 = std::numeric_limits<unsigned long>::max() - ) const; - /*! - requires - - problem.get_c() > 0 - - problem.get_num_dimensions() > 0 - ensures - - solves the given oca problem and stores the solution in #w. In particular, - this function solves: - Minimize: f(w) == 0.5*length_squared(w) + C*R(w) - - The optimization algorithm runs until problem.optimization_status() - indicates it is time to stop. - - returns the objective value at the solution #w - - if (num_nonnegative != 0) then - - Adds the constraint that #w(i) >= 0 for all i < num_nonnegative. - That is, the first num_nonnegative elements of #w will always be - non-negative. This includes the copies of w passed to get_risk() - in the form of the current_solution vector as well as the final - output of this function. - - if (force_weight_to_1 < problem.get_num_dimensions()) then - - The optimizer enforces the following constraints: - - #w(force_weight_to_1) == 1 - - for all i > force_weight_to_1: - - #w(i) == 0 - - That is, the element in the weight vector at the index indicated - by force_weight_to_1 will have a value of 1 upon completion of - this function, while all subsequent elements of w will have - values of 0. - !*/ - - template < - typename matrix_type - > - typename matrix_type::type operator() ( - const oca_problem<matrix_type>& problem, - matrix_type& w, - const matrix_type& prior - ) const; - /*! - requires - - problem.get_c() > 0 - - problem.get_num_dimensions() > 0 - - is_col_vector(prior) == true - - prior.size() == problem.get_num_dimensions() - ensures - - solves the given oca problem and stores the solution in #w. - - In this mode, we solve a version of the problem with a different - regularizer. In particular, this function solves: - Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w) - - The optimization algorithm runs until problem.optimization_status() - indicates it is time to stop. - - returns the objective value at the solution #w - !*/ - - template < - typename matrix_type - > - typename matrix_type::type solve_with_elastic_net ( - const oca_problem<matrix_type>& problem, - matrix_type& w, - scalar_type lasso_lambda, - unsigned long force_weight_to_1 = std::numeric_limits<unsigned long>::max() - ) const; - /*! - requires - - problem.get_c() > 0 - - problem.get_num_dimensions() > 0 - - 0 <= lasso_lambda < 1 - ensures - - Solves the given oca problem and stores the solution in #w, but uses an - elastic net regularizer instead of the normal L2 regularizer. In - particular, this function solves: - Minimize: f(w) == 0.5*(1-lasso_lambda)*length_squared(w) + lasso_lambda*sum(abs(w)) + C*R(w) - - The optimization algorithm runs until problem.optimization_status() - indicates it is time to stop. - - returns the objective value at the solution #w - - if (force_weight_to_1 < problem.get_num_dimensions()) then - - The optimizer enforces the following constraints: - - #w(force_weight_to_1) == 1 - - for all i > force_weight_to_1: - - #w(i) == 0 - - That is, the element in the weight vector at the index indicated - by force_weight_to_1 will have a value of 1 upon completion of - this function, while all subsequent elements of w will have - values of 0. - !*/ - - void set_subproblem_epsilon ( - double eps - ); - /*! - requires - - eps > 0 - ensures - - #get_subproblem_epsilon() == eps - !*/ - - double get_subproblem_epsilon ( - ) const; - /*! - ensures - - returns the accuracy used in solving the quadratic programming - subproblem that is part of the overall OCA algorithm. - !*/ - - void set_subproblem_max_iterations ( - unsigned long sub_max_iter - ); - /*! - requires - - sub_max_iter > 0 - ensures - - #get_subproblem_max_iterations() == sub_max_iter - !*/ - - unsigned long get_subproblem_max_iterations ( - ) const; - /*! - ensures - - returns the maximum number of iterations this object will perform - while attempting to solve each quadratic programming subproblem. - !*/ - - void set_inactive_plane_threshold ( - unsigned long inactive_thresh - ); - /*! - requires - - inactive_thresh > 0 - ensures - - #get_inactive_plane_threshold() == inactive_thresh - !*/ - - unsigned long get_inactive_plane_threshold ( - ) const; - /*! - ensures - - As OCA runs it builds up a set of cutting planes. Typically - cutting planes become inactive after a certain point and can then - be removed. This function returns the number of iterations of - inactivity required before a cutting plane is removed. - !*/ - - }; -} - -// ---------------------------------------------------------------------------------------- - -#endif // DLIB_OPTIMIZATION_OCA_ABsTRACT_Hh_ - - |