summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/optimization/optimization_oca_abstract.h
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/optimization/optimization_oca_abstract.h')
-rw-r--r--ml/dlib/dlib/optimization/optimization_oca_abstract.h334
1 files changed, 0 insertions, 334 deletions
diff --git a/ml/dlib/dlib/optimization/optimization_oca_abstract.h b/ml/dlib/dlib/optimization/optimization_oca_abstract.h
deleted file mode 100644
index 859dbdcfc..000000000
--- a/ml/dlib/dlib/optimization/optimization_oca_abstract.h
+++ /dev/null
@@ -1,334 +0,0 @@
-// Copyright (C) 2010 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#undef DLIB_OPTIMIZATION_OCA_ABsTRACT_Hh_
-#ifdef DLIB_OPTIMIZATION_OCA_ABsTRACT_Hh_
-
-// ----------------------------------------------------------------------------------------
-
-namespace dlib
-{
- template <typename matrix_type>
- class oca_problem
- {
- /*!
- REQUIREMENTS ON matrix_type
- - matrix_type == a dlib::matrix capable of storing column vectors
-
- WHAT THIS OBJECT REPRESENTS
- This object is the interface used to define the optimization
- problems solved by the oca optimizer defined later in this file.
-
- OCA solves optimization problems with the following form:
- Minimize: f(w) == 0.5*length_squared(w) + C*R(w)
-
- Where R(w) is a user-supplied convex function and C > 0. Optionally,
- there can also be non-negativity constraints on some or all of the
- elements of w.
-
- Or it can alternatively solve:
- Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w)
-
- Where prior is a user supplied vector and R(w) has the same
- interpretation as above.
-
- Or it can use the elastic net regularizer:
- Minimize: f(w) == 0.5*(1-lasso_lambda)*length_squared(w) + lasso_lambda*sum(abs(w)) + C*R(w)
-
- Where lasso_lambda is a number in the range [0, 1) and controls
- trade-off between doing L1 and L2 regularization. R(w) has the same
- interpretation as above.
-
-
- Note that the stopping condition must be provided by the user
- in the form of the optimization_status() function.
- !*/
-
- public:
-
- typedef typename matrix_type::type scalar_type;
-
- virtual ~oca_problem() {}
-
- virtual bool risk_has_lower_bound (
- scalar_type& lower_bound
- ) const { return false; }
- /*!
- ensures
- - if (R(w) >= a constant for all values of w) then
- - returns true
- - #lower_bound == the constant that lower bounds R(w)
- - else
- - returns false
- !*/
-
- virtual bool optimization_status (
- scalar_type current_objective_value,
- scalar_type current_error_gap,
- scalar_type current_risk_value,
- scalar_type current_risk_gap,
- unsigned long num_cutting_planes,
- unsigned long num_iterations
- ) const = 0;
- /*!
- requires
- - This function is called by the OCA optimizer each iteration.
- - current_objective_value == the current value of the objective function f(w)
- - current_error_gap == The bound on how much lower the objective function
- can drop before we reach the optimal point. At the optimal solution the
- error gap is equal to 0.
- - current_risk_value == the current value of the R(w) term of the objective function.
- - current_risk_gap == the bound on how much lower the risk term can go. At the optimal
- solution the risk gap is zero.
- - num_cutting_planes == the number of cutting planes the algorithm is currently using.
- - num_iterations == A count of the total number of iterations that have executed
- since we started running the optimization.
- ensures
- - If it is appropriate to terminate the optimization then this function returns true
- and false otherwise.
- !*/
-
- virtual scalar_type get_c (
- ) const = 0;
- /*!
- ensures
- - returns the C parameter
- !*/
-
- virtual long get_num_dimensions (
- ) const = 0;
- /*!
- ensures
- - returns the number of free variables in this optimization problem
- !*/
-
- virtual void get_risk (
- matrix_type& current_solution,
- scalar_type& risk_value,
- matrix_type& risk_subgradient
- ) const = 0;
- /*!
- requires
- - is_col_vector(current_solution) == true
- - current_solution.size() == get_num_dimensions()
- ensures
- - #current_solution will be set to one of the following:
- - current_solution (i.e. it won't be modified at all)
- - The result of a line search passing through current_solution.
- - #risk_value == R(#current_solution)
- - #risk_subgradient == an element of the subgradient of R() at the
- point #current_solution
- - Note that #risk_value and #risk_subgradient are NOT multiplied by get_c()
- !*/
-
- };
-
-// ----------------------------------------------------------------------------------------
-
- class oca
- {
- /*!
- INITIAL VALUE
- - get_subproblem_epsilon() == 1e-2
- - get_subproblem_max_iterations() == 50000
- - get_inactive_plane_threshold() == 20
-
- WHAT THIS OBJECT REPRESENTS
- This object is a tool for solving the optimization problem defined above
- by the oca_problem abstract class.
-
- For reference, OCA solves optimization problems with the following form:
- Minimize: f(w) == 0.5*length_squared(w) + C*R(w)
-
- Where R(w) is a user-supplied convex function and C > 0. Optionally,
- this object can also add non-negativity constraints to some or all
- of the elements of w.
-
- Or it can alternatively solve:
- Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w)
-
- Where prior is a user supplied vector and R(w) has the same
- interpretation as above.
-
- Or it can use the elastic net regularizer:
- Minimize: f(w) == 0.5*(1-lasso_lambda)*length_squared(w) + lasso_lambda*sum(abs(w)) + C*R(w)
-
- Where lasso_lambda is a number in the range [0, 1) and controls
- trade-off between doing L1 and L2 regularization. R(w) has the same
- interpretation as above.
-
-
- For a detailed discussion you should consult the following papers
- from the Journal of Machine Learning Research:
- Optimized Cutting Plane Algorithm for Large-Scale Risk Minimization
- Vojtech Franc, Soren Sonnenburg; 10(Oct):2157--2192, 2009.
-
- Bundle Methods for Regularized Risk Minimization
- Choon Hui Teo, S.V.N. Vishwanthan, Alex J. Smola, Quoc V. Le; 11(Jan):311-365, 2010.
- !*/
- public:
-
- oca (
- );
- /*!
- ensures
- - this object is properly initialized
- !*/
-
- template <
- typename matrix_type
- >
- typename matrix_type::type operator() (
- const oca_problem<matrix_type>& problem,
- matrix_type& w,
- unsigned long num_nonnegative = 0,
- unsigned long force_weight_to_1 = std::numeric_limits<unsigned long>::max()
- ) const;
- /*!
- requires
- - problem.get_c() > 0
- - problem.get_num_dimensions() > 0
- ensures
- - solves the given oca problem and stores the solution in #w. In particular,
- this function solves:
- Minimize: f(w) == 0.5*length_squared(w) + C*R(w)
- - The optimization algorithm runs until problem.optimization_status()
- indicates it is time to stop.
- - returns the objective value at the solution #w
- - if (num_nonnegative != 0) then
- - Adds the constraint that #w(i) >= 0 for all i < num_nonnegative.
- That is, the first num_nonnegative elements of #w will always be
- non-negative. This includes the copies of w passed to get_risk()
- in the form of the current_solution vector as well as the final
- output of this function.
- - if (force_weight_to_1 < problem.get_num_dimensions()) then
- - The optimizer enforces the following constraints:
- - #w(force_weight_to_1) == 1
- - for all i > force_weight_to_1:
- - #w(i) == 0
- - That is, the element in the weight vector at the index indicated
- by force_weight_to_1 will have a value of 1 upon completion of
- this function, while all subsequent elements of w will have
- values of 0.
- !*/
-
- template <
- typename matrix_type
- >
- typename matrix_type::type operator() (
- const oca_problem<matrix_type>& problem,
- matrix_type& w,
- const matrix_type& prior
- ) const;
- /*!
- requires
- - problem.get_c() > 0
- - problem.get_num_dimensions() > 0
- - is_col_vector(prior) == true
- - prior.size() == problem.get_num_dimensions()
- ensures
- - solves the given oca problem and stores the solution in #w.
- - In this mode, we solve a version of the problem with a different
- regularizer. In particular, this function solves:
- Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w)
- - The optimization algorithm runs until problem.optimization_status()
- indicates it is time to stop.
- - returns the objective value at the solution #w
- !*/
-
- template <
- typename matrix_type
- >
- typename matrix_type::type solve_with_elastic_net (
- const oca_problem<matrix_type>& problem,
- matrix_type& w,
- scalar_type lasso_lambda,
- unsigned long force_weight_to_1 = std::numeric_limits<unsigned long>::max()
- ) const;
- /*!
- requires
- - problem.get_c() > 0
- - problem.get_num_dimensions() > 0
- - 0 <= lasso_lambda < 1
- ensures
- - Solves the given oca problem and stores the solution in #w, but uses an
- elastic net regularizer instead of the normal L2 regularizer. In
- particular, this function solves:
- Minimize: f(w) == 0.5*(1-lasso_lambda)*length_squared(w) + lasso_lambda*sum(abs(w)) + C*R(w)
- - The optimization algorithm runs until problem.optimization_status()
- indicates it is time to stop.
- - returns the objective value at the solution #w
- - if (force_weight_to_1 < problem.get_num_dimensions()) then
- - The optimizer enforces the following constraints:
- - #w(force_weight_to_1) == 1
- - for all i > force_weight_to_1:
- - #w(i) == 0
- - That is, the element in the weight vector at the index indicated
- by force_weight_to_1 will have a value of 1 upon completion of
- this function, while all subsequent elements of w will have
- values of 0.
- !*/
-
- void set_subproblem_epsilon (
- double eps
- );
- /*!
- requires
- - eps > 0
- ensures
- - #get_subproblem_epsilon() == eps
- !*/
-
- double get_subproblem_epsilon (
- ) const;
- /*!
- ensures
- - returns the accuracy used in solving the quadratic programming
- subproblem that is part of the overall OCA algorithm.
- !*/
-
- void set_subproblem_max_iterations (
- unsigned long sub_max_iter
- );
- /*!
- requires
- - sub_max_iter > 0
- ensures
- - #get_subproblem_max_iterations() == sub_max_iter
- !*/
-
- unsigned long get_subproblem_max_iterations (
- ) const;
- /*!
- ensures
- - returns the maximum number of iterations this object will perform
- while attempting to solve each quadratic programming subproblem.
- !*/
-
- void set_inactive_plane_threshold (
- unsigned long inactive_thresh
- );
- /*!
- requires
- - inactive_thresh > 0
- ensures
- - #get_inactive_plane_threshold() == inactive_thresh
- !*/
-
- unsigned long get_inactive_plane_threshold (
- ) const;
- /*!
- ensures
- - As OCA runs it builds up a set of cutting planes. Typically
- cutting planes become inactive after a certain point and can then
- be removed. This function returns the number of iterations of
- inactivity required before a cutting plane is removed.
- !*/
-
- };
-}
-
-// ----------------------------------------------------------------------------------------
-
-#endif // DLIB_OPTIMIZATION_OCA_ABsTRACT_Hh_
-
-