summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/global_optimization/global_function_search_abstract.h
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/global_optimization/global_function_search_abstract.h')
-rw-r--r--ml/dlib/dlib/global_optimization/global_function_search_abstract.h605
1 files changed, 605 insertions, 0 deletions
diff --git a/ml/dlib/dlib/global_optimization/global_function_search_abstract.h b/ml/dlib/dlib/global_optimization/global_function_search_abstract.h
new file mode 100644
index 000000000..c8bfc3993
--- /dev/null
+++ b/ml/dlib/dlib/global_optimization/global_function_search_abstract.h
@@ -0,0 +1,605 @@
+// Copyright (C) 2017 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_GLOBAL_FuNCTION_SEARCH_ABSTRACT_Hh_
+#ifdef DLIB_GLOBAL_FuNCTION_SEARCH_ABSTRACT_Hh_
+
+#include <vector>
+#include "../matrix.h"
+#include "upper_bound_function_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ struct function_spec
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object is a simple struct that lets you define the valid inputs to a
+ multivariate function. It lets you define bound constraints for each
+ variable as well as say if a variable is integer valued or not. Therefore,
+ an instance of this struct says that a function takes upper.size() input
+ variables, where the ith variable must be in the range [lower(i) upper(i)]
+ and be an integer if is_integer_variable[i]==true.
+ !*/
+
+ function_spec(
+ matrix<double,0,1> bound1,
+ matrix<double,0,1> bound2
+ );
+ /*!
+ requires
+ - bound1.size() == bound2.size()
+ - for all valid i: bound1(i) != bound2(i)
+ ensures
+ - #is_integer_variable.size() == bound1.size()
+ - #lower.size() == bound1.size()
+ - #upper.size() == bound1.size()
+ - for all valid i:
+ - #is_integer_variable[i] == false
+ - #lower(i) == min(bound1(i), bound2(i))
+ - #upper(i) == max(bound1(i), bound2(i))
+ !*/
+
+ function_spec(
+ matrix<double,0,1> lower,
+ matrix<double,0,1> upper,
+ std::vector<bool> is_integer
+ );
+ /*!
+ requires
+ - bound1.size() == bound2.size() == is_integer.size()
+ - for all valid i: bound1(i) != bound2(i)
+ ensures
+ - #is_integer_variable.size() == bound1.size()
+ - #lower.size() == bound1.size()
+ - #upper.size() == bound1.size()
+ - for all valid i:
+ - #is_integer_variable[i] == is_integer[i]
+ - #lower(i) == min(bound1(i), bound2(i))
+ - #upper(i) == max(bound1(i), bound2(i))
+ !*/
+
+ matrix<double,0,1> lower;
+ matrix<double,0,1> upper;
+ std::vector<bool> is_integer_variable;
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ class function_evaluation_request
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object represents a request, by the global_function_search object, to
+ evaluate a real-valued function and report back the results.
+
+ THREAD SAFETY
+ You shouldn't let more than one thread touch a function_evaluation_request
+ at the same time. However, it is safe to send instances of this class to
+ other threads for processing. This lets you evaluate multiple
+ function_evaluation_requests in parallel. Any appropriate synchronization
+ with regard to the originating global_function_search instance is handled
+ automatically.
+ !*/
+
+ public:
+
+ // You can't make or copy this object, the only way to get one is from the
+ // global_function_search class via get_next_x().
+ function_evaluation_request() = delete;
+ function_evaluation_request(const function_evaluation_request&) = delete;
+ function_evaluation_request& operator=(const function_evaluation_request&) = delete;
+
+ // You can however move and swap this object.
+ function_evaluation_request(function_evaluation_request&& item);
+ function_evaluation_request& operator=(function_evaluation_request&& item);
+ /*!
+ ensures
+ - *this takes the state of item.
+ - #item.has_been_evaluated() == true
+ !*/
+
+ ~function_evaluation_request(
+ );
+ /*!
+ ensures
+ - frees all resources associated with this object.
+ - It's fine to destruct function_evaluation_requests even if they haven't
+ been evaluated yet. If this happens it will simply be as if the request
+ was never issued.
+ !*/
+
+ size_t function_idx (
+ ) const;
+ /*!
+ ensures
+ - Returns the function index that identifies which function is to be
+ evaluated.
+ !*/
+
+ const matrix<double,0,1>& x (
+ ) const;
+ /*!
+ ensures
+ - returns the input parameters to the function to be evaluated.
+ !*/
+
+ bool has_been_evaluated (
+ ) const;
+ /*!
+ ensures
+ - If this evaluation request is still outstanding then returns false,
+ otherwise returns true. That is, if the global_function_search is still
+ waiting for you report back by calling set() then
+ has_been_evaluated()==false.
+ !*/
+
+ void set (
+ double y
+ );
+ /*!
+ requires
+ - has_been_evaluated() == false
+ ensures
+ - #has_been_evaluated() == true
+ - Notifies the global_function_search instance that created this object
+ that when the function_idx()th function is evaluated with x() as input
+ then the output is y.
+ !*/
+
+ void swap(
+ function_evaluation_request& item
+ );
+ /*!
+ ensures
+ - swaps the state of *this and item
+ !*/
+
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ class global_function_search
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object performs global optimization of a set of user supplied
+ functions. The goal is to maximize the following objective function:
+ max_{function_i,x_i}: function_i(x_i)
+ subject to bound constraints on each element of x_i. Moreover, each
+ element of x_i can be either real valued or integer valued. Each of the
+ functions can also take a different number of variables. Therefore, the
+ final result of the optimization tells you which function produced the
+ largest output and what input (i.e. the x value) to that function is
+ necessary to obtain that maximal value.
+
+ Importantly, the global_function_search object does not require the user to
+ supply derivatives. Moreover, the functions may contain discontinuities,
+ behave stochastically, and have many local maxima. The global_function_search
+ object will attempt to find the global optima in the face of these challenges.
+ It is also designed to use as few function evaluations as possible, making
+ it suitable for optimizing functions that are very expensive to evaluate.
+
+ It does this by alternating between two modes. A global exploration mode
+ and a local optima refinement mode. This is accomplished by building and
+ maintaining two models of the objective function:
+ 1. A global model that upper bounds our objective function. This is a
+ non-parametric piecewise linear model based on all function
+ evaluations ever seen by the global_function_search object.
+ 2. A local quadratic model fit around the best point seen so far.
+
+ The optimization procedure therefore looks like this:
+
+ while(not done)
+ {
+ DO GLOBAL EXPLORE STEP:
+ Find the point that maximizes the upper bounding model since
+ that is the point with the largest possible improvement in the
+ objective function.
+
+ Evaluate the new point and incorporate it into our models.
+
+ DO LOCAL REFINEMENT STEP:
+ Find the optimal solution to the local quadratic model.
+
+ If this point looks like it will improve on the "best point seen
+ so far" by at least get_solver_epsilon() then we evaluate that
+ point and incorporate it into our models, otherwise we ignore
+ it.
+ }
+
+ You can see that we alternate between global search and local refinement,
+ except in the case where the local model seems to have converged to within
+ get_solver_epsilon() accuracy. In that case only global search steps are
+ used. We do this in the hope that the global search will find a new and
+ better local optima to explore, which would then reactivate local
+ refinement when it has something productive to do.
+
+
+ Now let's turn our attention to the specific API defined by the
+ global_function_search object. We will begin by showing a short example of
+ its use:
+
+ // Suppose we want to find which of these functions, F() and G(), have
+ // the largest output and what input is necessary to produce the
+ // maximal output.
+ auto F = [](double a, double b) { return -std::pow(a-2,2.0) - std::pow(b-4,2.0); };
+ auto G = [](double x) { return 2-std::pow(x-5,2.0); };
+
+ // We first define function_spec objects that specify bounds on the
+ // inputs to each function. The search process will only search within
+ // these bounds.
+ function_spec spec_F({-10,-10}, {10,10});
+ function_spec spec_G({-2}, {6});
+ // Then we create a global_function_search object with those function specifications.
+ global_function_search opt({spec_F, spec_G});
+
+ // Here we run 15 iterations of the search process. Note that the user
+ // of global_function_search writes the main solver loop, which is
+ // somewhat unusual. We will discuss why that is in a moment, but for
+ // now let's look at this example.
+ for (int i = 0; i < 15; ++i)
+ {
+ // All we do here is ask the global_function_search object what to
+ // evaluate next, then do what it asked, and then report the
+ // results back by calling function_evaluation_request's set()
+ // method.
+ function_evaluation_request next = opt.get_next_x();
+ // next.function_idx() tells you which of the functions you should
+ // evaluate. We have 2 functions here (F and G) so function_idx()
+ // can take only the values 0 and 1. If, for example, we had 10
+ // functions it would take the values 0 through 9.
+ if (next.function_idx() == 0)
+ {
+ // Call F with the inputs requested by the
+ // global_function_search and report them back.
+ double a = next.x()(0);
+ double b = next.x()(1);
+ next.set(F(a,b)); // Tell the solver what happened.
+ }
+ else
+ {
+ double x = next.x()(0);
+ next.set(G(x));
+ }
+ }
+
+ // Find out what point gave the largest outputs:
+ matrix<double,0,1> x;
+ double y;
+ size_t function_idx;
+ opt.get_best_function_eval(x,y,function_idx);
+
+ cout << "function_idx: "<< function_idx << endl;
+ cout << "y: " << y << endl;
+ cout << "x: " << x << endl;
+
+ The above cout statements will print this:
+
+ function_idx: 1
+ y: 2
+ x: 5
+
+ Which is the correct result since G(5) gives the largest possible output in
+ our example.
+
+ So why does the user write the main loop? Why isn't it embedded inside
+ dlib? Well, there are two answers to this. The first is that it is. Most
+ users should just call dlib::find_max_global() which does exactly that, it
+ runs the loop for you. However, the API shown above gives you the
+ opportunity to run multiple function evaluations in parallel. For
+ instance, it is perfectly valid to call get_next_x() multiple times and
+ send the resulting function_evaluation_request objects to separate threads
+ for processing. Those separate threads can run the functions being
+ optimized (e.g. F and G or whatever) and report back by calling
+ function_evaluation_request::set(). You could even spread the work across
+ a compute cluster if you have one.
+
+ So what happens if you have N outstanding function evaluation requests?
+ Or in other words, what happens if you called get_next_x() N times and
+ haven't yet called their set() methods? Well, 1 of the N requests will be
+ a local refinement step while the N-1 other requests will be global
+ exploration steps generated from the current upper bounding model. This
+ should give you an idea of the usefulness of this kind of parallelism. If
+ for example, your functions being optimized were simple convex functions
+ this kind of parallelism wouldn't help since essentially all the
+ interesting work in the solver is going to be done by the local optimizer.
+ However, if your function has a lot of local optima, running many global
+ exploration steps in parallel might significantly reduce the time it takes
+ to find a good solution.
+
+ It should also be noted that our upper bounding model is implemented by the
+ dlib::upper_bound_function object, which is a tool that allows us to create
+ a tight upper bound on our objective function. This upper bound is
+ non-parametric and gets progressively more accurate as the optimization
+ progresses, but also more and more expensive to maintain. It causes the
+ runtime of the entire optimization procedure to be O(N^2) where N is the
+ number of objective function evaluations. So problems that require millions
+ of function evaluations to find a good solution are not appropriate for the
+ global_function_search tool. However, if your objective function is very
+ expensive to evaluate then this relatively expensive upper bounding model
+ is well worth its computational cost.
+
+ Finally, let's introduce some background literature on this algorithm. The
+ two most relevant papers in the optimization literature are:
+ Global optimization of Lipschitz functions Malherbe, Cédric and Vayatis,
+ Nicolas International Conference on Machine Learning - 2017
+ and
+ The NEWUOA software for unconstrained optimization without derivatives By
+ M.J.D. Powell, 40th Workshop on Large Scale Nonlinear Optimization (Erice,
+ Italy, 2004)
+
+ Our upper bounding model is an extension of the AdaLIPO method in the
+ Malherbe. See the documentation of dlib::upper_bound_function for more
+ details on that, as we make a number of important extensions. The other
+ part of our method, our local refinement model, is essentially the same
+ type of trust region model proposed by Powell in the above paper. That is,
+ each time we do a local refinement step we identify the best point seen so
+ far, fit a quadratic function around it using the function evaluations we
+ have collected so far, and then use a simple trust region procedure to
+ decide the next best point to evaluate based on our quadratic model.
+
+ The method proposed by Malherbe gives excellent global search performance
+ but has terrible convergence properties in the area around a maxima.
+ Powell's method on the other hand has excellent convergence in the area
+ around a local maxima, as expected by a quadratic trust region method, but
+ is aggressively local maxima seeking. It will simply get stuck in the
+ nearest local optima. Combining the two together as we do here gives us
+ excellent performance in both global search and final convergence speed
+ near a local optima. Causing the global_function_search to perform well
+ for functions with many local optima while still giving high precision
+ solutions. For instance, on typical tests problems, like the Holder table
+ function, the global_function_search object can reliably find the globally
+ optimal solution to full floating point precision in under a few hundred
+ steps.
+
+
+ THREAD SAFETY
+ You shouldn't let more than one thread touch a global_function_search
+ instance at the same time.
+ !*/
+
+ public:
+
+ global_function_search(
+ );
+ /*!
+ ensures
+ - #num_functions() == 0
+ - #get_relative_noise_magnitude() == 0.001
+ - #get_solver_epsilon() == 0
+ - #get_monte_carlo_upper_bound_sample_num() == 5000
+ - #get_pure_random_search_probability() == 0.02
+ !*/
+
+ explicit global_function_search(
+ const function_spec& function
+ );
+ /*!
+ ensures
+ - #num_functions() == 1
+ - #get_function_evaluations() will indicate that there are no function evaluations yet.
+ - #get_relative_noise_magnitude() == 0.001
+ - #get_solver_epsilon() == 0
+ - #get_monte_carlo_upper_bound_sample_num() == 5000
+ - #get_pure_random_search_probability() == 0.02
+ !*/
+
+ explicit global_function_search(
+ const std::vector<function_spec>& functions
+ );
+ /*!
+ ensures
+ - #num_functions() == functions.size()
+ - #get_function_evaluations() will indicate that there are no function evaluations yet.
+ - #get_relative_noise_magnitude() == 0.001
+ - #get_solver_epsilon() == 0
+ - #get_monte_carlo_upper_bound_sample_num() == 5000
+ - #get_pure_random_search_probability() == 0.02
+ !*/
+
+ global_function_search(
+ const std::vector<function_spec>& functions,
+ const std::vector<std::vector<function_evaluation>>& initial_function_evals,
+ const double relative_noise_magnitude = 0.001
+ );
+ /*!
+ requires
+ - functions.size() == initial_function_evals.size()
+ - relative_noise_magnitude >= 0
+ ensures
+ - #num_functions() == functions.size()
+ - #get_function_evaluations() will return the provided initial_function_evals.
+ - #get_relative_noise_magnitude() == relative_noise_magnitude
+ - #get_solver_epsilon() == 0
+ - #get_monte_carlo_upper_bound_sample_num() == 5000
+ - #get_pure_random_search_probability() == 0.02
+ !*/
+
+ // This object can't be copied.
+ global_function_search(const global_function_search&) = delete;
+ global_function_search& operator=(const global_function_search& item) = delete;
+ // But it can be moved
+ global_function_search(global_function_search&& item) = default;
+ global_function_search& operator=(global_function_search&& item) = default;
+ /*!
+ ensures
+ - moves the state of item into *this
+ - #item.num_functions() == 0
+ !*/
+
+ void set_seed (
+ time_t seed
+ );
+ /*!
+ ensures
+ - Part of this object's algorithm uses random sampling to decide what
+ points to evaluate next. Calling set_seed() lets you set the seed used
+ by the random number generator. Note that if you don't call set_seed()
+ you will always get the same deterministic behavior.
+ !*/
+
+ size_t num_functions(
+ ) const;
+ /*!
+ ensures
+ - returns the number of functions being optimized.
+ !*/
+
+ void get_function_evaluations (
+ std::vector<function_spec>& specs,
+ std::vector<std::vector<function_evaluation>>& function_evals
+ ) const;
+ /*!
+ ensures
+ - #specs.size() == num_functions()
+ - #function_evals.size() == num_functions()
+ - This function allows you to query the state of the solver. In
+ particular, you can find the function_specs for each function being
+ optimized and their recorded evaluations.
+ - for all valid i:
+ - function_evals[i] == all the function evaluations that have been
+ recorded for the ith function (i.e. the function with the
+ function_spec #specs[i]). That is, this is the record of all the x
+ and y values reported back by function_evaluation_request::set()
+ calls.
+ !*/
+
+ void get_best_function_eval (
+ matrix<double,0,1>& x,
+ double& y,
+ size_t& function_idx
+ ) const;
+ /*!
+ requires
+ - num_functions() != 0
+ ensures
+ - if (no function evaluations have been recorded yet) then
+ - The outputs of this function are in a valid but undefined state.
+ - else
+ - This function tells you which function has produced the largest
+ output seen so far. It also tells you the inputs to that function
+ that leads to those outputs (x) as well as the output value itself (y).
+ - 0 <= #function_idx < num_functions()
+ - #function_idx == the index of the function that produced the largest output seen so far.
+ - #x == the input parameters to the function that produced the largest outputs seen so far.
+ - #y == the largest output seen so far.
+ !*/
+
+ function_evaluation_request get_next_x (
+ );
+ /*!
+ requires
+ - num_functions() != 0
+ ensures
+ - Generates and returns a function evaluation request. See the discussion
+ in the WHAT THIS OBJECT REPRESENTS section above for details.
+ !*/
+
+ double get_pure_random_search_probability (
+ ) const;
+ /*!
+ ensures
+ - When we decide to do a global explore step we will, with probability
+ get_pure_random_search_probability(), sample a point completely at random
+ rather than using the upper bounding model. Therefore, if you set this
+ probability to 0 then we will depend entirely on the upper bounding
+ model. Alternatively, if you set get_pure_random_search_probability() to
+ 1 then we won't use the upper bounding model at all and instead use pure
+ random search to do global exploration. Pure random search is much
+ faster than using the upper bounding model, so if you know that your
+ objective function is especially simple it can be faster to use pure
+ random search. However, if you really know your function that well you
+ should probably use a gradient based optimizer :)
+ !*/
+
+ void set_pure_random_search_probability (
+ double prob
+ );
+ /*!
+ requires
+ - prob >= 0
+ ensures
+ - #get_pure_random_search_probability() == prob
+ !*/
+
+ double get_solver_epsilon (
+ ) const;
+ /*!
+ ensures
+ - As discussed in the WHAT THIS OBJECT REPRESENTS section, we only do a
+ local refinement step if we haven't already found the peak of the current
+ local optima. get_solver_epsilon() sets the tolerance for deciding if
+ the local search method has found the local optima. Therefore, when the
+ local trust region model runs we check if its predicted improvement in
+ the objective function is greater than get_solver_epsilon(). If it isn't
+ then we assume it has converged and we should focus entirely on global
+ search.
+
+ This means that, for instance, setting get_solver_epsilon() to 0
+ essentially instructs the solver to find each local optima to full
+ floating point precision and only then to focus on pure global search.
+ !*/
+
+ void set_solver_epsilon (
+ double eps
+ );
+ /*!
+ requires
+ - eps >= 0
+ ensures
+ - #get_solver_epsilon() == eps
+ !*/
+
+ double get_relative_noise_magnitude (
+ ) const;
+ /*!
+ ensures
+ - Returns the value of the relative noise magnitude parameter to the
+ dlib::upper_bound_function's used by this object. See the
+ upper_bound_function's documentation for a detailed discussion of this
+ parameter's meaning. Most users should leave this value as its default
+ setting.
+ !*/
+
+ void set_relative_noise_magnitude (
+ double value
+ );
+ /*!
+ requires
+ - value >= 0
+ ensures
+ - #get_relative_noise_magnitude() == value
+ !*/
+
+ size_t get_monte_carlo_upper_bound_sample_num (
+ ) const;
+ /*!
+ ensures
+ - To find the point that maximizes the upper bounding model we use
+ get_monte_carlo_upper_bound_sample_num() random evaluations and select
+ the largest upper bound from that set. So this parameter influences how
+ well we estimate the maximum point on the upper bounding model.
+ !*/
+
+ void set_monte_carlo_upper_bound_sample_num (
+ size_t num
+ );
+ /*!
+ requires
+ - num > 0
+ ensures
+ - #get_monte_carlo_upper_bound_sample_num() == num
+ !*/
+
+ };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_GLOBAL_FuNCTION_SEARCH_ABSTRACT_Hh_
+
+