diff options
Diffstat (limited to 'ml/dlib/dlib/global_optimization/global_function_search_abstract.h')
-rw-r--r-- | ml/dlib/dlib/global_optimization/global_function_search_abstract.h | 605 |
1 files changed, 605 insertions, 0 deletions
diff --git a/ml/dlib/dlib/global_optimization/global_function_search_abstract.h b/ml/dlib/dlib/global_optimization/global_function_search_abstract.h new file mode 100644 index 000000000..c8bfc3993 --- /dev/null +++ b/ml/dlib/dlib/global_optimization/global_function_search_abstract.h @@ -0,0 +1,605 @@ +// Copyright (C) 2017 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_GLOBAL_FuNCTION_SEARCH_ABSTRACT_Hh_ +#ifdef DLIB_GLOBAL_FuNCTION_SEARCH_ABSTRACT_Hh_ + +#include <vector> +#include "../matrix.h" +#include "upper_bound_function_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + struct function_spec + { + /*! + WHAT THIS OBJECT REPRESENTS + This object is a simple struct that lets you define the valid inputs to a + multivariate function. It lets you define bound constraints for each + variable as well as say if a variable is integer valued or not. Therefore, + an instance of this struct says that a function takes upper.size() input + variables, where the ith variable must be in the range [lower(i) upper(i)] + and be an integer if is_integer_variable[i]==true. + !*/ + + function_spec( + matrix<double,0,1> bound1, + matrix<double,0,1> bound2 + ); + /*! + requires + - bound1.size() == bound2.size() + - for all valid i: bound1(i) != bound2(i) + ensures + - #is_integer_variable.size() == bound1.size() + - #lower.size() == bound1.size() + - #upper.size() == bound1.size() + - for all valid i: + - #is_integer_variable[i] == false + - #lower(i) == min(bound1(i), bound2(i)) + - #upper(i) == max(bound1(i), bound2(i)) + !*/ + + function_spec( + matrix<double,0,1> lower, + matrix<double,0,1> upper, + std::vector<bool> is_integer + ); + /*! + requires + - bound1.size() == bound2.size() == is_integer.size() + - for all valid i: bound1(i) != bound2(i) + ensures + - #is_integer_variable.size() == bound1.size() + - #lower.size() == bound1.size() + - #upper.size() == bound1.size() + - for all valid i: + - #is_integer_variable[i] == is_integer[i] + - #lower(i) == min(bound1(i), bound2(i)) + - #upper(i) == max(bound1(i), bound2(i)) + !*/ + + matrix<double,0,1> lower; + matrix<double,0,1> upper; + std::vector<bool> is_integer_variable; + }; + +// ---------------------------------------------------------------------------------------- + + class function_evaluation_request + { + /*! + WHAT THIS OBJECT REPRESENTS + This object represents a request, by the global_function_search object, to + evaluate a real-valued function and report back the results. + + THREAD SAFETY + You shouldn't let more than one thread touch a function_evaluation_request + at the same time. However, it is safe to send instances of this class to + other threads for processing. This lets you evaluate multiple + function_evaluation_requests in parallel. Any appropriate synchronization + with regard to the originating global_function_search instance is handled + automatically. + !*/ + + public: + + // You can't make or copy this object, the only way to get one is from the + // global_function_search class via get_next_x(). + function_evaluation_request() = delete; + function_evaluation_request(const function_evaluation_request&) = delete; + function_evaluation_request& operator=(const function_evaluation_request&) = delete; + + // You can however move and swap this object. + function_evaluation_request(function_evaluation_request&& item); + function_evaluation_request& operator=(function_evaluation_request&& item); + /*! + ensures + - *this takes the state of item. + - #item.has_been_evaluated() == true + !*/ + + ~function_evaluation_request( + ); + /*! + ensures + - frees all resources associated with this object. + - It's fine to destruct function_evaluation_requests even if they haven't + been evaluated yet. If this happens it will simply be as if the request + was never issued. + !*/ + + size_t function_idx ( + ) const; + /*! + ensures + - Returns the function index that identifies which function is to be + evaluated. + !*/ + + const matrix<double,0,1>& x ( + ) const; + /*! + ensures + - returns the input parameters to the function to be evaluated. + !*/ + + bool has_been_evaluated ( + ) const; + /*! + ensures + - If this evaluation request is still outstanding then returns false, + otherwise returns true. That is, if the global_function_search is still + waiting for you report back by calling set() then + has_been_evaluated()==false. + !*/ + + void set ( + double y + ); + /*! + requires + - has_been_evaluated() == false + ensures + - #has_been_evaluated() == true + - Notifies the global_function_search instance that created this object + that when the function_idx()th function is evaluated with x() as input + then the output is y. + !*/ + + void swap( + function_evaluation_request& item + ); + /*! + ensures + - swaps the state of *this and item + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + class global_function_search + { + /*! + WHAT THIS OBJECT REPRESENTS + This object performs global optimization of a set of user supplied + functions. The goal is to maximize the following objective function: + max_{function_i,x_i}: function_i(x_i) + subject to bound constraints on each element of x_i. Moreover, each + element of x_i can be either real valued or integer valued. Each of the + functions can also take a different number of variables. Therefore, the + final result of the optimization tells you which function produced the + largest output and what input (i.e. the x value) to that function is + necessary to obtain that maximal value. + + Importantly, the global_function_search object does not require the user to + supply derivatives. Moreover, the functions may contain discontinuities, + behave stochastically, and have many local maxima. The global_function_search + object will attempt to find the global optima in the face of these challenges. + It is also designed to use as few function evaluations as possible, making + it suitable for optimizing functions that are very expensive to evaluate. + + It does this by alternating between two modes. A global exploration mode + and a local optima refinement mode. This is accomplished by building and + maintaining two models of the objective function: + 1. A global model that upper bounds our objective function. This is a + non-parametric piecewise linear model based on all function + evaluations ever seen by the global_function_search object. + 2. A local quadratic model fit around the best point seen so far. + + The optimization procedure therefore looks like this: + + while(not done) + { + DO GLOBAL EXPLORE STEP: + Find the point that maximizes the upper bounding model since + that is the point with the largest possible improvement in the + objective function. + + Evaluate the new point and incorporate it into our models. + + DO LOCAL REFINEMENT STEP: + Find the optimal solution to the local quadratic model. + + If this point looks like it will improve on the "best point seen + so far" by at least get_solver_epsilon() then we evaluate that + point and incorporate it into our models, otherwise we ignore + it. + } + + You can see that we alternate between global search and local refinement, + except in the case where the local model seems to have converged to within + get_solver_epsilon() accuracy. In that case only global search steps are + used. We do this in the hope that the global search will find a new and + better local optima to explore, which would then reactivate local + refinement when it has something productive to do. + + + Now let's turn our attention to the specific API defined by the + global_function_search object. We will begin by showing a short example of + its use: + + // Suppose we want to find which of these functions, F() and G(), have + // the largest output and what input is necessary to produce the + // maximal output. + auto F = [](double a, double b) { return -std::pow(a-2,2.0) - std::pow(b-4,2.0); }; + auto G = [](double x) { return 2-std::pow(x-5,2.0); }; + + // We first define function_spec objects that specify bounds on the + // inputs to each function. The search process will only search within + // these bounds. + function_spec spec_F({-10,-10}, {10,10}); + function_spec spec_G({-2}, {6}); + // Then we create a global_function_search object with those function specifications. + global_function_search opt({spec_F, spec_G}); + + // Here we run 15 iterations of the search process. Note that the user + // of global_function_search writes the main solver loop, which is + // somewhat unusual. We will discuss why that is in a moment, but for + // now let's look at this example. + for (int i = 0; i < 15; ++i) + { + // All we do here is ask the global_function_search object what to + // evaluate next, then do what it asked, and then report the + // results back by calling function_evaluation_request's set() + // method. + function_evaluation_request next = opt.get_next_x(); + // next.function_idx() tells you which of the functions you should + // evaluate. We have 2 functions here (F and G) so function_idx() + // can take only the values 0 and 1. If, for example, we had 10 + // functions it would take the values 0 through 9. + if (next.function_idx() == 0) + { + // Call F with the inputs requested by the + // global_function_search and report them back. + double a = next.x()(0); + double b = next.x()(1); + next.set(F(a,b)); // Tell the solver what happened. + } + else + { + double x = next.x()(0); + next.set(G(x)); + } + } + + // Find out what point gave the largest outputs: + matrix<double,0,1> x; + double y; + size_t function_idx; + opt.get_best_function_eval(x,y,function_idx); + + cout << "function_idx: "<< function_idx << endl; + cout << "y: " << y << endl; + cout << "x: " << x << endl; + + The above cout statements will print this: + + function_idx: 1 + y: 2 + x: 5 + + Which is the correct result since G(5) gives the largest possible output in + our example. + + So why does the user write the main loop? Why isn't it embedded inside + dlib? Well, there are two answers to this. The first is that it is. Most + users should just call dlib::find_max_global() which does exactly that, it + runs the loop for you. However, the API shown above gives you the + opportunity to run multiple function evaluations in parallel. For + instance, it is perfectly valid to call get_next_x() multiple times and + send the resulting function_evaluation_request objects to separate threads + for processing. Those separate threads can run the functions being + optimized (e.g. F and G or whatever) and report back by calling + function_evaluation_request::set(). You could even spread the work across + a compute cluster if you have one. + + So what happens if you have N outstanding function evaluation requests? + Or in other words, what happens if you called get_next_x() N times and + haven't yet called their set() methods? Well, 1 of the N requests will be + a local refinement step while the N-1 other requests will be global + exploration steps generated from the current upper bounding model. This + should give you an idea of the usefulness of this kind of parallelism. If + for example, your functions being optimized were simple convex functions + this kind of parallelism wouldn't help since essentially all the + interesting work in the solver is going to be done by the local optimizer. + However, if your function has a lot of local optima, running many global + exploration steps in parallel might significantly reduce the time it takes + to find a good solution. + + It should also be noted that our upper bounding model is implemented by the + dlib::upper_bound_function object, which is a tool that allows us to create + a tight upper bound on our objective function. This upper bound is + non-parametric and gets progressively more accurate as the optimization + progresses, but also more and more expensive to maintain. It causes the + runtime of the entire optimization procedure to be O(N^2) where N is the + number of objective function evaluations. So problems that require millions + of function evaluations to find a good solution are not appropriate for the + global_function_search tool. However, if your objective function is very + expensive to evaluate then this relatively expensive upper bounding model + is well worth its computational cost. + + Finally, let's introduce some background literature on this algorithm. The + two most relevant papers in the optimization literature are: + Global optimization of Lipschitz functions Malherbe, Cédric and Vayatis, + Nicolas International Conference on Machine Learning - 2017 + and + The NEWUOA software for unconstrained optimization without derivatives By + M.J.D. Powell, 40th Workshop on Large Scale Nonlinear Optimization (Erice, + Italy, 2004) + + Our upper bounding model is an extension of the AdaLIPO method in the + Malherbe. See the documentation of dlib::upper_bound_function for more + details on that, as we make a number of important extensions. The other + part of our method, our local refinement model, is essentially the same + type of trust region model proposed by Powell in the above paper. That is, + each time we do a local refinement step we identify the best point seen so + far, fit a quadratic function around it using the function evaluations we + have collected so far, and then use a simple trust region procedure to + decide the next best point to evaluate based on our quadratic model. + + The method proposed by Malherbe gives excellent global search performance + but has terrible convergence properties in the area around a maxima. + Powell's method on the other hand has excellent convergence in the area + around a local maxima, as expected by a quadratic trust region method, but + is aggressively local maxima seeking. It will simply get stuck in the + nearest local optima. Combining the two together as we do here gives us + excellent performance in both global search and final convergence speed + near a local optima. Causing the global_function_search to perform well + for functions with many local optima while still giving high precision + solutions. For instance, on typical tests problems, like the Holder table + function, the global_function_search object can reliably find the globally + optimal solution to full floating point precision in under a few hundred + steps. + + + THREAD SAFETY + You shouldn't let more than one thread touch a global_function_search + instance at the same time. + !*/ + + public: + + global_function_search( + ); + /*! + ensures + - #num_functions() == 0 + - #get_relative_noise_magnitude() == 0.001 + - #get_solver_epsilon() == 0 + - #get_monte_carlo_upper_bound_sample_num() == 5000 + - #get_pure_random_search_probability() == 0.02 + !*/ + + explicit global_function_search( + const function_spec& function + ); + /*! + ensures + - #num_functions() == 1 + - #get_function_evaluations() will indicate that there are no function evaluations yet. + - #get_relative_noise_magnitude() == 0.001 + - #get_solver_epsilon() == 0 + - #get_monte_carlo_upper_bound_sample_num() == 5000 + - #get_pure_random_search_probability() == 0.02 + !*/ + + explicit global_function_search( + const std::vector<function_spec>& functions + ); + /*! + ensures + - #num_functions() == functions.size() + - #get_function_evaluations() will indicate that there are no function evaluations yet. + - #get_relative_noise_magnitude() == 0.001 + - #get_solver_epsilon() == 0 + - #get_monte_carlo_upper_bound_sample_num() == 5000 + - #get_pure_random_search_probability() == 0.02 + !*/ + + global_function_search( + const std::vector<function_spec>& functions, + const std::vector<std::vector<function_evaluation>>& initial_function_evals, + const double relative_noise_magnitude = 0.001 + ); + /*! + requires + - functions.size() == initial_function_evals.size() + - relative_noise_magnitude >= 0 + ensures + - #num_functions() == functions.size() + - #get_function_evaluations() will return the provided initial_function_evals. + - #get_relative_noise_magnitude() == relative_noise_magnitude + - #get_solver_epsilon() == 0 + - #get_monte_carlo_upper_bound_sample_num() == 5000 + - #get_pure_random_search_probability() == 0.02 + !*/ + + // This object can't be copied. + global_function_search(const global_function_search&) = delete; + global_function_search& operator=(const global_function_search& item) = delete; + // But it can be moved + global_function_search(global_function_search&& item) = default; + global_function_search& operator=(global_function_search&& item) = default; + /*! + ensures + - moves the state of item into *this + - #item.num_functions() == 0 + !*/ + + void set_seed ( + time_t seed + ); + /*! + ensures + - Part of this object's algorithm uses random sampling to decide what + points to evaluate next. Calling set_seed() lets you set the seed used + by the random number generator. Note that if you don't call set_seed() + you will always get the same deterministic behavior. + !*/ + + size_t num_functions( + ) const; + /*! + ensures + - returns the number of functions being optimized. + !*/ + + void get_function_evaluations ( + std::vector<function_spec>& specs, + std::vector<std::vector<function_evaluation>>& function_evals + ) const; + /*! + ensures + - #specs.size() == num_functions() + - #function_evals.size() == num_functions() + - This function allows you to query the state of the solver. In + particular, you can find the function_specs for each function being + optimized and their recorded evaluations. + - for all valid i: + - function_evals[i] == all the function evaluations that have been + recorded for the ith function (i.e. the function with the + function_spec #specs[i]). That is, this is the record of all the x + and y values reported back by function_evaluation_request::set() + calls. + !*/ + + void get_best_function_eval ( + matrix<double,0,1>& x, + double& y, + size_t& function_idx + ) const; + /*! + requires + - num_functions() != 0 + ensures + - if (no function evaluations have been recorded yet) then + - The outputs of this function are in a valid but undefined state. + - else + - This function tells you which function has produced the largest + output seen so far. It also tells you the inputs to that function + that leads to those outputs (x) as well as the output value itself (y). + - 0 <= #function_idx < num_functions() + - #function_idx == the index of the function that produced the largest output seen so far. + - #x == the input parameters to the function that produced the largest outputs seen so far. + - #y == the largest output seen so far. + !*/ + + function_evaluation_request get_next_x ( + ); + /*! + requires + - num_functions() != 0 + ensures + - Generates and returns a function evaluation request. See the discussion + in the WHAT THIS OBJECT REPRESENTS section above for details. + !*/ + + double get_pure_random_search_probability ( + ) const; + /*! + ensures + - When we decide to do a global explore step we will, with probability + get_pure_random_search_probability(), sample a point completely at random + rather than using the upper bounding model. Therefore, if you set this + probability to 0 then we will depend entirely on the upper bounding + model. Alternatively, if you set get_pure_random_search_probability() to + 1 then we won't use the upper bounding model at all and instead use pure + random search to do global exploration. Pure random search is much + faster than using the upper bounding model, so if you know that your + objective function is especially simple it can be faster to use pure + random search. However, if you really know your function that well you + should probably use a gradient based optimizer :) + !*/ + + void set_pure_random_search_probability ( + double prob + ); + /*! + requires + - prob >= 0 + ensures + - #get_pure_random_search_probability() == prob + !*/ + + double get_solver_epsilon ( + ) const; + /*! + ensures + - As discussed in the WHAT THIS OBJECT REPRESENTS section, we only do a + local refinement step if we haven't already found the peak of the current + local optima. get_solver_epsilon() sets the tolerance for deciding if + the local search method has found the local optima. Therefore, when the + local trust region model runs we check if its predicted improvement in + the objective function is greater than get_solver_epsilon(). If it isn't + then we assume it has converged and we should focus entirely on global + search. + + This means that, for instance, setting get_solver_epsilon() to 0 + essentially instructs the solver to find each local optima to full + floating point precision and only then to focus on pure global search. + !*/ + + void set_solver_epsilon ( + double eps + ); + /*! + requires + - eps >= 0 + ensures + - #get_solver_epsilon() == eps + !*/ + + double get_relative_noise_magnitude ( + ) const; + /*! + ensures + - Returns the value of the relative noise magnitude parameter to the + dlib::upper_bound_function's used by this object. See the + upper_bound_function's documentation for a detailed discussion of this + parameter's meaning. Most users should leave this value as its default + setting. + !*/ + + void set_relative_noise_magnitude ( + double value + ); + /*! + requires + - value >= 0 + ensures + - #get_relative_noise_magnitude() == value + !*/ + + size_t get_monte_carlo_upper_bound_sample_num ( + ) const; + /*! + ensures + - To find the point that maximizes the upper bounding model we use + get_monte_carlo_upper_bound_sample_num() random evaluations and select + the largest upper bound from that set. So this parameter influences how + well we estimate the maximum point on the upper bounding model. + !*/ + + void set_monte_carlo_upper_bound_sample_num ( + size_t num + ); + /*! + requires + - num > 0 + ensures + - #get_monte_carlo_upper_bound_sample_num() == num + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_GLOBAL_FuNCTION_SEARCH_ABSTRACT_Hh_ + + |