diff options
Diffstat (limited to 'ml/dlib/dlib/global_optimization')
7 files changed, 3297 insertions, 0 deletions
diff --git a/ml/dlib/dlib/global_optimization/find_max_global.h b/ml/dlib/dlib/global_optimization/find_max_global.h new file mode 100644 index 000000000..5356129f5 --- /dev/null +++ b/ml/dlib/dlib/global_optimization/find_max_global.h @@ -0,0 +1,511 @@ +// Copyright (C) 2017 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_FiND_GLOBAL_MAXIMUM_hH_ +#define DLIB_FiND_GLOBAL_MAXIMUM_hH_ + +#include "find_max_global_abstract.h" +#include "global_function_search.h" +#include "../metaprogramming.h" +#include <utility> +#include <chrono> + +namespace dlib +{ + namespace gopt_impl + { + + // ---------------------------------------------------------------------------------------- + + class disable_decay_to_scalar + { + const matrix<double,0,1>& a; + public: + disable_decay_to_scalar(const matrix<double,0,1>& a) : a(a){} + operator const matrix<double,0,1>&() const { return a;} + }; + + + template <typename T, size_t... indices> + auto _cwv ( + T&& f, + const matrix<double,0,1>& a, + compile_time_integer_list<indices...> + ) -> decltype(f(a(indices-1)...)) + { + DLIB_CASSERT(a.size() == sizeof...(indices), + "You invoked dlib::call_function_and_expand_args(f,a) but the number of arguments expected by f() doesn't match the size of 'a'. " + << "Expected " << sizeof...(indices) << " arguments but got " << a.size() << "." + ); + return f(a(indices-1)...); + } + + // Visual studio, as of November 2017, doesn't support C++11 and can't compile this code. + // So we write the terrible garbage in the #else for visual studio. When Visual Studio supports C++11 I'll update this #ifdef to use the C++11 code. 
+#ifndef _MSC_VER + template <size_t max_unpack> + struct call_function_and_expand_args + { + template <typename T> + static auto go(T&& f, const matrix<double,0,1>& a) -> decltype(_cwv(std::forward<T>(f),a,typename make_compile_time_integer_range<max_unpack>::type())) + { + return _cwv(std::forward<T>(f),a,typename make_compile_time_integer_range<max_unpack>::type()); + } + + template <typename T> + static auto go(T&& f, const matrix<double,0,1>& a) -> decltype(call_function_and_expand_args<max_unpack-1>::template go(std::forward<T>(f),a)) + { + return call_function_and_expand_args<max_unpack-1>::go(std::forward<T>(f),a); + } + }; + + template <> + struct call_function_and_expand_args<0> + { + template <typename T> + static auto go(T&& f, const matrix<double,0,1>& a) -> decltype(f(disable_decay_to_scalar(a))) + { + return f(disable_decay_to_scalar(a)); + } + }; +#else + template <size_t max_unpack> + struct call_function_and_expand_args + { +template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(disable_decay_to_scalar(a))) {return f(disable_decay_to_scalar(a)); } +template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0))) { DLIB_CASSERT(a.size() == 1); return f(a(0)); } +template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0),a(1))) { DLIB_CASSERT(a.size() == 2); return f(a(0),a(1)); } +template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0), a(1), a(2))) { DLIB_CASSERT(a.size() == 3); return f(a(0), a(1),a(2)); } +template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0), a(1), a(2), a(3))) { DLIB_CASSERT(a.size() == 4); return f(a(0), a(1), a(2), a(3)); } +template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0), a(1), a(2), a(3), a(4))) { DLIB_CASSERT(a.size() == 5); return f(a(0), a(1), a(2), a(3), a(4)); } +template <typename T> static auto 
go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0), a(1), a(2), a(3), a(4), a(5))) { DLIB_CASSERT(a.size() == 6); return f(a(0), a(1), a(2), a(3), a(4), a(5)); } +template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0), a(1), a(2), a(3), a(4), a(5), a(6))) { DLIB_CASSERT(a.size() == 7); return f(a(0), a(1), a(2), a(3), a(4), a(5), a(6)); } + }; +#endif + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template <typename T> + auto call_function_and_expand_args( + T&& f, + const matrix<double,0,1>& a + ) -> decltype(gopt_impl::call_function_and_expand_args<40>::go(f,a)) + { + // unpack up to 40 parameters when calling f() + return gopt_impl::call_function_and_expand_args<40>::go(std::forward<T>(f),a); + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + struct max_function_calls + { + max_function_calls() = default; + explicit max_function_calls(size_t max_calls) : max_calls(max_calls) {} + size_t max_calls = std::numeric_limits<size_t>::max(); + }; + +// ---------------------------------------------------------------------------------------- + + const auto FOREVER = std::chrono::hours(24*356*290); // 290 years + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template < + typename funct + > + std::pair<size_t,function_evaluation> find_max_global ( + std::vector<funct>& functions, + std::vector<function_spec> specs, + const max_function_calls num, + const std::chrono::nanoseconds max_runtime, + double solver_epsilon, + double ymult + ) + { + // Decide which parameters should be searched on a log scale. 
Basically, it's + // common for machine learning models to have parameters that should be searched on + // a log scale (e.g. SVM C). These parameters are usually identifiable because + // they have bounds like [1e-5 1e10], that is, they span a very large range of + // magnitudes from really small to really big. So there we are going to check for + // that and if we find parameters with that kind of bound constraints we will + // transform them to a log scale automatically. + std::vector<std::vector<bool>> log_scale(specs.size()); + for (size_t i = 0; i < specs.size(); ++i) + { + for (long j = 0; j < specs[i].lower.size(); ++j) + { + if (!specs[i].is_integer_variable[j] && specs[i].lower(j) > 0 && specs[i].upper(j)/specs[i].lower(j) >= 1000) + { + log_scale[i].push_back(true); + specs[i].lower(j) = std::log(specs[i].lower(j)); + specs[i].upper(j) = std::log(specs[i].upper(j)); + } + else + { + log_scale[i].push_back(false); + } + } + } + + global_function_search opt(specs); + opt.set_solver_epsilon(solver_epsilon); + + const auto time_to_stop = std::chrono::steady_clock::now() + max_runtime; + + // Now run the main solver loop. + for (size_t i = 0; i < num.max_calls && std::chrono::steady_clock::now() < time_to_stop; ++i) + { + auto next = opt.get_next_x(); + matrix<double,0,1> x = next.x(); + // Undo any log-scaling that was applied to the variables before we pass them + // to the functions being optimized. + for (long j = 0; j < x.size(); ++j) + { + if (log_scale[next.function_idx()][j]) + x(j) = std::exp(x(j)); + } + double y = ymult*call_function_and_expand_args(functions[next.function_idx()], x); + next.set(y); + } + + + matrix<double,0,1> x; + double y; + size_t function_idx; + opt.get_best_function_eval(x,y,function_idx); + // Undo any log-scaling that was applied to the variables before we output them. 
+ for (long j = 0; j < x.size(); ++j) + { + if (log_scale[function_idx][j]) + x(j) = std::exp(x(j)); + } + return std::make_pair(function_idx, function_evaluation(x,y/ymult)); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + std::pair<size_t,function_evaluation> find_max_global ( + std::vector<funct>& functions, + std::vector<function_spec> specs, + const max_function_calls num, + const std::chrono::nanoseconds max_runtime = FOREVER, + double solver_epsilon = 0 + ) + { + return impl::find_max_global(functions, std::move(specs), num, max_runtime, solver_epsilon, +1); + } + + template < + typename funct + > + std::pair<size_t,function_evaluation> find_min_global ( + std::vector<funct>& functions, + std::vector<function_spec> specs, + const max_function_calls num, + const std::chrono::nanoseconds max_runtime = FOREVER, + double solver_epsilon = 0 + ) + { + return impl::find_max_global(functions, std::move(specs), num, max_runtime, solver_epsilon, -1); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::vector<bool>& is_integer_variable, + const max_function_calls num, + const std::chrono::nanoseconds max_runtime = FOREVER, + double solver_epsilon = 0 + ) + { + std::vector<funct> functions(1,std::move(f)); + std::vector<function_spec> specs(1, function_spec(bound1, bound2, is_integer_variable)); + return find_max_global(functions, std::move(specs), num, max_runtime, solver_epsilon).second; + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::vector<bool>& is_integer_variable, + const max_function_calls num, + const std::chrono::nanoseconds 
max_runtime = FOREVER, + double solver_epsilon = 0 + ) + { + std::vector<funct> functions(1,std::move(f)); + std::vector<function_spec> specs(1, function_spec(bound1, bound2, is_integer_variable)); + return find_min_global(functions, std::move(specs), num, max_runtime, solver_epsilon).second; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::vector<bool>& is_integer_variable, + const max_function_calls num, + double solver_epsilon + ) + { + return find_max_global(std::move(f), bound1, bound2, is_integer_variable, num, FOREVER, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::vector<bool>& is_integer_variable, + const max_function_calls num, + double solver_epsilon + ) + { + return find_min_global(std::move(f), bound1, bound2, is_integer_variable, num, FOREVER, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const max_function_calls num, + const std::chrono::nanoseconds max_runtime = FOREVER, + double solver_epsilon = 0 + ) + { + return find_max_global(std::move(f), bound1, bound2, std::vector<bool>(bound1.size(),false), num, max_runtime, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const max_function_calls num, + const std::chrono::nanoseconds max_runtime = FOREVER, + double solver_epsilon = 0 + ) + { + return find_min_global(std::move(f), bound1, 
bound2, std::vector<bool>(bound1.size(),false), num, max_runtime, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const max_function_calls num, + double solver_epsilon + ) + { + return find_max_global(std::move(f), bound1, bound2, std::vector<bool>(bound1.size(),false), num, FOREVER, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const max_function_calls num, + double solver_epsilon + ) + { + return find_min_global(std::move(f), bound1, bound2, std::vector<bool>(bound1.size(),false), num, FOREVER, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const double bound1, + const double bound2, + const max_function_calls num, + const std::chrono::nanoseconds max_runtime = FOREVER, + double solver_epsilon = 0 + ) + { + return find_max_global(std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), num, max_runtime, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const double bound1, + const double bound2, + const max_function_calls num, + const std::chrono::nanoseconds max_runtime = FOREVER, + double solver_epsilon = 0 + ) + { + return find_min_global(std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), num, max_runtime, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const double bound1, + const double 
bound2, + const max_function_calls num, + double solver_epsilon + ) + { + return find_max_global(std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), num, FOREVER, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const double bound1, + const double bound2, + const max_function_calls num, + double solver_epsilon + ) + { + return find_min_global(std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), num, FOREVER, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::chrono::nanoseconds max_runtime, + double solver_epsilon = 0 + ) + { + return find_max_global(std::move(f), bound1, bound2, max_function_calls(), max_runtime, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::chrono::nanoseconds max_runtime, + double solver_epsilon = 0 + ) + { + return find_min_global(std::move(f), bound1, bound2, max_function_calls(), max_runtime, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const double bound1, + const double bound2, + const std::chrono::nanoseconds max_runtime, + double solver_epsilon = 0 + ) + { + return find_max_global(std::move(f), bound1, bound2, max_function_calls(), max_runtime, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const double bound1, + const double bound2, + const std::chrono::nanoseconds max_runtime, + double solver_epsilon = 0 + ) + { + return 
find_min_global(std::move(f), bound1, bound2, max_function_calls(), max_runtime, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::vector<bool>& is_integer_variable, + const std::chrono::nanoseconds max_runtime, + double solver_epsilon = 0 + ) + { + return find_max_global(std::move(f), bound1, bound2, is_integer_variable, max_function_calls(), max_runtime, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::vector<bool>& is_integer_variable, + const std::chrono::nanoseconds max_runtime, + double solver_epsilon = 0 + ) + { + return find_min_global(std::move(f), bound1, bound2, is_integer_variable, max_function_calls(), max_runtime, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_FiND_GLOBAL_MAXIMUM_hH_ + diff --git a/ml/dlib/dlib/global_optimization/find_max_global_abstract.h b/ml/dlib/dlib/global_optimization/find_max_global_abstract.h new file mode 100644 index 000000000..4be62b154 --- /dev/null +++ b/ml/dlib/dlib/global_optimization/find_max_global_abstract.h @@ -0,0 +1,496 @@ +// Copyright (C) 2017 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_FiND_GLOBAL_MAXIMUM_ABSTRACT_hH_ +#ifdef DLIB_FiND_GLOBAL_MAXIMUM_ABSTRACT_hH_ + +#include "upper_bound_function_abstract.h" +#include "global_function_search_abstract.h" +#include "../metaprogramming.h" +#include "../matrix.h" +#include <utility> +#include <chrono> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + auto call_function_and_expand_args( + T&& f, + const matrix<double,0,1>& args + ) -> decltype(f(args or args expanded out as discussed below)); + /*! + requires + - f is a function object with one of the following signatures: + auto f(matrix<double,0,1>) + auto f(double) + auto f(double,double) + auto f(double,double,double) + ... + auto f(double,double,...,double) // up to 40 double arguments + - if (f() explicitly expands its arguments) then + - args.size() == the number of arguments taken by f. + ensures + - This function invokes f() with the given arguments and returns the result. + However, the signature of f() is allowed to vary. In particular, if f() + takes a matrix<double,0,1> as a single argument then this function simply + calls f(args). However, if f() takes double arguments then args is expanded + appropriately, i.e. it calls one of the following as appropriate: + f(args(0)) + f(args(0),args(1)) + ... + f(args(0),args(1),...,args(N)) + and the result of f() is returned. + !*/ + +// ---------------------------------------------------------------------------------------- + + struct max_function_calls + { + /*! + WHAT THIS OBJECT REPRESENTS + This is a simple typed integer class used to strongly type the "max number + of function calls" argument to find_max_global() and find_min_global(). 
+ + !*/ + + max_function_calls() = default; + + explicit max_function_calls(size_t max_calls) : max_calls(max_calls) {} + + size_t max_calls = std::numeric_limits<size_t>::max(); + }; + +// ---------------------------------------------------------------------------------------- + + const auto FOREVER = std::chrono::hours(24*356*290); // 290 years, basically forever + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + std::pair<size_t,function_evaluation> find_max_global ( + std::vector<funct>& functions, + const std::vector<function_spec>& specs, + const max_function_calls num, + const std::chrono::nanoseconds max_runtime = FOREVER, + double solver_epsilon = 0 + ); + /*! + requires + - functions.size() != 0 + - functions.size() == specs.size() + - solver_epsilon >= 0 + - for all valid i: + - functions[i] is a real valued multi-variate function object. Moreover, + it must be callable via an expression of the form: + call_function_and_expand_args(functions[i], specs.lower). This means + function[i] should have a signature like one of the following: + double f(matrix<double,0,1>) + double f(double) + double f(double,double) + etc. + - The range of inputs defined by specs[i] must be valid inputs to + functions[i]. + ensures + - This function performs global optimization on the set of given functions. + The goal is to maximize the following objective function: + max_{i,x_i}: functions[i](x_i) + subject to the constraints on x_i defined by specs[i]. + Once found, the return value of find_max_global() is: + make_pair(i, function_evaluation(x_i,functions[i](x_i))). + That is, we search for the settings of i and x that return the largest output + and return those settings. + - The search is performed using the global_function_search object. See its + documentation for details of the algorithm. + - We set the global_function_search::get_solver_epsilon() parameter to + solver_epsilon. 
Therefore, the search will only attempt to find a global + maximizer to at most solver_epsilon accuracy. Once a local maximizer is + found to that accuracy the search will focus entirely on finding other maxima + elsewhere rather than on further improving the current local optima found so + far. That is, once a local maxima is identified to about solver_epsilon + accuracy, the algorithm will spend all its time exploring the functions to + find other local maxima to investigate. An epsilon of 0 means it will keep + solving until it reaches full floating point precision. Larger values will + cause it to switch to pure global exploration sooner and therefore might be + more effective if your objective function has many local maxima and you don't + care about a super high precision solution. + - find_max_global() runs until one of the following is true: + - The total number of calls to the provided functions is == num.max_calls + - More than max_runtime time has elapsed since the start of this function. + - Any variables that satisfy the following conditions are optimized on a log-scale: + - The lower bound on the variable is > 0 + - The ratio of the upper bound to lower bound is >= 1000 + - The variable is not an integer variable + We do this because it's common to optimize machine learning models that have + parameters with bounds in a range such as [1e-5 to 1e10] (e.g. the SVM C + parameter) and it's much more appropriate to optimize these kinds of + variables on a log scale. So we transform them by applying std::log() to + them and then undo the transform via std::exp() before invoking the function + being optimized. Therefore, this transformation is invisible to the user + supplied functions. In most cases, it improves the efficiency of the + optimizer. 
+ !*/ + + template < + typename funct + > + std::pair<size_t,function_evaluation> find_min_global ( + std::vector<funct>& functions, + const std::vector<function_spec>& specs, + const max_function_calls num, + const std::chrono::nanoseconds max_runtime = FOREVER, + double solver_epsilon = 0 + ); + /*! + This function is identical to the find_max_global() defined immediately above, + except that we perform minimization rather than maximization. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::vector<bool>& is_integer_variable, + const max_function_calls num, + const std::chrono::nanoseconds max_runtime = FOREVER, + double solver_epsilon = 0 + ); + /*! + requires + - bound1.size() == bound2.size() == is_integer_variable.size() + - for all valid i: bound1(i) != bound2(i) + - solver_epsilon >= 0 + - f() is a real valued multi-variate function object. Moreover, it must be + callable via an expression of the form: call_function_and_expand_args(f, + bound1). This means f() should have a signature like one of the following: + double f(matrix<double,0,1>) + double f(double) + double f(double,double) + etc. + - The range of inputs defined by function_spec(bound1,bound2,is_integer_variable) + must be valid inputs to f(). + ensures + - This function performs global optimization on the given f() function. + The goal is to maximize the following objective function: + f(x) + subject to the constraints on x defined by function_spec(bound1,bound2,is_integer_variable). + Once found, the return value of find_max_global() is: + function_evaluation(x,f(x))). + That is, we search for the setting of x that returns the largest output and + return that setting. + - The search is performed using the global_function_search object. 
See its + documentation for details of the algorithm. + - We set the global_function_search::get_solver_epsilon() parameter to + solver_epsilon. Therefore, the search will only attempt to find a global + maximizer to at most solver_epsilon accuracy. Once a local maximizer is + found to that accuracy the search will focus entirely on finding other maxima + elsewhere rather than on further improving the current local optima found so + far. That is, once a local maxima is identified to about solver_epsilon + accuracy, the algorithm will spend all its time exploring the function to + find other local maxima to investigate. An epsilon of 0 means it will keep + solving until it reaches full floating point precision. Larger values will + cause it to switch to pure global exploration sooner and therefore might be + more effective if your objective function has many local maxima and you don't + care about a super high precision solution. + - find_max_global() runs until one of the following is true: + - The total number of calls to f() is == num.max_calls + - More than max_runtime time has elapsed since the start of this function. + - Any variables that satisfy the following conditions are optimized on a log-scale: + - The lower bound on the variable is > 0 + - The ratio of the upper bound to lower bound is >= 1000 + - The variable is not an integer variable + We do this because it's common to optimize machine learning models that have + parameters with bounds in a range such as [1e-5 to 1e10] (e.g. the SVM C + parameter) and it's much more appropriate to optimize these kinds of + variables on a log scale. So we transform them by applying std::log() to + them and then undo the transform via std::exp() before invoking the function + being optimized. Therefore, this transformation is invisible to the user + supplied functions. In most cases, it improves the efficiency of the + optimizer. 
+ !*/ + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::vector<bool>& is_integer_variable, + const max_function_calls num, + const std::chrono::nanoseconds max_runtime = FOREVER, + double solver_epsilon = 0 + ); + /*! + This function is identical to the find_max_global() defined immediately above, + except that we perform minimization rather than maximization. + !*/ + +// ---------------------------------------------------------------------------------------- +// The following functions are just convenient overloads for calling the above defined +// find_max_global() and find_min_global() routines. +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::vector<bool>& is_integer_variable, + const max_function_calls num, + double solver_epsilon + ) + { + return find_max_global(std::move(f), bound1, bound2, std::vector<bool>(bound1.size(),false), num, FOREVER, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::vector<bool>& is_integer_variable, + const max_function_calls num, + double solver_epsilon + ) + { + return find_min_global(std::move(f), bound1, bound2, std::vector<bool>(bound1.size(),false), num, FOREVER, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const max_function_calls num, + const std::chrono::nanoseconds max_runtime = FOREVER, + double solver_epsilon = 0 + ) + 
{ + return find_max_global(std::move(f), bound1, bound2, std::vector<bool>(bound1.size(),false), num, max_runtime, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const max_function_calls num, + const std::chrono::nanoseconds max_runtime = FOREVER, + double solver_epsilon = 0 + ) + { + return find_min_global(std::move(f), bound1, bound2, std::vector<bool>(bound1.size(),false), num, max_runtime, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const max_function_calls num, + double solver_epsilon + ) + { + return find_max_global(std::move(f), bound1, bound2, std::vector<bool>(bound1.size(),false), num, FOREVER, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const max_function_calls num, + double solver_epsilon + ) + { + return find_min_global(std::move(f), bound1, bound2, std::vector<bool>(bound1.size(),false), num, FOREVER, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const double bound1, + const double bound2, + const max_function_calls num, + const std::chrono::nanoseconds max_runtime = FOREVER, + double solver_epsilon = 0 + ) + { + return find_max_global(std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), num, max_runtime, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const double bound1, + const double bound2, + const 
max_function_calls num, + const std::chrono::nanoseconds max_runtime = FOREVER, + double solver_epsilon = 0 + ) + { + return find_min_global(std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), num, max_runtime, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const double bound1, + const double bound2, + const max_function_calls num, + double solver_epsilon + ) + { + return find_max_global(std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), num, FOREVER, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const double bound1, + const double bound2, + const max_function_calls num, + double solver_epsilon + ) + { + return find_min_global(std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), num, FOREVER, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::chrono::nanoseconds max_runtime, + double solver_epsilon = 0 + ) + { + return find_max_global(std::move(f), bound1, bound2, max_function_calls(), max_runtime, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::chrono::nanoseconds max_runtime, + double solver_epsilon = 0 + ) + { + return find_min_global(std::move(f), bound1, bound2, max_function_calls(), max_runtime, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const 
double bound1, + const double bound2, + const std::chrono::nanoseconds max_runtime, + double solver_epsilon = 0 + ) + { + return find_max_global(std::move(f), bound1, bound2, max_function_calls(), max_runtime, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const double bound1, + const double bound2, + const std::chrono::nanoseconds max_runtime, + double solver_epsilon = 0 + ) + { + return find_min_global(std::move(f), bound1, bound2, max_function_calls(), max_runtime, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename funct + > + function_evaluation find_max_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::vector<bool>& is_integer_variable, + const std::chrono::nanoseconds max_runtime, + double solver_epsilon = 0 + ) + { + return find_max_global(std::move(f), bound1, bound2, is_integer_variable, max_function_calls(), max_runtime, solver_epsilon); + } + + template < + typename funct + > + function_evaluation find_min_global ( + funct f, + const matrix<double,0,1>& bound1, + const matrix<double,0,1>& bound2, + const std::vector<bool>& is_integer_variable, + const std::chrono::nanoseconds max_runtime, + double solver_epsilon = 0 + ) + { + return find_min_global(std::move(f), bound1, bound2, is_integer_variable, max_function_calls(), max_runtime, solver_epsilon); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_FiND_GLOBAL_MAXIMUM_ABSTRACT_hH_ + + diff --git a/ml/dlib/dlib/global_optimization/global_function_search.cpp b/ml/dlib/dlib/global_optimization/global_function_search.cpp new file mode 100644 index 000000000..fada289a4 --- /dev/null +++ b/ml/dlib/dlib/global_optimization/global_function_search.cpp @@ -0,0 +1,942 @@ + +#include "global_function_search.h" +#include 
"upper_bound_function.h" +#include "../optimization.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + namespace qopt_impl + { + void fit_quadratic_to_points_mse( + const matrix<double>& X, + const matrix<double,0,1>& Y, + matrix<double>& H, + matrix<double,0,1>& g, + double& c + ) + { + DLIB_CASSERT(X.size() > 0); + DLIB_CASSERT(X.nc() == Y.size()); + DLIB_CASSERT(X.nc() >= (X.nr()+1)*(X.nr()+2)/2); + + const long dims = X.nr(); + const long M = X.nc(); + + matrix<double> W((X.nr()+1)*(X.nr()+2)/2, M); + + set_subm(W, 0,0, dims, M) = X; + set_subm(W, dims,0, 1, M) = 1; + for (long c = 0; c < X.nc(); ++c) + { + long wr = dims+1; + for (long r = 0; r < X.nr(); ++r) + { + for (long r2 = r; r2 < X.nr(); ++r2) + { + W(wr,c) = X(r,c)*X(r2,c); + if (r2 == r) + W(wr,c) *= 0.5; + ++wr; + } + } + } + + matrix<double,0,1> z = pinv(trans(W))*Y; + + c = z(dims); + g = rowm(z, range(0,dims-1)); + + H.set_size(dims,dims); + + long wr = dims+1; + for (long r = 0; r < X.nr(); ++r) + { + for (long r2 = r; r2 < X.nr(); ++r2) + { + H(r,r2) = H(r2,r) = z(wr++); + } + } + } + + // ---------------------------------------------------------------------------------------- + + void fit_quadratic_to_points( + const matrix<double>& X, + const matrix<double,0,1>& Y, + matrix<double>& H, + matrix<double,0,1>& g, + double& c + ) + /*! + requires + - X.size() > 0 + - X.nc() == Y.size() + - X.nr()+1 <= X.nc() + ensures + - This function finds a quadratic function, Q(x), that interpolates the + given set of points. If there aren't enough points to uniquely define + Q(x) then the Q(x) that fits the given points with the minimum Frobenius + norm hessian matrix is selected. 
+ - To be precise: + - Let: Q(x) == 0.5*trans(x)*H*x + trans(x)*g + c + - Then this function finds H, g, and c that minimizes the following: + sum(squared(H)) + such that: + Q(colm(X,i)) == Y(i), for all valid i + - If there are more points than necessary to constrain Q then the Q + that best interpolates the function in the mean squared sense is + found. + !*/ + { + DLIB_CASSERT(X.size() > 0); + DLIB_CASSERT(X.nc() == Y.size()); + DLIB_CASSERT(X.nr()+1 <= X.nc()); + + + if (X.nc() >= (X.nr()+1)*(X.nr()+2)/2) + { + fit_quadratic_to_points_mse(X,Y,H,g,c); + return; + } + + + const long dims = X.nr(); + const long M = X.nc(); + + /* + Our implementation uses the equations 3.9 - 3.12 from the paper: + The NEWUOA software for unconstrained optimization without derivatives + By M.J.D. Powell, 40th Workshop on Large Scale Nonlinear Optimization (Erice, Italy, 2004) + */ + + matrix<double> W(M + dims + 1, M + dims + 1); + + set_subm(W, 0, 0, M, M) = 0.5*squared(tmp(trans(X)*X)); + set_subm(W, 0, M, M, 1) = 1; + set_subm(W, M, 0, 1, M) = 1; + set_subm(W, M, M, dims+1, dims+1) = 0; + set_subm(W, 0, M+1, X.nc(), X.nr()) = trans(X); + set_subm(W, M+1, 0, X.nr(), X.nc()) = X; + + + const matrix<double,0,1> r = join_cols(Y, zeros_matrix<double>(dims+1,1)); + + //matrix<double,0,1> z = pinv(W)*r; + lu_decomposition<decltype(W)> lu(W); + matrix<double,0,1> z = lu.solve(r); + //if (lu.is_singular()) std::cout << "WARNING, THE W MATRIX IS SINGULAR!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" 
<< std::endl; + + matrix<double,0,1> lambda = rowm(z, range(0,M-1)); + + c = z(M); + g = rowm(z, range(M+1,z.size()-1)); + H = X*diagm(lambda)*trans(X); + } + + // ---------------------------------------------------------------------------------------- + + struct quad_interp_result + { + quad_interp_result() = default; + + template <typename EXP> + quad_interp_result( + const matrix_exp<EXP>& best_x, + double predicted_improvement + ) : best_x(best_x), predicted_improvement(predicted_improvement) {} + + matrix<double,0,1> best_x; + double predicted_improvement = std::numeric_limits<double>::quiet_NaN(); + }; + + // ---------------------------------------------------------------------------------------- + + quad_interp_result find_max_quadraticly_interpolated_vector ( + const matrix<double,0,1>& anchor, + const double radius, + const std::vector<matrix<double,0,1>>& x, + const std::vector<double>& y, + const matrix<double,0,1>& lower, + const matrix<double,0,1>& upper + ) + { + DLIB_CASSERT(x.size() == y.size()); + DLIB_CASSERT(x.size() > 0); + for (size_t i = 0; i < x.size(); ++i) + DLIB_CASSERT(anchor.size() == x[i].size()); + + const long x_size = static_cast<long>(x.size()); + DLIB_CASSERT(anchor.size()+1 <= x_size && x_size <= (anchor.size()+1)*(anchor.size()+2)/2); + + + matrix<double> X(anchor.size(), x.size()); + matrix<double,0,1> Y(x.size()); + for (size_t i = 0; i < x.size(); ++i) + { + set_colm(X,i) = x[i] - anchor; + Y(i) = y[i]; + } + + matrix<double> H; + matrix<double,0,1> g; + double c; + + fit_quadratic_to_points(X, Y, H, g, c); + + matrix<double,0,1> p; + + solve_trust_region_subproblem_bounded(-H,-g, radius, p, 0.001, 500, lower-anchor, upper-anchor); + + // ensure we never move more than radius from the anchor. This might happen if the + // trust region subproblem isn't solved accurately enough. 
+ if (length(p) >= radius) + p *= radius/length(p); + + + double predicted_improvement = 0.5*trans(p)*H*p + trans(p)*g; + return quad_interp_result{clamp(anchor+p,lower,upper), predicted_improvement}; + } + + // ---------------------------------------------------------------------------------------- + + quad_interp_result pick_next_sample_using_trust_region ( + const std::vector<function_evaluation>& samples, + double& radius, + const matrix<double,0,1>& lower, + const matrix<double,0,1>& upper, + const std::vector<bool>& is_integer_variable + ) + { + DLIB_CASSERT(samples.size() > 0); + // We don't use the QP to optimize integer variables. Instead, we just fix them at + // their best observed value and use the QP to optimize the real variables. So the + // number of dimensions, as far as the QP is concerned, is the number of non-integer + // variables. + size_t dims = 0; + for (auto is_int : is_integer_variable) + { + if (!is_int) + ++dims; + } + + DLIB_CASSERT(samples.size() >= dims+1); + + // Use enough points to fill out a quadratic model or the max available if we don't + // have quite enough. + const long N = std::min(samples.size(), (dims+1)*(dims+2)/2); + + + // first find the best sample; + double best_val = -1e300; + matrix<double,0,1> best_x; + for (auto& v : samples) + { + if (v.y > best_val) + { + best_val = v.y; + best_x = v.x; + } + } + + // if there are only integer variables then there isn't really anything to do. So just + // return the best_x and say there is no improvement. 
+ if (dims == 0) + return quad_interp_result(best_x, 0); + + matrix<long,0,1> active_dims(dims); + long j = 0; + for (size_t i = 0; i < is_integer_variable.size(); ++i) + { + if (!is_integer_variable[i]) + active_dims(j++) = i; + } + + // now find the N-1 nearest neighbors of best_x + std::vector<std::pair<double,size_t>> distances; + for (size_t i = 0; i < samples.size(); ++i) + distances.emplace_back(length(best_x-samples[i].x), i); + std::sort(distances.begin(), distances.end()); + distances.resize(N); + + std::vector<matrix<double,0,1>> x; + std::vector<double> y; + for (auto& idx : distances) + { + x.emplace_back(rowm(samples[idx.second].x, active_dims)); + y.emplace_back(samples[idx.second].y); + } + + if (radius == 0) + { + for (auto& idx : distances) + radius = std::max(radius, length(rowm(best_x-samples[idx.second].x, active_dims)) ); + // Shrink the radius a little so we are always going to be making the sampling of + // points near the best current point smaller. + radius *= 0.95; + } + + + auto tmp = find_max_quadraticly_interpolated_vector(rowm(best_x,active_dims), radius, x, y, rowm(lower,active_dims), rowm(upper,active_dims)); + + // stick the integer variables back into the solution + for (long i = 0; i < active_dims.size(); ++i) + best_x(active_dims(i)) = tmp.best_x(i); + + tmp.best_x = best_x; + return tmp; + } + + // ---------------------------------------------------------------------------------------- + + matrix<double,0,1> make_random_vector( + dlib::rand& rnd, + const matrix<double,0,1>& lower, + const matrix<double,0,1>& upper, + const std::vector<bool>& is_integer_variable + ) + { + matrix<double,0,1> temp(lower.size()); + for (long i = 0; i < temp.size(); ++i) + { + temp(i) = rnd.get_double_in_range(lower(i), upper(i)); + if (is_integer_variable[i]) + temp(i) = std::round(temp(i)); + } + return temp; + } + + // ---------------------------------------------------------------------------------------- + + struct max_upper_bound_function + { 
+ max_upper_bound_function() = default; + + template <typename EXP> + max_upper_bound_function( + const matrix_exp<EXP>& x, + double predicted_improvement, + double upper_bound + ) : x(x), predicted_improvement(predicted_improvement), upper_bound(upper_bound) {} + + matrix<double,0,1> x; + double predicted_improvement = 0; + double upper_bound = 0; + }; + + // ------------------------------------------------------------------------------------ + + max_upper_bound_function pick_next_sample_as_max_upper_bound ( + dlib::rand& rnd, + const upper_bound_function& ub, + const matrix<double,0,1>& lower, + const matrix<double,0,1>& upper, + const std::vector<bool>& is_integer_variable, + const size_t num_random_samples + ) + { + DLIB_CASSERT(ub.num_points() > 0); + + + + // now do a simple random search to find the maximum upper bound + double best_ub_so_far = -std::numeric_limits<double>::infinity(); + matrix<double,0,1> vtemp(lower.size()), v; + for (size_t rounds = 0; rounds < num_random_samples; ++rounds) + { + vtemp = make_random_vector(rnd, lower, upper, is_integer_variable); + + double bound = ub(vtemp); + if (bound > best_ub_so_far) + { + best_ub_so_far = bound; + v = vtemp; + } + } + + double max_value = -std::numeric_limits<double>::infinity(); + for (auto& v : ub.get_points()) + max_value = std::max(max_value, v.y); + + return max_upper_bound_function(v, best_ub_so_far - max_value, best_ub_so_far); + } + + } // end of namespace qopt_impl; + + using namespace qopt_impl; + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + function_spec::function_spec( + matrix<double,0,1> bound1, + matrix<double,0,1> bound2 + ) : + lower(std::move(bound1)), upper(std::move(bound2)) + { + DLIB_CASSERT(lower.size() == upper.size()); + for (long i = 0; i < lower.size(); ++i) + { + if (upper(i) < lower(i)) + std::swap(lower(i), upper(i)); + 
DLIB_CASSERT(upper(i) != lower(i), "The upper and lower bounds can't be equal."); + } + is_integer_variable.assign(lower.size(), false); + } + +// ---------------------------------------------------------------------------------------- + + function_spec::function_spec( + matrix<double,0,1> bound1, + matrix<double,0,1> bound2, + std::vector<bool> is_integer + ) : + function_spec(std::move(bound1),std::move(bound2)) + { + is_integer_variable = std::move(is_integer); + DLIB_CASSERT(lower.size() == (long)is_integer_variable.size()); + + + // Make sure any integer variables have integer bounds. + for (size_t i = 0; i < is_integer_variable.size(); ++i) + { + if (is_integer_variable[i]) + { + DLIB_CASSERT(std::round(lower(i)) == lower(i), "If you say a variable is an integer variable then it must have an integer lower bound. \n" + << "lower[i] = " << lower(i)); + DLIB_CASSERT(std::round(upper(i)) == upper(i), "If you say a variable is an integer variable then it must have an integer upper bound. \n" + << "upper[i] = " << upper(i)); + } + } + } + +// ---------------------------------------------------------------------------------------- + + namespace gopt_impl + { + upper_bound_function funct_info::build_upper_bound_with_all_function_evals ( + ) const + { + upper_bound_function tmp(ub); + + // we are going to add the outstanding evals into this and assume the + // outstanding evals are going to take y values equal to their nearest + // neighbor complete evals. 
+ for (auto& eval : outstanding_evals) + { + function_evaluation e; + e.x = eval.x; + e.y = find_nn(ub.get_points(), eval.x); + tmp.add(e); + } + + return tmp; + } + + // ------------------------------------------------------------------------------------ + + double funct_info::find_nn ( + const std::vector<function_evaluation>& evals, + const matrix<double,0,1>& x + ) + { + double best_y = 0; + double best_dist = std::numeric_limits<double>::infinity(); + for (auto& v : evals) + { + double dist = length_squared(v.x-x); + if (dist < best_dist) + { + best_dist = dist; + best_y = v.y; + } + } + return best_y; + } + + } // end namespace gopt_impl + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + function_evaluation_request::function_evaluation_request( + function_evaluation_request&& item + ) + { + m_has_been_evaluated = item.m_has_been_evaluated; + req = item.req; + info = item.info; + item.info.reset(); + + item.m_has_been_evaluated = true; + } + +// ---------------------------------------------------------------------------------------- + + function_evaluation_request& function_evaluation_request:: + operator=( + function_evaluation_request&& item + ) + { + function_evaluation_request(std::move(item)).swap(*this); + return *this; + } + +// ---------------------------------------------------------------------------------------- + + void function_evaluation_request:: + swap( + function_evaluation_request& item + ) + { + std::swap(m_has_been_evaluated, item.m_has_been_evaluated); + std::swap(req, item.req); + std::swap(info, item.info); + } + +// ---------------------------------------------------------------------------------------- + + size_t function_evaluation_request:: + function_idx ( + ) const + { + return info->function_idx; + } 
+ + const matrix<double,0,1>& function_evaluation_request:: + x ( + ) const + { + return req.x; + } + +// ---------------------------------------------------------------------------------------- + + bool function_evaluation_request:: + has_been_evaluated ( + ) const + { + return m_has_been_evaluated; + } + +// ---------------------------------------------------------------------------------------- + + function_evaluation_request:: + ~function_evaluation_request() + { + if (!m_has_been_evaluated) + { + std::lock_guard<std::mutex> lock(*info->m); + + // remove the evaluation request from the outstanding list. + auto i = std::find(info->outstanding_evals.begin(), info->outstanding_evals.end(), req); + info->outstanding_evals.erase(i); + } + } + +// ---------------------------------------------------------------------------------------- + + void function_evaluation_request:: + set ( + double y + ) + { + DLIB_CASSERT(has_been_evaluated() == false); + std::lock_guard<std::mutex> lock(*info->m); + + m_has_been_evaluated = true; + + + // move the evaluation from outstanding to complete + auto i = std::find(info->outstanding_evals.begin(), info->outstanding_evals.end(), req); + DLIB_CASSERT(i != info->outstanding_evals.end()); + info->outstanding_evals.erase(i); + info->ub.add(function_evaluation(req.x,y)); + + + // Now do trust region radius maintenance and keep track of the best objective + // values and all that. + if (req.was_trust_region_generated_request) + { + // Adjust trust region radius based on how good this evaluation + // was. 
+ double measured_improvement = y-req.anchor_objective_value; + double rho = measured_improvement/std::abs(req.predicted_improvement); + //std::cout << "rho: "<< rho << std::endl; + //std::cout << "radius: "<< info->radius << std::endl; + if (rho < 0.25) + info->radius *= 0.5; + else if (rho > 0.75) + info->radius *= 2; + } + + if (y > info->best_objective_value) + { + if (!req.was_trust_region_generated_request && length(req.x - info->best_x) > info->radius*1.001) + { + //std::cout << "reset radius because of big move, " << length(req.x - info->best_x) << " radius was " << info->radius << std::endl; + // reset trust region radius since we made a big move. Doing this will + // cause the radius to be reset to the size of the local region. + info->radius = 0; + } + info->best_objective_value = y; + info->best_x = std::move(req.x); + } + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + global_function_search:: + global_function_search( + const function_spec& function + ) : global_function_search(std::vector<function_spec>(1,function)) {} + +// ---------------------------------------------------------------------------------------- + + global_function_search:: + global_function_search( + const std::vector<function_spec>& functions_ + ) + { + DLIB_CASSERT(functions_.size() > 0); + m = std::make_shared<std::mutex>(); + functions.reserve(functions_.size()); + for (size_t i = 0; i < functions_.size(); ++i) + functions.emplace_back(std::make_shared<gopt_impl::funct_info>(functions_[i],i,m)); + } + +// ---------------------------------------------------------------------------------------- + + global_function_search:: + global_function_search( + const std::vector<function_spec>& functions_, + const 
std::vector<std::vector<function_evaluation>>& initial_function_evals, + const double relative_noise_magnitude_ + ) : + global_function_search(functions_) + { + DLIB_CASSERT(functions_.size() > 0); + DLIB_CASSERT(functions_.size() == initial_function_evals.size()); + DLIB_CASSERT(relative_noise_magnitude >= 0); + relative_noise_magnitude = relative_noise_magnitude_; + for (size_t i = 0; i < initial_function_evals.size(); ++i) + { + functions[i]->ub = upper_bound_function(initial_function_evals[i], relative_noise_magnitude); + } + } + +// ---------------------------------------------------------------------------------------- + + size_t global_function_search:: + num_functions( + ) const + { + return functions.size(); + } + +// ---------------------------------------------------------------------------------------- + + void global_function_search:: + set_seed ( + time_t seed + ) + { + rnd = dlib::rand(seed); + } + +// ---------------------------------------------------------------------------------------- + + void global_function_search:: + get_function_evaluations ( + std::vector<function_spec>& specs, + std::vector<std::vector<function_evaluation>>& function_evals + ) const + { + std::lock_guard<std::mutex> lock(*m); + specs.clear(); + function_evals.clear(); + for (size_t i = 0; i < functions.size(); ++i) + { + specs.emplace_back(functions[i]->spec); + function_evals.emplace_back(functions[i]->ub.get_points()); + } + } + +// ---------------------------------------------------------------------------------------- + + void global_function_search:: + get_best_function_eval ( + matrix<double,0,1>& x, + double& y, + size_t& function_idx + ) const + { + DLIB_CASSERT(num_functions() != 0); + + std::lock_guard<std::mutex> lock(*m); + + // find the largest value + auto& info = *best_function(function_idx); + y = info.best_objective_value; + x = info.best_x; + } + +// ---------------------------------------------------------------------------------------- + + 
    // Returns the next point the solver wants evaluated, as a request object the caller
    // completes via set().  Priority order visible below: (1) seed each function with the
    // center of its bounds, then random points, until it has max(3,dims) evaluations;
    // (2) alternate a trust-region step on the current best function with (3) a Monte
    // Carlo maximization of the upper bound function over all functions; (4) otherwise
    // sample a function and point uniformly at random.
    function_evaluation_request global_function_search::
    get_next_x (
    )
    {
        DLIB_CASSERT(num_functions() != 0);

        using namespace gopt_impl;

        std::lock_guard<std::mutex> lock(*m);


        // the first thing we do is make sure each function has at least max(3,dimensionality of function) evaluations
        for (auto& info : functions)
        {
            const long dims = info->spec.lower.size();
            // If this is the very beginning of the optimization process
            if (info->ub.num_points()+info->outstanding_evals.size() < 1)
            {
                outstanding_function_eval_request new_req;
                new_req.request_id = next_request_id++;
                // Pick the point right in the center of the bounds to evaluate first since
                // people will commonly center the bound on a location they think is good.
                // So might as well try there first.
                new_req.x = (info->spec.lower + info->spec.upper)/2.0;
                for (long i = 0; i < new_req.x.size(); ++i)
                {
                    // keep integer variables on the integer lattice
                    if (info->spec.is_integer_variable[i])
                        new_req.x(i) = std::round(new_req.x(i));
                }
                info->outstanding_evals.emplace_back(new_req);
                return function_evaluation_request(new_req,info);
            }
            else if (info->ub.num_points() < std::max<long>(3,dims))
            {
                // not enough completed evaluations yet: sample uniformly inside the bounds
                outstanding_function_eval_request new_req;
                new_req.request_id = next_request_id++;
                new_req.x = make_random_vector(rnd, info->spec.lower, info->spec.upper, info->spec.is_integer_variable);
                info->outstanding_evals.emplace_back(new_req);
                return function_evaluation_request(new_req,info);
            }
        }


        if (do_trust_region_step && !has_outstanding_trust_region_request())
        {
            // find the currently best performing function, we will do a trust region
            // step on it.
            auto info = best_function();
            const long dims = info->spec.lower.size();
            // if we have enough points to do a trust region step
            if (info->ub.num_points() > dims+1)
            {
                auto tmp = pick_next_sample_using_trust_region(info->ub.get_points(),
                    info->radius, info->spec.lower, info->spec.upper, info->spec.is_integer_variable);
                //std::cout << "QP predicted improvement: "<< tmp.predicted_improvement << std::endl;
                // only take the step if the quadratic model predicts a non-trivial gain
                if (tmp.predicted_improvement > min_trust_region_epsilon)
                {
                    do_trust_region_step = false;
                    outstanding_function_eval_request new_req;
                    new_req.request_id = next_request_id++;
                    new_req.x = tmp.best_x;
                    // record prediction data so set() can later update the trust region radius
                    new_req.was_trust_region_generated_request = true;
                    new_req.anchor_objective_value = info->best_objective_value;
                    new_req.predicted_improvement = tmp.predicted_improvement;
                    info->outstanding_evals.emplace_back(new_req);
                    return function_evaluation_request(new_req, info);
                }
            }
        }

        // make it so we alternate between upper bounded and trust region steps.
        do_trust_region_step = true;

        if (rnd.get_random_double() >= pure_random_search_probability)
        {
            // pick a point at random to sample according to the upper bound
            double best_upper_bound = -std::numeric_limits<double>::infinity();
            std::shared_ptr<funct_info> best_funct;
            matrix<double,0,1> next_sample;
            // so figure out if any function has a good upper bound and if so pick the
            // function with the largest upper bound for evaluation.
            for (auto& info : functions)
            {
                auto tmp = pick_next_sample_as_max_upper_bound(rnd,
                    info->build_upper_bound_with_all_function_evals(), info->spec.lower, info->spec.upper,
                    info->spec.is_integer_variable, num_random_samples);
                if (tmp.predicted_improvement > 0 && tmp.upper_bound > best_upper_bound)
                {
                    best_upper_bound = tmp.upper_bound;
                    next_sample = std::move(tmp.x);
                    best_funct = info;
                }
            }

            // if we found a good function to evaluate then return that.
            if (best_funct)
            {
                outstanding_function_eval_request new_req;
                new_req.request_id = next_request_id++;
                new_req.x = std::move(next_sample);
                best_funct->outstanding_evals.emplace_back(new_req);
                return function_evaluation_request(new_req, best_funct);
            }
        }


        // pick entirely at random
        size_t function_idx = rnd.get_integer(functions.size());
        auto info = functions[function_idx];
        outstanding_function_eval_request new_req;
        new_req.request_id = next_request_id++;
        new_req.x = make_random_vector(rnd, info->spec.lower, info->spec.upper, info->spec.is_integer_variable);
        info->outstanding_evals.emplace_back(new_req);
        return function_evaluation_request(new_req, info);

    }

// ----------------------------------------------------------------------------------------

    // Probability of taking a purely random sample instead of an upper-bound-guided one.
    double global_function_search::
    get_pure_random_search_probability (
    ) const
    {
        return pure_random_search_probability;
    }

// ----------------------------------------------------------------------------------------

    void global_function_search::
    set_pure_random_search_probability (
        double prob
    )
    {
        DLIB_CASSERT(0 <= prob && prob <= 1);
        pure_random_search_probability = prob;
    }

// ----------------------------------------------------------------------------------------

    // Minimum predicted improvement required before a trust region step is taken.
    double global_function_search::
    get_solver_epsilon (
    ) const
    {
        return min_trust_region_epsilon;
    }

// ----------------------------------------------------------------------------------------

    void global_function_search::
    set_solver_epsilon (
        double eps
    )
    {
        DLIB_CASSERT(0 <= eps);
        min_trust_region_epsilon = eps;
    }

// ----------------------------------------------------------------------------------------

    // Assumed relative noise level used when building the upper bound functions.
    double global_function_search::
    get_relative_noise_magnitude (
    ) const
    {
        return relative_noise_magnitude;
    }

// ----------------------------------------------------------------------------------------

    void global_function_search::
    set_relative_noise_magnitude (
        double value
    )
    {
        DLIB_CASSERT(0 <= value);
        relative_noise_magnitude = value;
        if (m)
        {
            std::lock_guard<std::mutex> lock(*m);
            // recreate all the upper bound functions with the new relative noise magnitude
            for (auto& f : functions)
                f->ub = upper_bound_function(f->ub.get_points(), relative_noise_magnitude);
        }
    }

// ----------------------------------------------------------------------------------------

    // Number of random candidates drawn when maximizing the upper bound function.
    size_t global_function_search::
    get_monte_carlo_upper_bound_sample_num (
    ) const
    {
        return num_random_samples;
    }

// ----------------------------------------------------------------------------------------

    void global_function_search::
    set_monte_carlo_upper_bound_sample_num (
        size_t num
    )
    {
        // NOTE(review): num is unsigned so this assert is always true; presumably kept
        // for symmetry with the other setters — confirm whether num > 0 was intended.
        DLIB_CASSERT(0 <= num);
        num_random_samples = num;
    }

// ----------------------------------------------------------------------------------------

    // Returns the function with the largest best_objective_value so far.
    std::shared_ptr<gopt_impl::funct_info> global_function_search::
    best_function(
    ) const
    {
        size_t idx = 0;
        return best_function(idx);
    }

// ----------------------------------------------------------------------------------------

    // Same as above but also reports the winning function's index via idx.
    std::shared_ptr<gopt_impl::funct_info> global_function_search::
    best_function(
        size_t& idx
    ) const
    {
        auto compare = [](const std::shared_ptr<gopt_impl::funct_info>& a, const std::shared_ptr<gopt_impl::funct_info>& b)
            { return a->best_objective_value < b->best_objective_value; };

        auto i = std::max_element(functions.begin(), functions.end(), compare);

        idx = std::distance(functions.begin(),i);
        return *i;
    }

// ----------------------------------------------------------------------------------------

    // True if any function still has an unanswered trust-region-generated request.
    bool global_function_search::
    has_outstanding_trust_region_request (
    ) const
    {
        for (auto& f : functions)
        {
            for (auto& i : f->outstanding_evals)
            {
                if (i.was_trust_region_generated_request)
                    return true;
            }
        }
        return false;
    }

//
---------------------------------------------------------------------------------------- + +} + diff --git a/ml/dlib/dlib/global_optimization/global_function_search.h b/ml/dlib/dlib/global_optimization/global_function_search.h new file mode 100644 index 000000000..fa036884a --- /dev/null +++ b/ml/dlib/dlib/global_optimization/global_function_search.h @@ -0,0 +1,245 @@ +// Copyright (C) 2017 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_GLOBAL_FuNCTION_SEARCH_Hh_ +#define DLIB_GLOBAL_FuNCTION_SEARCH_Hh_ + +#include "global_function_search_abstract.h" +#include <vector> +#include "../matrix.h" +#include <mutex> +#include "../rand.h" +#include "upper_bound_function.h" +#include "../test_for_odr_violations.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + struct function_spec + { + function_spec( + matrix<double,0,1> bound1, + matrix<double,0,1> bound2 + ); + + function_spec( + matrix<double,0,1> bound1, + matrix<double,0,1> bound2, + std::vector<bool> is_integer + ); + + matrix<double,0,1> lower; + matrix<double,0,1> upper; + std::vector<bool> is_integer_variable; + }; + +// ---------------------------------------------------------------------------------------- + + namespace gopt_impl + { + struct outstanding_function_eval_request + { + size_t request_id = 0; // unique id for this eval request + matrix<double,0,1> x; // function x to evaluate + + // trust region specific stuff + bool was_trust_region_generated_request = false; + double predicted_improvement = std::numeric_limits<double>::quiet_NaN(); + double anchor_objective_value = std::numeric_limits<double>::quiet_NaN(); // objective value at center of TR step + + bool operator==(const outstanding_function_eval_request& item) const { return request_id == item.request_id; } + }; + + struct funct_info + { + funct_info() = delete; + funct_info(const funct_info&) = delete; 
+ funct_info& operator=(const funct_info&) = delete; + + funct_info( + const function_spec& spec, + size_t function_idx, + const std::shared_ptr<std::mutex>& m + ) : + spec(spec), function_idx(function_idx), m(m) + { + best_x = zeros_matrix(spec.lower); + } + + upper_bound_function build_upper_bound_with_all_function_evals ( + ) const; + + static double find_nn ( + const std::vector<function_evaluation>& evals, + const matrix<double,0,1>& x + ); + + + function_spec spec; + size_t function_idx = 0; + std::shared_ptr<std::mutex> m; + upper_bound_function ub; + std::vector<outstanding_function_eval_request> outstanding_evals; + matrix<double,0,1> best_x; + double best_objective_value = -std::numeric_limits<double>::infinity(); + double radius = 0; + }; + + } + +// ---------------------------------------------------------------------------------------- + + class function_evaluation_request + { + public: + + function_evaluation_request() = delete; + function_evaluation_request(const function_evaluation_request&) = delete; + function_evaluation_request& operator=(const function_evaluation_request&) = delete; + + + function_evaluation_request(function_evaluation_request&& item); + function_evaluation_request& operator=(function_evaluation_request&& item); + + ~function_evaluation_request(); + + size_t function_idx ( + ) const; + + const matrix<double,0,1>& x ( + ) const; + + bool has_been_evaluated ( + ) const; + + void set ( + double y + ); + + void swap(function_evaluation_request& item); + + private: + + friend class global_function_search; + + explicit function_evaluation_request( + const gopt_impl::outstanding_function_eval_request& req, + const std::shared_ptr<gopt_impl::funct_info>& info + ) : req(req), info(info) {} + + bool m_has_been_evaluated = false; + gopt_impl::outstanding_function_eval_request req; + std::shared_ptr<gopt_impl::funct_info> info; + }; + +// ---------------------------------------------------------------------------------------- + + class 
global_function_search + { + public: + + global_function_search() = default; + + explicit global_function_search( + const function_spec& function + ); + + explicit global_function_search( + const std::vector<function_spec>& functions_ + ); + + global_function_search( + const std::vector<function_spec>& functions_, + const std::vector<std::vector<function_evaluation>>& initial_function_evals, + const double relative_noise_magnitude = 0.001 + ); + + global_function_search(const global_function_search&) = delete; + global_function_search& operator=(const global_function_search& item) = delete; + + global_function_search(global_function_search&& item) = default; + global_function_search& operator=(global_function_search&& item) = default; + + size_t num_functions( + ) const; + + void set_seed ( + time_t seed + ); + + void get_function_evaluations ( + std::vector<function_spec>& specs, + std::vector<std::vector<function_evaluation>>& function_evals + ) const; + + void get_best_function_eval ( + matrix<double,0,1>& x, + double& y, + size_t& function_idx + ) const; + + function_evaluation_request get_next_x ( + ); + + double get_pure_random_search_probability ( + ) const; + + void set_pure_random_search_probability ( + double prob + ); + + double get_solver_epsilon ( + ) const; + + void set_solver_epsilon ( + double eps + ); + + double get_relative_noise_magnitude ( + ) const; + + void set_relative_noise_magnitude ( + double value + ); + + size_t get_monte_carlo_upper_bound_sample_num ( + ) const; + + void set_monte_carlo_upper_bound_sample_num ( + size_t num + ); + + private: + + std::shared_ptr<gopt_impl::funct_info> best_function( + ) const; + + std::shared_ptr<gopt_impl::funct_info> best_function( + size_t& idx + ) const; + + bool has_outstanding_trust_region_request ( + ) const; + + + dlib::rand rnd; + double pure_random_search_probability = 0.02; + double min_trust_region_epsilon = 0; + double relative_noise_magnitude = 0.001; + size_t num_random_samples = 5000; + 
bool do_trust_region_step = true; + + size_t next_request_id = 1; + + std::vector<std::shared_ptr<gopt_impl::funct_info>> functions; + std::shared_ptr<std::mutex> m; + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_GLOBAL_FuNCTION_SEARCH_Hh_ + diff --git a/ml/dlib/dlib/global_optimization/global_function_search_abstract.h b/ml/dlib/dlib/global_optimization/global_function_search_abstract.h new file mode 100644 index 000000000..c8bfc3993 --- /dev/null +++ b/ml/dlib/dlib/global_optimization/global_function_search_abstract.h @@ -0,0 +1,605 @@ +// Copyright (C) 2017 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_GLOBAL_FuNCTION_SEARCH_ABSTRACT_Hh_ +#ifdef DLIB_GLOBAL_FuNCTION_SEARCH_ABSTRACT_Hh_ + +#include <vector> +#include "../matrix.h" +#include "upper_bound_function_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + struct function_spec + { + /*! + WHAT THIS OBJECT REPRESENTS + This object is a simple struct that lets you define the valid inputs to a + multivariate function. It lets you define bound constraints for each + variable as well as say if a variable is integer valued or not. Therefore, + an instance of this struct says that a function takes upper.size() input + variables, where the ith variable must be in the range [lower(i) upper(i)] + and be an integer if is_integer_variable[i]==true. + !*/ + + function_spec( + matrix<double,0,1> bound1, + matrix<double,0,1> bound2 + ); + /*! 
+ requires + - bound1.size() == bound2.size() + - for all valid i: bound1(i) != bound2(i) + ensures + - #is_integer_variable.size() == bound1.size() + - #lower.size() == bound1.size() + - #upper.size() == bound1.size() + - for all valid i: + - #is_integer_variable[i] == false + - #lower(i) == min(bound1(i), bound2(i)) + - #upper(i) == max(bound1(i), bound2(i)) + !*/ + + function_spec( + matrix<double,0,1> bound1, + matrix<double,0,1> bound2, + std::vector<bool> is_integer + ); + /*! + requires + - bound1.size() == bound2.size() == is_integer.size() + - for all valid i: bound1(i) != bound2(i) + ensures + - #is_integer_variable.size() == bound1.size() + - #lower.size() == bound1.size() + - #upper.size() == bound1.size() + - for all valid i: + - #is_integer_variable[i] == is_integer[i] + - #lower(i) == min(bound1(i), bound2(i)) + - #upper(i) == max(bound1(i), bound2(i)) + !*/ + + matrix<double,0,1> lower; + matrix<double,0,1> upper; + std::vector<bool> is_integer_variable; + }; + +// ---------------------------------------------------------------------------------------- + + class function_evaluation_request + { + /*! + WHAT THIS OBJECT REPRESENTS + This object represents a request, by the global_function_search object, to + evaluate a real-valued function and report back the results. + + THREAD SAFETY + You shouldn't let more than one thread touch a function_evaluation_request + at the same time. However, it is safe to send instances of this class to + other threads for processing. This lets you evaluate multiple + function_evaluation_requests in parallel. Any appropriate synchronization + with regard to the originating global_function_search instance is handled + automatically. + !*/ + + public: + + // You can't make or copy this object, the only way to get one is from the + // global_function_search class via get_next_x(). 
+ function_evaluation_request() = delete; + function_evaluation_request(const function_evaluation_request&) = delete; + function_evaluation_request& operator=(const function_evaluation_request&) = delete; + + // You can however move and swap this object. + function_evaluation_request(function_evaluation_request&& item); + function_evaluation_request& operator=(function_evaluation_request&& item); + /*! + ensures + - *this takes the state of item. + - #item.has_been_evaluated() == true + !*/ + + ~function_evaluation_request( + ); + /*! + ensures + - frees all resources associated with this object. + - It's fine to destruct function_evaluation_requests even if they haven't + been evaluated yet. If this happens it will simply be as if the request + was never issued. + !*/ + + size_t function_idx ( + ) const; + /*! + ensures + - Returns the function index that identifies which function is to be + evaluated. + !*/ + + const matrix<double,0,1>& x ( + ) const; + /*! + ensures + - returns the input parameters to the function to be evaluated. + !*/ + + bool has_been_evaluated ( + ) const; + /*! + ensures + - If this evaluation request is still outstanding then returns false, + otherwise returns true. That is, if the global_function_search is still + waiting for you to report back by calling set() then + has_been_evaluated()==false. + !*/ + + void set ( + double y + ); + /*! + requires + - has_been_evaluated() == false + ensures + - #has_been_evaluated() == true + - Notifies the global_function_search instance that created this object + that when the function_idx()th function is evaluated with x() as input + then the output is y. + !*/ + + void swap( + function_evaluation_request& item + ); + /*! + ensures + - swaps the state of *this and item + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + class global_function_search + { + /*! 
+ WHAT THIS OBJECT REPRESENTS + This object performs global optimization of a set of user supplied + functions. The goal is to maximize the following objective function: + max_{function_i,x_i}: function_i(x_i) + subject to bound constraints on each element of x_i. Moreover, each + element of x_i can be either real valued or integer valued. Each of the + functions can also take a different number of variables. Therefore, the + final result of the optimization tells you which function produced the + largest output and what input (i.e. the x value) to that function is + necessary to obtain that maximal value. + + Importantly, the global_function_search object does not require the user to + supply derivatives. Moreover, the functions may contain discontinuities, + behave stochastically, and have many local maxima. The global_function_search + object will attempt to find the global optima in the face of these challenges. + It is also designed to use as few function evaluations as possible, making + it suitable for optimizing functions that are very expensive to evaluate. + + It does this by alternating between two modes. A global exploration mode + and a local optima refinement mode. This is accomplished by building and + maintaining two models of the objective function: + 1. A global model that upper bounds our objective function. This is a + non-parametric piecewise linear model based on all function + evaluations ever seen by the global_function_search object. + 2. A local quadratic model fit around the best point seen so far. + + The optimization procedure therefore looks like this: + + while(not done) + { + DO GLOBAL EXPLORE STEP: + Find the point that maximizes the upper bounding model since + that is the point with the largest possible improvement in the + objective function. + + Evaluate the new point and incorporate it into our models. + + DO LOCAL REFINEMENT STEP: + Find the optimal solution to the local quadratic model. 
+ + If this point looks like it will improve on the "best point seen + so far" by at least get_solver_epsilon() then we evaluate that + point and incorporate it into our models, otherwise we ignore + it. + } + + You can see that we alternate between global search and local refinement, + except in the case where the local model seems to have converged to within + get_solver_epsilon() accuracy. In that case only global search steps are + used. We do this in the hope that the global search will find a new and + better local optima to explore, which would then reactivate local + refinement when it has something productive to do. + + + Now let's turn our attention to the specific API defined by the + global_function_search object. We will begin by showing a short example of + its use: + + // Suppose we want to find which of these functions, F() and G(), have + // the largest output and what input is necessary to produce the + // maximal output. + auto F = [](double a, double b) { return -std::pow(a-2,2.0) - std::pow(b-4,2.0); }; + auto G = [](double x) { return 2-std::pow(x-5,2.0); }; + + // We first define function_spec objects that specify bounds on the + // inputs to each function. The search process will only search within + // these bounds. + function_spec spec_F({-10,-10}, {10,10}); + function_spec spec_G({-2}, {6}); + // Then we create a global_function_search object with those function specifications. + global_function_search opt({spec_F, spec_G}); + + // Here we run 15 iterations of the search process. Note that the user + // of global_function_search writes the main solver loop, which is + // somewhat unusual. We will discuss why that is in a moment, but for + // now let's look at this example. + for (int i = 0; i < 15; ++i) + { + // All we do here is ask the global_function_search object what to + // evaluate next, then do what it asked, and then report the + // results back by calling function_evaluation_request's set() + // method. 
+ function_evaluation_request next = opt.get_next_x(); + // next.function_idx() tells you which of the functions you should + // evaluate. We have 2 functions here (F and G) so function_idx() + // can take only the values 0 and 1. If, for example, we had 10 + // functions it would take the values 0 through 9. + if (next.function_idx() == 0) + { + // Call F with the inputs requested by the + // global_function_search and report them back. + double a = next.x()(0); + double b = next.x()(1); + next.set(F(a,b)); // Tell the solver what happened. + } + else + { + double x = next.x()(0); + next.set(G(x)); + } + } + + // Find out what point gave the largest outputs: + matrix<double,0,1> x; + double y; + size_t function_idx; + opt.get_best_function_eval(x,y,function_idx); + + cout << "function_idx: "<< function_idx << endl; + cout << "y: " << y << endl; + cout << "x: " << x << endl; + + The above cout statements will print this: + + function_idx: 1 + y: 2 + x: 5 + + Which is the correct result since G(5) gives the largest possible output in + our example. + + So why does the user write the main loop? Why isn't it embedded inside + dlib? Well, there are two answers to this. The first is that it is. Most + users should just call dlib::find_max_global() which does exactly that, it + runs the loop for you. However, the API shown above gives you the + opportunity to run multiple function evaluations in parallel. For + instance, it is perfectly valid to call get_next_x() multiple times and + send the resulting function_evaluation_request objects to separate threads + for processing. Those separate threads can run the functions being + optimized (e.g. F and G or whatever) and report back by calling + function_evaluation_request::set(). You could even spread the work across + a compute cluster if you have one. + + So what happens if you have N outstanding function evaluation requests? 
+ Or in other words, what happens if you called get_next_x() N times and + haven't yet called their set() methods? Well, 1 of the N requests will be + a local refinement step while the N-1 other requests will be global + exploration steps generated from the current upper bounding model. This + should give you an idea of the usefulness of this kind of parallelism. If + for example, your functions being optimized were simple convex functions + this kind of parallelism wouldn't help since essentially all the + interesting work in the solver is going to be done by the local optimizer. + However, if your function has a lot of local optima, running many global + exploration steps in parallel might significantly reduce the time it takes + to find a good solution. + + It should also be noted that our upper bounding model is implemented by the + dlib::upper_bound_function object, which is a tool that allows us to create + a tight upper bound on our objective function. This upper bound is + non-parametric and gets progressively more accurate as the optimization + progresses, but also more and more expensive to maintain. It causes the + runtime of the entire optimization procedure to be O(N^2) where N is the + number of objective function evaluations. So problems that require millions + of function evaluations to find a good solution are not appropriate for the + global_function_search tool. However, if your objective function is very + expensive to evaluate then this relatively expensive upper bounding model + is well worth its computational cost. + + Finally, let's introduce some background literature on this algorithm. The + two most relevant papers in the optimization literature are: + Global optimization of Lipschitz functions Malherbe, Cédric and Vayatis, + Nicolas International Conference on Machine Learning - 2017 + and + The NEWUOA software for unconstrained optimization without derivatives By + M.J.D. 
Powell, 40th Workshop on Large Scale Nonlinear Optimization (Erice, + Italy, 2004) + + Our upper bounding model is an extension of the AdaLIPO method in the + Malherbe paper. See the documentation of dlib::upper_bound_function for more + details on that, as we make a number of important extensions. The other + part of our method, our local refinement model, is essentially the same + type of trust region model proposed by Powell in the above paper. That is, + each time we do a local refinement step we identify the best point seen so + far, fit a quadratic function around it using the function evaluations we + have collected so far, and then use a simple trust region procedure to + decide the next best point to evaluate based on our quadratic model. + + The method proposed by Malherbe gives excellent global search performance + but has terrible convergence properties in the area around a maxima. + Powell's method on the other hand has excellent convergence in the area + around a local maxima, as expected by a quadratic trust region method, but + is aggressively local maxima seeking. It will simply get stuck in the + nearest local optima. Combining the two together as we do here gives us + excellent performance in both global search and final convergence speed + near a local optima. Causing the global_function_search to perform well + for functions with many local optima while still giving high precision + solutions. For instance, on typical test problems, like the Holder table + function, the global_function_search object can reliably find the globally + optimal solution to full floating point precision in under a few hundred + steps. + + + THREAD SAFETY + You shouldn't let more than one thread touch a global_function_search + instance at the same time. + !*/ + + public: + + global_function_search( + ); + /*! 
+ ensures + - #num_functions() == 0 + - #get_relative_noise_magnitude() == 0.001 + - #get_solver_epsilon() == 0 + - #get_monte_carlo_upper_bound_sample_num() == 5000 + - #get_pure_random_search_probability() == 0.02 + !*/ + + explicit global_function_search( + const function_spec& function + ); + /*! + ensures + - #num_functions() == 1 + - #get_function_evaluations() will indicate that there are no function evaluations yet. + - #get_relative_noise_magnitude() == 0.001 + - #get_solver_epsilon() == 0 + - #get_monte_carlo_upper_bound_sample_num() == 5000 + - #get_pure_random_search_probability() == 0.02 + !*/ + + explicit global_function_search( + const std::vector<function_spec>& functions + ); + /*! + ensures + - #num_functions() == functions.size() + - #get_function_evaluations() will indicate that there are no function evaluations yet. + - #get_relative_noise_magnitude() == 0.001 + - #get_solver_epsilon() == 0 + - #get_monte_carlo_upper_bound_sample_num() == 5000 + - #get_pure_random_search_probability() == 0.02 + !*/ + + global_function_search( + const std::vector<function_spec>& functions, + const std::vector<std::vector<function_evaluation>>& initial_function_evals, + const double relative_noise_magnitude = 0.001 + ); + /*! + requires + - functions.size() == initial_function_evals.size() + - relative_noise_magnitude >= 0 + ensures + - #num_functions() == functions.size() + - #get_function_evaluations() will return the provided initial_function_evals. + - #get_relative_noise_magnitude() == relative_noise_magnitude + - #get_solver_epsilon() == 0 + - #get_monte_carlo_upper_bound_sample_num() == 5000 + - #get_pure_random_search_probability() == 0.02 + !*/ + + // This object can't be copied. 
+ global_function_search(const global_function_search&) = delete; + global_function_search& operator=(const global_function_search& item) = delete; + // But it can be moved + global_function_search(global_function_search&& item) = default; + global_function_search& operator=(global_function_search&& item) = default; + /*! + ensures + - moves the state of item into *this + - #item.num_functions() == 0 + !*/ + + void set_seed ( + time_t seed + ); + /*! + ensures + - Part of this object's algorithm uses random sampling to decide what + points to evaluate next. Calling set_seed() lets you set the seed used + by the random number generator. Note that if you don't call set_seed() + you will always get the same deterministic behavior. + !*/ + + size_t num_functions( + ) const; + /*! + ensures + - returns the number of functions being optimized. + !*/ + + void get_function_evaluations ( + std::vector<function_spec>& specs, + std::vector<std::vector<function_evaluation>>& function_evals + ) const; + /*! + ensures + - #specs.size() == num_functions() + - #function_evals.size() == num_functions() + - This function allows you to query the state of the solver. In + particular, you can find the function_specs for each function being + optimized and their recorded evaluations. + - for all valid i: + - function_evals[i] == all the function evaluations that have been + recorded for the ith function (i.e. the function with the + function_spec #specs[i]). That is, this is the record of all the x + and y values reported back by function_evaluation_request::set() + calls. + !*/ + + void get_best_function_eval ( + matrix<double,0,1>& x, + double& y, + size_t& function_idx + ) const; + /*! + requires + - num_functions() != 0 + ensures + - if (no function evaluations have been recorded yet) then + - The outputs of this function are in a valid but undefined state. + - else + - This function tells you which function has produced the largest + output seen so far. 
It also tells you the inputs to that function + that leads to those outputs (x) as well as the output value itself (y). + - 0 <= #function_idx < num_functions() + - #function_idx == the index of the function that produced the largest output seen so far. + - #x == the input parameters to the function that produced the largest outputs seen so far. + - #y == the largest output seen so far. + !*/ + + function_evaluation_request get_next_x ( + ); + /*! + requires + - num_functions() != 0 + ensures + - Generates and returns a function evaluation request. See the discussion + in the WHAT THIS OBJECT REPRESENTS section above for details. + !*/ + + double get_pure_random_search_probability ( + ) const; + /*! + ensures + - When we decide to do a global explore step we will, with probability + get_pure_random_search_probability(), sample a point completely at random + rather than using the upper bounding model. Therefore, if you set this + probability to 0 then we will depend entirely on the upper bounding + model. Alternatively, if you set get_pure_random_search_probability() to + 1 then we won't use the upper bounding model at all and instead use pure + random search to do global exploration. Pure random search is much + faster than using the upper bounding model, so if you know that your + objective function is especially simple it can be faster to use pure + random search. However, if you really know your function that well you + should probably use a gradient based optimizer :) + !*/ + + void set_pure_random_search_probability ( + double prob + ); + /*! + requires + - prob >= 0 + ensures + - #get_pure_random_search_probability() == prob + !*/ + + double get_solver_epsilon ( + ) const; + /*! + ensures + - As discussed in the WHAT THIS OBJECT REPRESENTS section, we only do a + local refinement step if we haven't already found the peak of the current + local optima. get_solver_epsilon() sets the tolerance for deciding if + the local search method has found the local optima. 
Therefore, when the + local trust region model runs we check if its predicted improvement in + the objective function is greater than get_solver_epsilon(). If it isn't + then we assume it has converged and we should focus entirely on global + search. + + This means that, for instance, setting get_solver_epsilon() to 0 + essentially instructs the solver to find each local optima to full + floating point precision and only then to focus on pure global search. + !*/ + + void set_solver_epsilon ( + double eps + ); + /*! + requires + - eps >= 0 + ensures + - #get_solver_epsilon() == eps + !*/ + + double get_relative_noise_magnitude ( + ) const; + /*! + ensures + - Returns the value of the relative noise magnitude parameter to the + dlib::upper_bound_function's used by this object. See the + upper_bound_function's documentation for a detailed discussion of this + parameter's meaning. Most users should leave this value as its default + setting. + !*/ + + void set_relative_noise_magnitude ( + double value + ); + /*! + requires + - value >= 0 + ensures + - #get_relative_noise_magnitude() == value + !*/ + + size_t get_monte_carlo_upper_bound_sample_num ( + ) const; + /*! + ensures + - To find the point that maximizes the upper bounding model we use + get_monte_carlo_upper_bound_sample_num() random evaluations and select + the largest upper bound from that set. So this parameter influences how + well we estimate the maximum point on the upper bounding model. + !*/ + + void set_monte_carlo_upper_bound_sample_num ( + size_t num + ); + /*! 
+ requires + - num > 0 + ensures + - #get_monte_carlo_upper_bound_sample_num() == num + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_GLOBAL_FuNCTION_SEARCH_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/global_optimization/upper_bound_function.h b/ml/dlib/dlib/global_optimization/upper_bound_function.h new file mode 100644 index 000000000..d1957623e --- /dev/null +++ b/ml/dlib/dlib/global_optimization/upper_bound_function.h @@ -0,0 +1,286 @@ +// Copyright (C) 2017 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_UPPER_bOUND_FUNCTION_Hh_ +#define DLIB_UPPER_bOUND_FUNCTION_Hh_ + +#include "upper_bound_function_abstract.h" +#include "../svm/svm_c_linear_dcd_trainer.h" +#include "../statistics.h" +#include <limits> +#include <utility> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + struct function_evaluation + { + function_evaluation() = default; + function_evaluation(const matrix<double,0,1>& x, double y) :x(x), y(y) {} + + matrix<double,0,1> x; + double y = std::numeric_limits<double>::quiet_NaN(); + }; + +// ---------------------------------------------------------------------------------------- + + class upper_bound_function + { + + public: + + upper_bound_function( + ) = default; + + upper_bound_function( + const double relative_noise_magnitude, + const double solver_eps + ) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps) + { + DLIB_CASSERT(relative_noise_magnitude >= 0); + DLIB_CASSERT(solver_eps > 0); + } + + explicit upper_bound_function( + const std::vector<function_evaluation>& _points, + const double relative_noise_magnitude = 0.001, + const double solver_eps = 0.0001 + ) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps), points(_points) + { + DLIB_CASSERT(relative_noise_magnitude >= 
0); + DLIB_CASSERT(solver_eps > 0); + + if (points.size() > 1) + { + DLIB_CASSERT(points[0].x.size() > 0, "The vectors can't be empty."); + + const long dims = points[0].x.size(); + for (auto& p : points) + DLIB_CASSERT(p.x.size() == dims, "All the vectors given to upper_bound_function must have the same dimensionality."); + + learn_params(); + } + + } + + void add ( + const function_evaluation& point + ) + { + DLIB_CASSERT(point.x.size() != 0, "The vectors can't be empty."); + if (points.size() == 0) + { + points.push_back(point); + return; + } + + DLIB_CASSERT(point.x.size() == dimensionality(), "All the vectors given to upper_bound_function must have the same dimensionality."); + + if (points.size() < 4) + { + points.push_back(point); + *this = upper_bound_function(points, relative_noise_magnitude, solver_eps); + return; + } + + points.push_back(point); + // add constraints between the new point and the old points + for (size_t i = 0; i < points.size()-1; ++i) + active_constraints.push_back(std::make_pair(i,points.size()-1)); + + learn_params(); + } + + long num_points( + ) const + { + return points.size(); + } + + long dimensionality( + ) const + { + if (points.size() == 0) + return 0; + else + return points[0].x.size(); + } + + const std::vector<function_evaluation>& get_points( + ) const + { + return points; + } + + double operator() ( + const matrix<double,0,1>& x + ) const + { + DLIB_CASSERT(num_points() > 0); + DLIB_CASSERT(x.size() == dimensionality()); + + + + double upper_bound = std::numeric_limits<double>::infinity(); + + for (size_t i = 0; i < points.size(); ++i) + { + const double local_bound = points[i].y + std::sqrt(offsets[i] + dot(slopes, squared(x-points[i].x))); + upper_bound = std::min(upper_bound, local_bound); + } + + return upper_bound; + } + + private: + + void learn_params ( + ) + { + const long dims = points[0].x.size(); + + using sample_type = std::vector<std::pair<size_t,double>>; + using kernel_type = 
sparse_linear_kernel<sample_type>; + std::vector<sample_type> x; + std::vector<double> y; + + // We are going to normalize the data so the values aren't extreme. First, we + // collect statistics on our data. + std::vector<running_stats<double>> x_rs(dims); + running_stats<double> y_rs; + for (auto& v : points) + { + for (long i = 0; i < v.x.size(); ++i) + x_rs[i].add(v.x(i)); + y_rs.add(v.y); + } + + + // compute normalization vectors for the data. The only reason we do this is + // to make the optimization well conditioned. In particular, scaling the y + // values will prevent numerical errors in the 1-diff*diff computation below that + // would otherwise result when diff is really big. Also, scaling the xvalues + // to be about 1 will similarly make the optimization more stable and it also + // has the added benefit of keeping the relative_noise_magnitude's scale + // constant regardless of the size of x values. + const double yscale = 1.0/y_rs.stddev(); + std::vector<double> xscale(dims); + for (size_t i = 0; i < xscale.size(); ++i) + xscale[i] = 1.0/(x_rs[i].stddev()*yscale); // make it so that xscale[i]*yscale == 1/x_rs[i].stddev() + + sample_type samp; + auto add_constraint = [&](long i, long j) { + samp.clear(); + for (long k = 0; k < dims; ++k) + { + double temp = (points[i].x(k) - points[j].x(k))*xscale[k]*yscale; + samp.push_back(std::make_pair(k, temp*temp)); + } + + if (points[i].y > points[j].y) + samp.push_back(std::make_pair(dims + j, relative_noise_magnitude)); + else + samp.push_back(std::make_pair(dims + i, relative_noise_magnitude)); + + const double diff = (points[i].y - points[j].y)*yscale; + samp.push_back(std::make_pair(dims + points.size(), 1-diff*diff)); + + x.push_back(samp); + y.push_back(1); + }; + + if (active_constraints.size() == 0) + { + x.reserve(points.size()*(points.size()-1)/2); + y.reserve(points.size()*(points.size()-1)/2); + for (size_t i = 0; i < points.size(); ++i) + { + for (size_t j = i+1; j < points.size(); ++j) + { + 
add_constraint(i,j); + } + } + } + else + { + for (auto& p : active_constraints) + add_constraint(p.first, p.second); + } + + + + + svm_c_linear_dcd_trainer<kernel_type> trainer; + trainer.set_c(std::numeric_limits<double>::infinity()); + //trainer.be_verbose(); + trainer.force_last_weight_to_1(true); + trainer.set_epsilon(solver_eps); + + svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state; + auto df = trainer.train(x,y, state); + + // save the active constraints for later so we can use them inside add() to add + // new points efficiently. + if (active_constraints.size() == 0) + { + long k = 0; + for (size_t i = 0; i < points.size(); ++i) + { + for (size_t j = i+1; j < points.size(); ++j) + { + if (state.get_alpha()[k++] != 0) + active_constraints.push_back(std::make_pair(i,j)); + } + } + } + else + { + DLIB_CASSERT(state.get_alpha().size() == active_constraints.size()); + new_active_constraints.clear(); + for (size_t i = 0; i < state.get_alpha().size(); ++i) + { + if (state.get_alpha()[i] != 0) + new_active_constraints.push_back(active_constraints[i]); + } + active_constraints.swap(new_active_constraints); + } + + //std::cout << "points.size(): " << points.size() << std::endl; + //std::cout << "active_constraints.size(): " << active_constraints.size() << std::endl; + + + const auto& bv = df.basis_vectors(0); + slopes.set_size(dims); + for (long i = 0; i < dims; ++i) + slopes(i) = bv[i].second*xscale[i]*xscale[i]; + + //std::cout << "slopes:" << trans(slopes); + + offsets.assign(points.size(),0); + + + for (size_t i = 0; i < points.size(); ++i) + { + offsets[i] += bv[slopes.size()+i].second*relative_noise_magnitude; + } + } + + + + double relative_noise_magnitude = 0.001; + double solver_eps = 0.0001; + std::vector<std::pair<size_t,size_t>> active_constraints, new_active_constraints; + + std::vector<function_evaluation> points; + std::vector<double> offsets; // offsets.size() == points.size() + matrix<double,0,1> slopes; // slopes.size() == 
points[0].x.size() + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_UPPER_bOUND_FUNCTION_Hh_ + + diff --git a/ml/dlib/dlib/global_optimization/upper_bound_function_abstract.h b/ml/dlib/dlib/global_optimization/upper_bound_function_abstract.h new file mode 100644 index 000000000..56b361597 --- /dev/null +++ b/ml/dlib/dlib/global_optimization/upper_bound_function_abstract.h @@ -0,0 +1,212 @@ +// Copyright (C) 2017 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_UPPER_bOUND_FUNCTION_ABSTRACT_Hh_ +#ifdef DLIB_UPPER_bOUND_FUNCTION_ABSTRACT_Hh_ + +#include "../matrix.h" +#include <limits> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + struct function_evaluation + { + /*! + WHAT THIS OBJECT REPRESENTS + This object records the output of a real valued function in response to + some input. + + In particular, if you have a function F(x) then the function_evaluation is + simply a struct that records x and the scalar value F(x). + !*/ + + function_evaluation() = default; + function_evaluation(const matrix<double,0,1>& x, double y) :x(x), y(y) {} + + matrix<double,0,1> x; + double y = std::numeric_limits<double>::quiet_NaN(); + }; + +// ---------------------------------------------------------------------------------------- + + class upper_bound_function + { + /*! + WHAT THIS OBJECT REPRESENTS + This object represents a piecewise linear non-parametric function that can + be used to define an upper bound on some more complex and unknown function. + To describe this precisely, let's assume there is a function F(x) which you + are capable of sampling from but otherwise know nothing about, and that you + would like to find an upper bounding function U(x) such that U(x) >= F(x) + for any x. It would also be good if U(x)-F(x) was minimal. I.e. 
we would + like U(x) to be a tight upper bound, not something vacuous like U(x) = + infinity. + + The upper_bound_function class is a tool for creating this kind of upper + bounding function from a set of function_evaluations of F(x). We do this + by considering only U(x) of the form: + U = [](matrix<double,0,1> x) { + double min_ub = infinity; + for (size_t i = 0; i < POINTS.size(); ++i) { + function_evaluation p = POINTS[i] + double local_bound = p.y + sqrt(noise_terms[i] + trans(p.x-x)*M*(p.x-x)) + min_ub = min(min_ub, local_bound) + } + return min_ub; + } + Where POINTS is an array of function_evaluation instances drawn from F(x), + M is a diagonal matrix, and noise_terms is an array of scalars. + + To create an upper bound U(x), the upper_bound_function takes a POINTS array + containing evaluations of F(x) as input and solves the following quadratic + program to find the parameters of U(x): + + min_{M,noise_terms}: sum(squared(M)) + sum(squared(noise_terms/relative_noise_magnitude)) + s.t. U(POINTS[i].x) >= POINTS[i].y, for all i + noise_terms[i] >= 0 + min(M) >= 0 + M is a diagonal matrix + + Therefore, the quadratic program finds the U(x) that always upper bounds + F(x) on the supplied POINTS, but is otherwise as small as possible. + + + + The inspiration for the upper_bound_function object came from the AdaLIPO + algorithm from this excellent paper: + Global optimization of Lipschitz functions + Malherbe, Cédric and Vayatis, Nicolas + International Conference on Machine Learning - 2017 + In that paper, they propose to use a simpler U(x) where noise_terms is + always 0 and M is a diagonal matrix where each diagonal element is the same + value. Therefore, there is only a single scalar parameter for U(x) in + their formulation of the problem. This causes difficulties if F(x) is + stochastic or has discontinuities since, without the noise term, M will + become really huge and the upper bound becomes vacuously large. 
It is also + problematic if the gradient of F(x) with respect to x contains elements of + widely varying magnitude since the simpler formulation of U(x) assumes a + uniform rate of change regardless of which dimension is varying. + !*/ + + public: + + upper_bound_function( + ); + /*! + ensures + - #num_points() == 0 + - #dimensionality() == 0 + !*/ + + explicit upper_bound_function( + const std::vector<function_evaluation>& points, + const double relative_noise_magnitude = 0.001, + const double solver_eps = 0.0001 + ); + /*! + requires + - all the x vectors in points must have the same non-zero dimensionality. + - relative_noise_magnitude >= 0 + - solver_eps > 0 + ensures + - Creates an upper bounding function U(x), as described above, assuming that + the given points are drawn from F(x). + - Uses the provided relative_noise_magnitude when solving the QP, as + described above. Note that relative_noise_magnitude can be set to 0. If + you do this then all the noise terms are constrained to 0. You should + only do this if you know F(x) is non-stochastic and continuous + everywhere. + - When solving the QP used to find the parameters of U(x), the upper + bounding function, we solve the QP to solver_eps accuracy. It's + possible that large enough solver_eps can lead to upper bounds that don't + upper bound all the supplied points. But for reasonable epsilon values + this shouldn't be a problem. + - #num_points() == points.size() + - #dimensionality() == points[0].x.size() + !*/ + + upper_bound_function( + const double relative_noise_magnitude, + const double solver_eps + ); + /*! + requires + - relative_noise_magnitude >= 0 + - solver_eps > 0 + ensures + - #num_points() == 0 + - #dimensionality() == 0 + - This constructor is the same as calling the above constructor with points.size()==0 + !*/ + + + void add ( + const function_evaluation& point + ); + /*! 
+ requires + - num_points() == 0 || point.x.size() == dimensionality() + - point.x.size() != 0 + ensures + - Adds point to get_points(). + - Incrementally updates the upper bounding function with the given function + evaluation. That is, we assume that F(point.x)==point.y and solve the QP + described above to find the new U(x) that upper bounds all the points + this object knows about (i.e. all the points in get_points() and the new point). + - Calling add() is much faster than recreating the upper_bound_function + from scratch with all the points. This is because we warm start with the + previous solution to the QP. This is done by discarding any non-active + constraints and solving the QP again with only the previously active + constraints and the new constraints formed by all the pairs of the new + point and the old points. This means the QP solved by add() is much + smaller than the QP that would be solved by a fresh call to the + upper_bound_function constructor. + !*/ + + const std::vector<function_evaluation>& get_points( + ) const; + /*! + ensures + - returns the points from F(x) used to define this upper bounding function. + These are all the function_evaluation objects given to this object via + its constructor and add(). + !*/ + + long num_points( + ) const; + /*! + ensures + - returns the number of points used to define the upper bounding function. + (i.e. returns get_points().size()) + !*/ + + long dimensionality( + ) const; + /*! + ensures + - returns the dimensionality of the input vectors to the upper bounding function. + !*/ + + double operator() ( + const matrix<double,0,1>& x + ) const; + /*! + requires + - num_points() > 0 + - x.size() == dimensionality() + ensures + - return U(x) + (i.e. returns the upper bound on F(x) at x given by our upper bounding function) + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_UPPER_bOUND_FUNCTION_ABSTRACT_Hh_ + + |