6 files changed, 1368 insertions, 0 deletions
diff --git a/ml/dlib/dlib/control/approximate_linear_models.h b/ml/dlib/dlib/control/approximate_linear_models.h
new file mode 100644
index 000000000..9732d71e9
--- /dev/null
+++ b/ml/dlib/dlib/control/approximate_linear_models.h
@@ -0,0 +1,128 @@
+// Copyright (C) 2015  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_APPROXIMATE_LINEAR_MODELS_Hh_
+#define DLIB_APPROXIMATE_LINEAR_MODELS_Hh_
+
+#include "approximate_linear_models_abstract.h"
+#include "../matrix.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename feature_extractor
+        >
+    struct process_sample
+    {
+        typedef feature_extractor feature_extractor_type;
+        typedef typename feature_extractor::state_type state_type;
+        typedef typename feature_extractor::action_type action_type;
+
+        process_sample(){}
+
+        process_sample(
+            const state_type& s,
+            const action_type& a,
+            const state_type& n,
+            const double& r
+        ) : state(s), action(a), next_state(n), reward(r) {}
+
+        state_type  state;
+        action_type action;
+        state_type  next_state;
+        double reward;
+    };
+
+    template < typename feature_extractor >
+    void serialize (const process_sample<feature_extractor>& item, std::ostream& out)
+    {
+        serialize(item.state, out);
+        serialize(item.action, out);
+        serialize(item.next_state, out);
+        serialize(item.reward, out);
+    }
+
+    template < typename feature_extractor >
+    void deserialize (process_sample<feature_extractor>& item, std::istream& in)
+    {
+        deserialize(item.state, in);
+        deserialize(item.action, in);
+        deserialize(item.next_state, in);
+        deserialize(item.reward, in);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename feature_extractor
+        >
+    class policy
+    {
+    public:
+
+        typedef feature_extractor feature_extractor_type;
+        typedef typename feature_extractor::state_type state_type;
+        typedef typename feature_extractor::action_type action_type;
+
+
+        policy (
+        )
+        {
+            w.set_size(fe.num_features());
+            w = 0;
+        }
+
+        policy (
+            const matrix<double,0,1>& weights_,
+            const feature_extractor& fe_
+        ) : w(weights_), fe(fe_) {}
+
+        action_type operator() (
+            const state_type& state
+        ) const
+        {
+            return fe.find_best_action(state,w);
+        }
+
+        const feature_extractor& get_feature_extractor (
+        ) const { return fe; }
+
+        const matrix<double,0,1>& get_weights (
+        ) const { return w; }
+
+
+    private:
+        matrix<double,0,1> w;
+        feature_extractor fe;
+    };
+
+    template < typename feature_extractor >
+    inline void serialize(const policy<feature_extractor>& item, std::ostream& out)
+    {
+        int version = 1;
+        serialize(version, out);
+        serialize(item.get_feature_extractor(), out);
+        serialize(item.get_weights(), out);
+    }
+    template < typename feature_extractor >
+    inline void deserialize(policy<feature_extractor>& item, std::istream& in)
+    {
+        int version = 0;
+        deserialize(version, in);
+        if (version != 1)
+            throw serialization_error("Unexpected version found while deserializing dlib::policy object.");
+        feature_extractor fe;
+        matrix<double,0,1> w;
+        deserialize(fe, in);
+        deserialize(w, in);
+        item = policy<feature_extractor>(w,fe);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_APPROXIMATE_LINEAR_MODELS_Hh_
+
diff --git a/ml/dlib/dlib/control/approximate_linear_models_abstract.h b/ml/dlib/dlib/control/approximate_linear_models_abstract.h
new file mode 100644
index 000000000..59dac4276
--- /dev/null
+++ b/ml/dlib/dlib/control/approximate_linear_models_abstract.h
@@ -0,0 +1,213 @@
+// Copyright (C) 2015  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_
+#ifdef DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_
+
+#include "../matrix.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    struct example_feature_extractor 
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object defines the interface a feature extractor must implement if it
+                is to be used with the process_sample and policy objects defined at the
+                bottom of this file.  Moreover, it is meant to represent the core part
+                of a model used in a reinforcement learning algorithm.
+                
+                In particular, this object models a Q(state,action) function where
+                    Q(state,action) == dot(w, PSI(state,action))
+                    where PSI(state,action) is a feature vector and w is a parameter
+                    vector.
+
+                Therefore, a feature extractor defines how the PSI(x,y) feature vector is
+                calculated.  It also defines the types used to represent the state and
+                action objects. 
+
+
+            THREAD SAFETY
+                Instances of this object are required to be threadsafe, that is, it should
+                be safe for multiple threads to make concurrent calls to the member
+                functions of this object.
+        !*/
+
+        // The state and actions can be any types so long as you provide typedefs for them.
+        typedef T state_type;
+        typedef U action_type; 
+        // We can also say that the last element in the weight vector w must be 1.  This
+        // can be useful for including a prior into your model.
+        const static bool force_last_weight_to_1 = false;
+
+        example_feature_extractor(
+        );
+        /*!
+            ensures
+                - this object is properly initialized.
+        !*/
+
+        unsigned long num_features(
+        ) const;
+        /*!
+            ensures
+                - returns the dimensionality of the PSI() feature vector.  
+        !*/
+
+        action_type find_best_action (
+            const state_type& state,
+            const matrix<double,0,1>& w
+        ) const;
+        /*!
+            ensures
+                - returns the action A that maximizes Q(state,A) = dot(w,PSI(state,A)).
+                  That is, this function finds the best action to take in the given state
+                  when our model is parameterized by the given weight vector w.
+        !*/
+
+        void get_features (
+            const state_type& state,
+            const action_type& action,
+            matrix<double,0,1>& feats
+        ) const;
+        /*!
+            ensures
+                - #feats.size() == num_features()
+                - #feats == PSI(state,action)
+        !*/
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename feature_extractor
+        >
+    struct process_sample
+    {
+        /*!
+            REQUIREMENTS ON feature_extractor
+                feature_extractor should implement the example_feature_extractor interface
+                defined at the top of this file.
+
+            WHAT THIS OBJECT REPRESENTS
+                This object holds a training sample for a reinforcement learning algorithm.
+                In particular, it should be a sample from some process where the process
+                was in state this->state, then took this->action action which resulted in
+                receiving this->reward and ending up in the state this->next_state.
+        !*/
+
+        typedef feature_extractor feature_extractor_type;
+        typedef typename feature_extractor::state_type state_type;
+        typedef typename feature_extractor::action_type action_type;
+
+        process_sample(){}
+
+        process_sample(
+            const state_type& s,
+            const action_type& a,
+            const state_type& n,
+            const double& r
+        ) : state(s), action(a), next_state(n), reward(r) {}
+
+        state_type  state;
+        action_type action;
+        state_type  next_state;
+        double reward;
+    };
+
+    template < typename feature_extractor >
+    void serialize (const process_sample<feature_extractor>& item, std::ostream& out);
+    template < typename feature_extractor >
+    void deserialize (process_sample<feature_extractor>& item, std::istream& in);
+    /*!
+        provides serialization support.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename feature_extractor
+        >
+    class policy
+    {
+        /*!
+            REQUIREMENTS ON feature_extractor
+                feature_extractor should implement the example_feature_extractor interface
+                defined at the top of this file.
+
+            WHAT THIS OBJECT REPRESENTS
+                This is a policy based on the supplied feature_extractor model.  In
+                particular, it maps from feature_extractor::state_type to the best action
+                to take in that state.
+        !*/
+
+    public:
+
+        typedef feature_extractor feature_extractor_type;
+        typedef typename feature_extractor::state_type state_type;
+        typedef typename feature_extractor::action_type action_type;
+
+
+        policy (
+        );
+        /*!
+            ensures
+                - #get_feature_extractor() == feature_extractor() 
+                  (i.e. it will have its default value)
+                - #get_weights().size() == #get_feature_extractor().num_features()
+                - #get_weights() == 0
+        !*/
+
+        policy (
+            const matrix<double,0,1>& weights,
+            const feature_extractor& fe
+        ); 
+        /*!
+            requires
+                - fe.num_features() == weights.size()
+            ensures
+                - #get_feature_extractor() == fe
+                - #get_weights() == weights
+        !*/
+
+        action_type operator() (
+            const state_type& state
+        ) const;
+        /*!
+            ensures
+                - returns get_feature_extractor().find_best_action(state,w);
+        !*/
+
+        const feature_extractor& get_feature_extractor (
+        ) const; 
+        /*!
+            ensures
+                - returns the feature extractor used by this object
+        !*/
+
+        const matrix<double,0,1>& get_weights (
+        ) const; 
+        /*!
+            ensures
+                - returns the parameter vector (w) associated with this object.  The length
+                  of the vector is get_feature_extractor().num_features().  
+        !*/
+
+    };
+
+    template < typename feature_extractor >
+    void serialize(const policy<feature_extractor>& item, std::ostream& out);
+    template < typename feature_extractor >
+    void deserialize(policy<feature_extractor>& item, std::istream& in);
+    /*!
+        provides serialization support.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+
+#endif // DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_
+ 
diff --git a/ml/dlib/dlib/control/lspi.h b/ml/dlib/dlib/control/lspi.h
new file mode 100644
index 000000000..b21a501d2
--- /dev/null
+++ b/ml/dlib/dlib/control/lspi.h
@@ -0,0 +1,188 @@
+// Copyright (C) 2015  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_LSPI_Hh_
+#define DLIB_LSPI_Hh_
+
+#include "lspi_abstract.h"
+#include "approximate_linear_models.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename feature_extractor
+        >
+    class lspi
+    {
+    public:
+        typedef feature_extractor feature_extractor_type;
+        typedef typename feature_extractor::state_type state_type;
+        typedef typename feature_extractor::action_type action_type;
+
+        explicit lspi(
+            const feature_extractor& fe_
+        ) : fe(fe_)
+        {
+            init();
+        }
+
+        lspi(
+        )
+        {
+            init();
+        }
+
+        double get_discount (
+        ) const { return discount; }
+
+        void set_discount (
+            double value
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(0 < value && value <= 1,
+                "\t void lspi::set_discount(value)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t value: " << value 
+                );
+            discount = value;
+        }
+
+        const feature_extractor& get_feature_extractor (
+        ) const { return fe; }
+
+        void be_verbose (
+        )
+        {
+            verbose = true;
+        }
+
+        void be_quiet (
+        )
+        {
+            verbose = false;
+        }
+
+        void set_epsilon (
+            double eps_
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(eps_ > 0,
+                "\t void lspi::set_epsilon(eps_)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t eps_: " << eps_ 
+                );
+            eps = eps_;
+        }
+
+        double get_epsilon (
+        ) const
+        { 
+            return eps;
+        }
+
+        void set_lambda (
+            double lambda_ 
+        ) 
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(lambda_ >= 0,
+                "\t void lspi::set_lambda(lambda_)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t lambda_: " << lambda_ 
+                );
+            lambda = lambda_;
+        }
+
+        double get_lambda (
+        ) const 
+        { 
+            return lambda; 
+        }
+
+        void set_max_iterations (
+            unsigned long max_iter
+        ) { max_iterations = max_iter; }
+
+        unsigned long get_max_iterations (
+        ) { return max_iterations; }
+
+        template <typename vector_type>
+        policy<feature_extractor> train (
+            const vector_type& samples
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(samples.size() > 0,
+                "\t policy lspi::train(samples)"
+                << "\n\t invalid inputs were given to this function"
+                );
+
+            matrix<double,0,1> w(fe.num_features());
+            w = 0;
+            matrix<double,0,1> prev_w, b, f1, f2;
+
+            matrix<double> A;
+
+            double change; 
+            unsigned long iter = 0;
+            do
+            {
+                A = identity_matrix<double>(fe.num_features())*lambda;
+                b = 0;
+                for (unsigned long i = 0; i < samples.size(); ++i)
+                {
+                    fe.get_features(samples[i].state, samples[i].action, f1);
+                    fe.get_features(samples[i].next_state, 
+                                    fe.find_best_action(samples[i].next_state,w), 
+                                    f2);
+                    A += f1*trans(f1 - discount*f2);
+                    b += f1*samples[i].reward;
+                }
+
+                prev_w = w;
+                if (feature_extractor::force_last_weight_to_1)
+                    w = join_cols(pinv(colm(A,range(0,A.nc()-2)))*(b-colm(A,A.nc()-1)),mat(1.0));
+                else
+                    w = pinv(A)*b;
+
+                change = length(w-prev_w);
+                ++iter;
+
+                if (verbose)
+                    std::cout << "iteration: " << iter << "\tchange: " << change << std::endl;
+
+            } while(change > eps && iter < max_iterations);
+
+            return policy<feature_extractor>(w,fe);
+        }
+
+
+    private:
+
+        void init()
+        {
+            lambda = 0.01;
+            discount = 0.8;
+            eps = 0.01;
+            verbose = false;
+            max_iterations = 100;
+        }
+
+        double lambda;
+        double discount;
+        double eps;
+        bool verbose;
+        unsigned long max_iterations;
+        feature_extractor fe;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_LSPI_Hh_
+
diff --git a/ml/dlib/dlib/control/lspi_abstract.h b/ml/dlib/dlib/control/lspi_abstract.h
new file mode 100644
index 000000000..f262d16f4
--- /dev/null
+++ b/ml/dlib/dlib/control/lspi_abstract.h
@@ -0,0 +1,193 @@
+// Copyright (C) 2015  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_LSPI_ABSTRACT_Hh_
+#ifdef DLIB_LSPI_ABSTRACT_Hh_
+
+#include "approximate_linear_models_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename feature_extractor
+        >
+    class lspi
+    {
+        /*!
+            REQUIREMENTS ON feature_extractor
+                feature_extractor should implement the example_feature_extractor interface
+                defined at the top of dlib/control/approximate_linear_models_abstract.h
+
+            WHAT THIS OBJECT REPRESENTS
+                This object is an implementation of the reinforcement learning algorithm
+                described in the following paper:
+                    Lagoudakis, Michail G., and Ronald Parr. "Least-squares policy
+                    iteration." The Journal of Machine Learning Research 4 (2003):
+                    1107-1149.
+                
+                This means that it takes a bunch of training data in the form of
+                process_samples and outputs a policy that hopefully performs well when run
+                on the process that generated those samples.
+        !*/
+
+    public:
+        typedef feature_extractor feature_extractor_type;
+        typedef typename feature_extractor::state_type state_type;
+        typedef typename feature_extractor::action_type action_type;
+
+        explicit lspi(
+            const feature_extractor& fe_
+        ); 
+        /*!
+            ensures
+                - #get_feature_extractor() == fe_
+                - #get_lambda() == 0.01
+                - #get_discount == 0.8
+                - #get_epsilon() == 0.01
+                - is not verbose
+                - #get_max_iterations() == 100
+        !*/
+
+        lspi(
+        );
+        /*!
+            ensures
+                - #get_feature_extractor() == feature_extractor() 
+                  (i.e. it will have its default value)
+                - #get_lambda() == 0.01
+                - #get_discount == 0.8
+                - #get_epsilon() == 0.01
+                - is not verbose
+                - #get_max_iterations() == 100
+        !*/
+
+        double get_discount (
+        ) const;
+        /*!
+            ensures
+                - returns the discount applied to the sum of rewards in the Bellman
+                  equation.
+        !*/
+
+        void set_discount (
+            double value
+        );
+        /*!
+            requires
+                - 0 < value <= 1
+            ensures
+                - #get_discount() == value
+        !*/
+
+        const feature_extractor& get_feature_extractor (
+        ) const;
+        /*!
+            ensures
+                - returns the feature extractor used by this object
+        !*/
+
+        void be_verbose (
+        );
+        /*!
+            ensures
+                - This object will print status messages to standard out so that a 
+                  user can observe the progress of the algorithm.
+        !*/
+
+        void be_quiet (
+        );
+        /*!
+            ensures
+                - this object will not print anything to standard out
+        !*/
+
+        void set_epsilon (
+            double eps
+        );
+        /*!
+            requires
+                - eps > 0
+            ensures
+                - #get_epsilon() == eps
+        !*/
+
+        double get_epsilon (
+        ) const;
+        /*!
+            ensures
+                - returns the error epsilon that determines when training should stop.
+                  Smaller values may result in a more accurate solution but take longer to
+                  train.  
+        !*/
+
+        void set_lambda (
+            double lambda_ 
+        ); 
+        /*!
+            requires
+                - lambda >= 0
+            ensures
+                - #get_lambda() == lambda 
+        !*/
+
+        double get_lambda (
+        ) const;
+        /*!
+            ensures
+                - returns the regularization parameter.  It is the parameter that 
+                  determines the trade off between trying to fit the training data 
+                  exactly or allowing more errors but hopefully improving the 
+                  generalization ability of the resulting function.  Smaller values 
+                  encourage exact fitting while larger values of lambda may encourage 
+                  better generalization. 
+        !*/
+
+        void set_max_iterations (
+            unsigned long max_iter
+        ); 
+        /*!
+            ensures
+                - #get_max_iterations() == max_iter
+        !*/
+
+        unsigned long get_max_iterations (
+        ); 
+        /*!
+            ensures
+                - returns the maximum number of iterations the SVM optimizer is allowed to
+                  run before it is required to stop and return a result.
+        !*/
+
+        template <
+            typename vector_type
+            >
+        policy<feature_extractor> train (
+            const vector_type& samples
+        ) const;
+        /*!
+            requires
+                - samples.size() > 0
+                - samples is something with an interface that looks like 
+                  std::vector<process_sample<feature_extractor>>.  That is, it should
+                  be some kind of array of process_sample objects.
+            ensures
+                - Trains a policy based on the given data and returns the results.  The
+                  idea is to find a policy that will obtain the largest possible reward
+                  when run on the process that generated the samples.  In particular, 
+                  if the returned policy is P then:
+                    - P(S) == the best action to take when in state S.
+                    - if (feature_extractor::force_last_weight_to_1) then
+                        - The last element of P.get_weights() is 1. 
+        !*/
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_LSPI_ABSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/control/mpc.h b/ml/dlib/dlib/control/mpc.h
new file mode 100644
index 000000000..48ef2b72d
--- /dev/null
+++ b/ml/dlib/dlib/control/mpc.h
@@ -0,0 +1,370 @@
+// Copyright (C) 2015  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_MPC_Hh_
+#define DLIB_MPC_Hh_
+
+#include "mpc_abstract.h"
+#include "../matrix.h"
+#include "../algs.h"
+
+
+namespace dlib
+{
+    template <
+        long S_,
+        long I_,
+        unsigned long horizon_
+        >
+    class mpc
+    {
+
+    public:
+
+        const static long S = S_;
+        const static long I = I_;
+        const static unsigned long horizon = horizon_;
+
+        mpc(
+        ) 
+        {
+            A = 0;
+            B = 0;
+            C = 0;
+            Q = 0;
+            R = 0;
+            lower = 0;
+            upper = 0;
+
+            max_iterations = 0;
+            eps = 0.01;
+            for (unsigned long i = 0; i < horizon; ++i)
+            {
+                target[i].set_size(A.nr());
+                target[i] = 0;
+
+                controls[i].set_size(B.nc());
+                controls[i] = 0;
+            }
+            lambda = 0;
+        }
+
+        mpc (
+            const matrix<double,S,S>& A_,
+            const matrix<double,S,I>& B_,
+            const matrix<double,S,1>& C_,
+            const matrix<double,S,1>& Q_,
+            const matrix<double,I,1>& R_,
+            const matrix<double,I,1>& lower_,
+            const matrix<double,I,1>& upper_
+        ) : A(A_), B(B_), C(C_), Q(Q_), R(R_), lower(lower_), upper(upper_)
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(A.nr() > 0 && B.nc() > 0,
+                "\t mpc::mpc()"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t A.nr(): " <<  A.nr()
+                << "\n\t B.nc(): " <<  B.nc()
+                );
+
+            DLIB_ASSERT(A.nr() == A.nc() && 
+                        A.nr() == B.nr() && 
+                        A.nr() == C.nr() && 
+                        A.nr() == Q.nr(),
+                "\t mpc::mpc()"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t A.nr(): " <<  A.nr()
+                << "\n\t A.nc(): " <<  A.nc()
+                << "\n\t B.nr(): " <<  B.nr()
+                << "\n\t C.nr(): " <<  C.nr()
+                << "\n\t Q.nr(): " <<  Q.nr()
+                );
+            DLIB_ASSERT(
+                        B.nc() == R.nr() && 
+                        B.nc() == lower.nr() && 
+                        B.nc() == upper.nr() ,
+                "\t mpc::mpc()"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t B.nr(): " <<  B.nr()
+                << "\n\t B.nc(): " <<  B.nc()
+                << "\n\t lower.nr(): " <<  lower.nr()
+                << "\n\t upper.nr(): " <<  upper.nr()
+                );
+            DLIB_ASSERT(min(Q) >= 0 &&
+                        min(R) >  0 &&
+                        min(upper-lower) >= 0,
+                "\t mpc::mpc()"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t min(Q): " << min(Q) 
+                << "\n\t min(R): " << min(R) 
+                << "\n\t min(upper-lower): " << min(upper-lower) 
+                );
+
+
+            max_iterations = 10000;
+            eps = 0.01;
+            for (unsigned long i = 0; i < horizon; ++i)
+            {
+                target[i].set_size(A.nr());
+                target[i] = 0;
+
+                controls[i].set_size(B.nc());
+                controls[i] = 0;
+            }
+
+            // Bound the maximum eigenvalue of the hessian by computing the trace of the
+            // hessian matrix. 
+            lambda = sum(R)*horizon;
+            matrix<double,S,S> temp = diagm(Q);
+            for (unsigned long c = 0; c < horizon; ++c)
+            {
+                lambda += trace(trans(B)*temp*B);
+                Q_diag[horizon-c-1] = diag(trans(B)*temp*B);
+                temp = trans(A)*temp*A + diagm(Q);
+            }
+
+        }
+
+        const matrix<double,S,S>& get_A (
+        ) const { return A; }
+        const matrix<double,S,I>& get_B (
+        ) const { return B; }
+        const matrix<double,S,1>& get_C (
+        ) const { return C; }
+        const matrix<double,S,1>& get_Q (
+        ) const { return Q; }
+        const matrix<double,I,1>& get_R (
+        ) const { return R; }
+        const matrix<double,I,1>& get_lower_constraints (
+        ) const { return lower; }
+        const matrix<double,I,1>& get_upper_constraints (
+        ) const { return upper; }
+
+        void set_target (
+            const matrix<double,S,1>& val,
+            const unsigned long time
+        )
+        {
+            DLIB_ASSERT(time < horizon,
+                "\t void mpc::set_target(eps_)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t time: " << time 
+                << "\n\t horizon: " << horizon 
+                );
+
+            target[time] = val;
+        }
+
+        void set_target (
+            const matrix<double,S,1>& val
+        )
+        {
+            for (unsigned long i = 0; i < horizon; ++i)
+                target[i] = val;
+        }
+
+        void set_last_target (
+            const matrix<double,S,1>& val
+        )
+        {
+            set_target(val, horizon-1);
+        }
+
+        const matrix<double,S,1>& get_target (
+            const unsigned long time
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(time < horizon,
+                "\t matrix mpc::get_target(eps_)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t time: " << time 
+                << "\n\t horizon: " << horizon 
+                );
+
+            return target[time];
+        }
+
+        unsigned long get_max_iterations (
+        ) const { return max_iterations; }
+
+        void set_max_iterations (
+            unsigned long max_iter
+        ) 
+        {
+            max_iterations = max_iter;
+        }
+
+        void set_epsilon (
+            double eps_
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(eps_ > 0,
+                "\t void mpc::set_epsilon(eps_)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t eps_: " << eps_ 
+                );
+            eps = eps_;
+        }
+
+        double get_epsilon (
+        ) const
+        { 
+            return eps;
+        }
+
+        matrix<double,I,1> operator() (
+            const matrix<double,S,1>& current_state
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(min(R) > 0 && A.nr() == current_state.size(),
+                "\t matrix mpc::operator(current_state)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t min(R): " << min(R) 
+                << "\n\t A.nr(): " << A.nr() 
+                << "\n\t current_state.size(): " << current_state.size() 
+                );
+
+            // Shift the inputs over by one time step so we can use them to warm start the
+            // optimizer.
+            for (unsigned long i = 1; i < horizon; ++i)
+                controls[i-1] = controls[i];
+
+            solve_linear_mpc(current_state);
+
+            for (unsigned long i = 1; i < horizon; ++i)
+                target[i-1] = target[i];
+
+            return controls[0];
+        }
+
+    private:
+
+
+        // These temporary variables here just to avoid reallocating them on each call to
+        // operator().
+        matrix<double,S,1> M[horizon];
+        matrix<double,I,1> MM[horizon];
+        matrix<double,I,1> df[horizon]; 
+        matrix<double,I,1> v[horizon]; 
+        matrix<double,I,1> v_old[horizon]; 
+
+        void solve_linear_mpc (
+            const matrix<double,S,1>& initial_state
+        )
+        {
+            // make it so MM == trans(K)*Q*(M-target)
+            M[0] = A*initial_state + C;
+            for (unsigned long i = 1; i < horizon; ++i)
+                M[i] = A*M[i-1] + C;
+            for (unsigned long i = 0; i < horizon; ++i)
+                M[i] = diagm(Q)*(M[i]-target[i]);
+            for (long i = (long)horizon-2; i >= 0; --i)
+                M[i] += trans(A)*M[i+1];
+            for (unsigned long i = 0; i < horizon; ++i)
+                MM[i] = trans(B)*M[i];
+
+
+
+            unsigned long iter = 0;
+            for (; iter < max_iterations; ++iter)
+            {
+                // compute current gradient and put it into df.
+                // df == H*controls + MM;
+                M[0] = B*controls[0];
+                for (unsigned long i = 1; i < horizon; ++i)
+                    M[i] = A*M[i-1] + B*controls[i];
+                for (unsigned long i = 0; i < horizon; ++i)
+                    M[i] = diagm(Q)*M[i];
+                for (long i = (long)horizon-2; i >= 0; --i)
+                    M[i] += trans(A)*M[i+1];
+                for (unsigned long i = 0; i < horizon; ++i)
+                    df[i] = MM[i] + trans(B)*M[i] + diagm(R)*controls[i];
+
+
+
+                // Check the stopping condition, which is the magnitude of the largest element
+                // of the gradient.
+                double max_df = 0;
+                unsigned long max_t = 0;
+                long max_v = 0;
+                for (unsigned long i = 0; i < horizon; ++i)
+                {
+                    for (long j = 0; j < controls[i].size(); ++j)
+                    {
+                        // if this variable isn't an active constraint then we care about it's
+                        // derivative.
+                        if (!((controls[i](j) <= lower(j) && df[i](j) > 0) || 
+                              (controls[i](j) >= upper(j) && df[i](j) < 0)))
+                        {
+                            if (std::abs(df[i](j)) > max_df)
+                            {
+                                max_df = std::abs(df[i](j));
+                                max_t = i;
+                                max_v = j;
+                            }
+                        }
+                    }
+                }
+                if (max_df < eps)
+                    break;
+
+
+
+                // We will start out by doing a little bit of coordinate descent because it
+                // allows us to optimize individual variables exactly.  Since we are warm
+                // starting each iteration with a really good solution this helps speed
+                // things up a lot.
+                const unsigned long smo_iters = 50;
+                if (iter < smo_iters)
+                {
+                    if (Q_diag[max_t](max_v) == 0) continue;
+
+                    // Take the optimal step but just for one variable.
+                    controls[max_t](max_v) = -(df[max_t](max_v)-Q_diag[max_t](max_v)*controls[max_t](max_v))/Q_diag[max_t](max_v);
+                    controls[max_t](max_v) = put_in_range(lower(max_v), upper(max_v), controls[max_t](max_v));
+
+                    // If this is the last SMO iteration then don't forget to initialize v
+                    // for the gradient steps.
+                    if (iter+1 == smo_iters)
+                    {
+                        for (unsigned long i = 0; i < horizon; ++i)
+                            v[i] = controls[i];
+                    }
+                }
+                else
+                {
+                    // Take a projected gradient step.
+                    for (unsigned long i = 0; i < horizon; ++i)
+                    {
+                        v_old[i] = v[i];
+                        v[i] = dlib::clamp(controls[i] - 1.0/lambda * df[i], lower, upper);
+                        controls[i] = dlib::clamp(v[i] + (std::sqrt(lambda)-1)/(std::sqrt(lambda)+1)*(v[i]-v_old[i]), lower, upper);
+                    }
+                }
+            }
+        }
+
+        unsigned long max_iterations;
+        double eps;
+
+        matrix<double,S,S> A;
+        matrix<double,S,I> B;
+        matrix<double,S,1> C;
+        matrix<double,S,1> Q;
+        matrix<double,I,1> R;
+        matrix<double,I,1> lower;
+        matrix<double,I,1> upper;
+        matrix<double,S,1> target[horizon]; 
+
+        double lambda; // abound on the largest eigenvalue of the hessian matrix.
+        matrix<double,I,1> Q_diag[horizon]; 
+        matrix<double,I,1> controls[horizon]; 
+
+    };
+
+}
+
+#endif // DLIB_MPC_Hh_
+
diff --git a/ml/dlib/dlib/control/mpc_abstract.h b/ml/dlib/dlib/control/mpc_abstract.h
new file mode 100644
index 000000000..b4421c076
--- /dev/null
+++ b/ml/dlib/dlib/control/mpc_abstract.h
@@ -0,0 +1,276 @@
+// Copyright (C) 2015  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_MPC_ABSTRACT_Hh_
+#ifdef DLIB_MPC_ABSTRACT_Hh_
+
+#include "../matrix.h"
+
+namespace dlib
+{
+    template <
+        long S_,
+        long I_,
+        unsigned long horizon_
+        >
+    class mpc
+    {
+        /*!
+            REQUIREMENTS ON horizon_
+                horizon_ > 0
+
+            REQUIREMENTS ON S_
+                S_ >= 0
+
+            REQUIREMENTS ON I_
+                I_ >= 0
+            
+            WHAT THIS OBJECT REPRESENTS
+                This object implements a linear model predictive controller.  To explain
+                what that means, suppose you have some process you want to control and the
+                process dynamics are described by the linear equation:
+                    x_{i+1} = A*x_i + B*u_i + C
+                That is, the next state the system goes into is a linear function of its
+                current state (x_i) and the current control (u_i) plus some constant bias
+                or disturbance.  
+                
+                A model predictive controller can find the control (u) you should apply to
+                drive the state (x) to some reference value, or alternatively to make the
+                state track some reference time-varying sequence.  It does this by
+                simulating the process for horizon_ time steps and selecting the control
+                that leads to the best performance over the next horizon_ steps.
+                
+                To be precise, each time you ask this object for a control, it solves the
+                following quadratic program:
+        
+                    min    sum_i trans(x_i-target_i)*Q*(x_i-target_i) + trans(u_i)*R*u_i 
+                  x_i,u_i
+
+                    such that: x_0     == current_state 
+                               x_{i+1} == A*x_i + B*u_i + C
+                               lower <= u_i <= upper
+                               0 <= i < horizon_
+
+                and reports u_0 as the control you should take given that you are currently
+                in current_state.  Q and R are user supplied matrices that define how we
+                penalize variations away from the target state as well as how much we want
+                to avoid generating large control signals.  
+                
+                Finally, the algorithm we use to solve this quadratic program is based
+                largely on the method described in:
+                  A Fast Gradient method for embedded linear predictive control (2011)
+                  by Markus Kogel and Rolf Findeisen
+        !*/
+
+    public:
+
+        const static long S = S_;
+        const static long I = I_;
+        const static unsigned long horizon = horizon_;
+
+        mpc(
+        );
+        /*!
+            ensures
+                - #get_max_iterations() == 0
+                - The A,B,C,Q,R,lower, and upper parameter matrices are filled with zeros.
+                  Therefore, to use this object you must initialize it via the constructor
+                  that supplies these parameters.
+        !*/
+
+        mpc (
+            const matrix<double,S,S>& A,
+            const matrix<double,S,I>& B,
+            const matrix<double,S,1>& C,
+            const matrix<double,S,1>& Q,
+            const matrix<double,I,1>& R,
+            const matrix<double,I,1>& lower,
+            const matrix<double,I,1>& upper
+        ); 
+        /*!
+            requires
+                - A.nr() > 0
+                - B.nc() > 0
+                - A.nr() == A.nc() == B.nr() == C.nr() == Q.nr()
+                - B.nc() == R.nr() == lower.nr() == upper.nr()
+                - min(Q) >= 0
+                - min(R) > 0
+                - min(upper-lower) >= 0
+            ensures
+                - #get_A() == A
+                - #get_B() == B
+                - #get_C() == C
+                - #get_Q() == Q
+                - #get_R() == R
+                - #get_lower_constraints() == lower
+                - #get_upper_constraints() == upper 
+                - for all valid i:
+                    - get_target(i) == a vector of all zeros
+                    - get_target(i).size() == A.nr()
+                - #get_max_iterations() == 10000 
+                - #get_epsilon() == 0.01
+        !*/
+
+        const matrix<double,S,S>& get_A (
+        ) const; 
+        /*!
+            ensures
+                - returns the A matrix from the quadratic program defined above. 
+        !*/
+
+        const matrix<double,S,I>& get_B (
+        ) const; 
+        /*!
+            ensures
+                - returns the B matrix from the quadratic program defined above. 
+        !*/
+
+        const matrix<double,S,1>& get_C (
+        ) const;
+        /*!
+            ensures
+                - returns the C matrix from the quadratic program defined above. 
+        !*/
+
+        const matrix<double,S,1>& get_Q (
+        ) const;
+        /*!
+            ensures
+                - returns the diagonal of the Q matrix from the quadratic program defined
+                  above. 
+        !*/
+
+        const matrix<double,I,1>& get_R (
+        ) const;
+        /*!
+            ensures
+                - returns the diagonal of the R matrix from the quadratic program defined
+                  above. 
+        !*/
+
+        const matrix<double,I,1>& get_lower_constraints (
+        ) const;
+        /*!
+            ensures
+                - returns the lower matrix from the quadratic program defined above.  All
+                  controls generated by this object will have values no less than this
+                  lower bound.  That is, any control u will satisfy min(u-lower) >= 0.
+        !*/
+
+        const matrix<double,I,1>& get_upper_constraints (
+        ) const;
+        /*!
+            ensures
+                - returns the upper matrix from the quadratic program defined above.  All
+                  controls generated by this object will have values no larger than this
+                  upper bound.  That is, any control u will satisfy min(upper-u) >= 0.
+        !*/
+
+        const matrix<double,S,1>& get_target (
+            const unsigned long time
+        ) const;
+        /*!
+            requires
+                - time < horizon
+            ensures
+                - This object will try to find the control sequence that results in the
+                  process obtaining get_target(time) state at the indicated time.  Note
+                  that the next time instant after "right now" is time 0. 
+        !*/
+
+        void set_target (
+            const matrix<double,S,1>& val,
+            const unsigned long time
+        );
+        /*!
+            requires
+                - time < horizon
+            ensures
+                - #get_target(time) == val
+        !*/
+
+        void set_target (
+            const matrix<double,S,1>& val
+        );
+        /*!
+            ensures
+                - for all valid t:
+                    - #get_target(t) == val
+        !*/
+
+        void set_last_target (
+            const matrix<double,S,1>& val
+        );
+        /*!
+            ensures
+                - performs: set_target(val, horizon-1)
+        !*/
+
+        unsigned long get_max_iterations (
+        ) const; 
+        /*!
+            ensures
+                - When operator() is called it solves an optimization problem to
+                  get_epsilon() precision to determine the next control action.  In
+                  particular, we run the optimizer until the magnitude of each element of
+                  the gradient vector is less than get_epsilon() or until
+                  get_max_iterations() solver iterations have been executed.
+        !*/
+
+        void set_max_iterations (
+            unsigned long max_iter
+        );
+        /*!
+            ensures
+                - #get_max_iterations() == max_iter
+        !*/
+
+        void set_epsilon (
+            double eps
+        );
+        /*!
+            requires
+                - eps > 0
+            ensures
+                - #get_epsilon() == eps
+        !*/
+
+        double get_epsilon (
+        ) const;
+        /*!
+            ensures
+                - When operator() is called it solves an optimization problem to
+                  get_epsilon() precision to determine the next control action.  In
+                  particular, we run the optimizer until the magnitude of each element of
+                  the gradient vector is less than get_epsilon() or until
+                  get_max_iterations() solver iterations have been executed.  This means
+                  that smaller epsilon values will give more accurate outputs but may take
+                  longer to compute.
+        !*/
+
+        matrix<double,I,1> operator() (
+            const matrix<double,S,1>& current_state
+        );
+        /*!
+            requires
+                - min(R) > 0
+                - A.nr() == current_state.size()
+            ensures
+                - Solves the model predictive control problem defined by the arguments to
+                  this objects constructor, assuming that the starting state is given by
+                  current_state.  Then we return the control that should be taken in the
+                  current state that best optimizes the quadratic objective function
+                  defined above.
+                - We also shift over the target states so that you only need to update the
+                  last one (if you are using non-zero target states) via a call to
+                  set_last_target()).  In particular, for all valid t, it will be the case
+                  that:
+                    - #get_target(t) == get_target(t+1)
+                    - #get_target(horizon-1) == get_target(horizon-1)
+        !*/
+
+    };
+
+}
+
+#endif // DLIB_MPC_ABSTRACT_Hh_
+