Diffstat (limited to 'ml/dlib/dlib/control')
-rw-r--r--   ml/dlib/dlib/control/approximate_linear_models.h            128
-rw-r--r--   ml/dlib/dlib/control/approximate_linear_models_abstract.h   213
-rw-r--r--   ml/dlib/dlib/control/lspi.h                                 188
-rw-r--r--   ml/dlib/dlib/control/lspi_abstract.h                        193
-rw-r--r--   ml/dlib/dlib/control/mpc.h                                  370
-rw-r--r--   ml/dlib/dlib/control/mpc_abstract.h                         276
6 files changed, 1368 insertions, 0 deletions
diff --git a/ml/dlib/dlib/control/approximate_linear_models.h b/ml/dlib/dlib/control/approximate_linear_models.h
new file mode 100644
index 000000000..9732d71e9
--- /dev/null
+++ b/ml/dlib/dlib/control/approximate_linear_models.h
@@ -0,0 +1,128 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_APPROXIMATE_LINEAR_MODELS_Hh_
+#define DLIB_APPROXIMATE_LINEAR_MODELS_Hh_
+
+#include "approximate_linear_models_abstract.h"
+#include "../matrix.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename feature_extractor
+ >
+ struct process_sample
+ {
+ typedef feature_extractor feature_extractor_type;
+ typedef typename feature_extractor::state_type state_type;
+ typedef typename feature_extractor::action_type action_type;
+
+ process_sample(){}
+
+ process_sample(
+ const state_type& s,
+ const action_type& a,
+ const state_type& n,
+ const double& r
+ ) : state(s), action(a), next_state(n), reward(r) {}
+
+ state_type state;
+ action_type action;
+ state_type next_state;
+ double reward;
+ };
+
+ template < typename feature_extractor >
+ void serialize (const process_sample<feature_extractor>& item, std::ostream& out)
+ {
+ serialize(item.state, out);
+ serialize(item.action, out);
+ serialize(item.next_state, out);
+ serialize(item.reward, out);
+ }
+
+ template < typename feature_extractor >
+ void deserialize (process_sample<feature_extractor>& item, std::istream& in)
+ {
+ deserialize(item.state, in);
+ deserialize(item.action, in);
+ deserialize(item.next_state, in);
+ deserialize(item.reward, in);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename feature_extractor
+ >
+ class policy
+ {
+ public:
+
+ typedef feature_extractor feature_extractor_type;
+ typedef typename feature_extractor::state_type state_type;
+ typedef typename feature_extractor::action_type action_type;
+
+
+ policy (
+ )
+ {
+ w.set_size(fe.num_features());
+ w = 0;
+ }
+
+ policy (
+ const matrix<double,0,1>& weights_,
+ const feature_extractor& fe_
+ ) : w(weights_), fe(fe_) {}
+
+ action_type operator() (
+ const state_type& state
+ ) const
+ {
+ return fe.find_best_action(state,w);
+ }
+
+ const feature_extractor& get_feature_extractor (
+ ) const { return fe; }
+
+ const matrix<double,0,1>& get_weights (
+ ) const { return w; }
+
+
+ private:
+ matrix<double,0,1> w;
+ feature_extractor fe;
+ };
+
+ template < typename feature_extractor >
+ inline void serialize(const policy<feature_extractor>& item, std::ostream& out)
+ {
+ int version = 1;
+ serialize(version, out);
+ serialize(item.get_feature_extractor(), out);
+ serialize(item.get_weights(), out);
+ }
+ template < typename feature_extractor >
+ inline void deserialize(policy<feature_extractor>& item, std::istream& in)
+ {
+ int version = 0;
+ deserialize(version, in);
+ if (version != 1)
+ throw serialization_error("Unexpected version found while deserializing dlib::policy object.");
+ feature_extractor fe;
+ matrix<double,0,1> w;
+ deserialize(fe, in);
+ deserialize(w, in);
+ item = policy<feature_extractor>(w,fe);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_APPROXIMATE_LINEAR_MODELS_Hh_
+
diff --git a/ml/dlib/dlib/control/approximate_linear_models_abstract.h b/ml/dlib/dlib/control/approximate_linear_models_abstract.h
new file mode 100644
index 000000000..59dac4276
--- /dev/null
+++ b/ml/dlib/dlib/control/approximate_linear_models_abstract.h
@@ -0,0 +1,213 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_
+#ifdef DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_
+
+#include "../matrix.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ struct example_feature_extractor
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object defines the interface a feature extractor must implement if it
+ is to be used with the process_sample and policy objects defined at the
+ bottom of this file. Moreover, it is meant to represent the core part
+ of a model used in a reinforcement learning algorithm.
+
+ In particular, this object models a Q(state,action) function where
+ Q(state,action) == dot(w, PSI(state,action))
+ where PSI(state,action) is a feature vector and w is a parameter
+ vector.
+
+            Therefore, a feature extractor defines how the PSI(state,action) feature vector is
+ calculated. It also defines the types used to represent the state and
+ action objects.
+
+
+ THREAD SAFETY
+            Instances of this object are required to be thread-safe, that is, it should
+            be safe for multiple threads to make concurrent calls to the member
+            functions of a single instance.
+ !*/
+
+ // The state and actions can be any types so long as you provide typedefs for them.
+ typedef T state_type;
+ typedef U action_type;
+        // You can also require that the last element of the weight vector w always be 1.
+        // This can be useful for incorporating a prior into your model.
+ const static bool force_last_weight_to_1 = false;
+
+ example_feature_extractor(
+ );
+ /*!
+ ensures
+ - this object is properly initialized.
+ !*/
+
+ unsigned long num_features(
+ ) const;
+ /*!
+ ensures
+ - returns the dimensionality of the PSI() feature vector.
+ !*/
+
+ action_type find_best_action (
+ const state_type& state,
+ const matrix<double,0,1>& w
+ ) const;
+ /*!
+ ensures
+ - returns the action A that maximizes Q(state,A) = dot(w,PSI(state,A)).
+ That is, this function finds the best action to take in the given state
+ when our model is parameterized by the given weight vector w.
+ !*/
+
+ void get_features (
+ const state_type& state,
+ const action_type& action,
+ matrix<double,0,1>& feats
+ ) const;
+ /*!
+ ensures
+ - #feats.size() == num_features()
+ - #feats == PSI(state,action)
+ !*/
+
+ };
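+
+    // For concreteness, the struct below is a minimal sketch of what an implementation of
+    // the above interface might look like for a toy problem with an integer state and three
+    // discrete actions.  Everything about it (the name toy_feature_extractor, the feature
+    // layout, the number of actions) is an illustrative assumption and not part of dlib.
+    struct toy_feature_extractor
+    {
+        typedef int state_type;
+        typedef int action_type;   // the actions are just 0, 1, or 2
+        const static bool force_last_weight_to_1 = false;
+
+        unsigned long num_features(
+        ) const { return 3*2; }    // a (state, bias) pair of features per action
+
+        void get_features (
+            const state_type& state,
+            const action_type& action,
+            matrix<double,0,1>& feats
+        ) const
+        {
+            feats.set_size(num_features());
+            feats = 0;
+            feats(action*2)   = state;  // state feature, active only for the chosen action
+            feats(action*2+1) = 1;      // bias feature, active only for the chosen action
+        }
+
+        action_type find_best_action (
+            const state_type& state,
+            const matrix<double,0,1>& w
+        ) const
+        {
+            // Evaluate Q(state,a) = dot(w,PSI(state,a)) for every action and keep the best.
+            matrix<double,0,1> feats;
+            get_features(state, 0, feats);
+            double best_value = dot(w, feats);
+            action_type best_action = 0;
+            for (action_type a = 1; a < 3; ++a)
+            {
+                get_features(state, a, feats);
+                if (dot(w, feats) > best_value)
+                {
+                    best_value = dot(w, feats);
+                    best_action = a;
+                }
+            }
+            return best_action;
+        }
+    };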
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename feature_extractor
+ >
+ struct process_sample
+ {
+ /*!
+ REQUIREMENTS ON feature_extractor
+ feature_extractor should implement the example_feature_extractor interface
+ defined at the top of this file.
+
+ WHAT THIS OBJECT REPRESENTS
+ This object holds a training sample for a reinforcement learning algorithm.
+ In particular, it should be a sample from some process where the process
+ was in state this->state, then took this->action action which resulted in
+ receiving this->reward and ending up in the state this->next_state.
+ !*/
+
+ typedef feature_extractor feature_extractor_type;
+ typedef typename feature_extractor::state_type state_type;
+ typedef typename feature_extractor::action_type action_type;
+
+ process_sample(){}
+
+ process_sample(
+ const state_type& s,
+ const action_type& a,
+ const state_type& n,
+ const double& r
+ ) : state(s), action(a), next_state(n), reward(r) {}
+
+ state_type state;
+ action_type action;
+ state_type next_state;
+ double reward;
+ };
+
+ template < typename feature_extractor >
+ void serialize (const process_sample<feature_extractor>& item, std::ostream& out);
+ template < typename feature_extractor >
+ void deserialize (process_sample<feature_extractor>& item, std::istream& in);
+ /*!
+ provides serialization support.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename feature_extractor
+ >
+ class policy
+ {
+ /*!
+ REQUIREMENTS ON feature_extractor
+ feature_extractor should implement the example_feature_extractor interface
+ defined at the top of this file.
+
+ WHAT THIS OBJECT REPRESENTS
+ This is a policy based on the supplied feature_extractor model. In
+ particular, it maps from feature_extractor::state_type to the best action
+ to take in that state.
+ !*/
+
+ public:
+
+ typedef feature_extractor feature_extractor_type;
+ typedef typename feature_extractor::state_type state_type;
+ typedef typename feature_extractor::action_type action_type;
+
+
+ policy (
+ );
+ /*!
+ ensures
+ - #get_feature_extractor() == feature_extractor()
+ (i.e. it will have its default value)
+ - #get_weights().size() == #get_feature_extractor().num_features()
+ - #get_weights() == 0
+ !*/
+
+ policy (
+ const matrix<double,0,1>& weights,
+ const feature_extractor& fe
+ );
+ /*!
+ requires
+ - fe.num_features() == weights.size()
+ ensures
+ - #get_feature_extractor() == fe
+ - #get_weights() == weights
+ !*/
+
+ action_type operator() (
+ const state_type& state
+ ) const;
+ /*!
+ ensures
+                - returns get_feature_extractor().find_best_action(state, get_weights())
+ !*/
+
+ const feature_extractor& get_feature_extractor (
+ ) const;
+ /*!
+ ensures
+ - returns the feature extractor used by this object
+ !*/
+
+ const matrix<double,0,1>& get_weights (
+ ) const;
+ /*!
+ ensures
+ - returns the parameter vector (w) associated with this object. The length
+ of the vector is get_feature_extractor().num_features().
+ !*/
+
+ };
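+
+    // For example, given a policy object my_policy (such as one produced by the lspi
+    // trainer declared in lspi_abstract.h), picking an action for a hypothetical
+    // current_state variable is simply:
+    //     action_type action = my_policy(current_state);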
+
+ template < typename feature_extractor >
+ void serialize(const policy<feature_extractor>& item, std::ostream& out);
+ template < typename feature_extractor >
+ void deserialize(policy<feature_extractor>& item, std::istream& in);
+ /*!
+ provides serialization support.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_
+
diff --git a/ml/dlib/dlib/control/lspi.h b/ml/dlib/dlib/control/lspi.h
new file mode 100644
index 000000000..b21a501d2
--- /dev/null
+++ b/ml/dlib/dlib/control/lspi.h
@@ -0,0 +1,188 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_LSPI_Hh_
+#define DLIB_LSPI_Hh_
+
+#include "lspi_abstract.h"
+#include "approximate_linear_models.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename feature_extractor
+ >
+ class lspi
+ {
+ public:
+ typedef feature_extractor feature_extractor_type;
+ typedef typename feature_extractor::state_type state_type;
+ typedef typename feature_extractor::action_type action_type;
+
+ explicit lspi(
+ const feature_extractor& fe_
+ ) : fe(fe_)
+ {
+ init();
+ }
+
+ lspi(
+ )
+ {
+ init();
+ }
+
+ double get_discount (
+ ) const { return discount; }
+
+ void set_discount (
+ double value
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(0 < value && value <= 1,
+ "\t void lspi::set_discount(value)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t value: " << value
+ );
+ discount = value;
+ }
+
+ const feature_extractor& get_feature_extractor (
+ ) const { return fe; }
+
+ void be_verbose (
+ )
+ {
+ verbose = true;
+ }
+
+ void be_quiet (
+ )
+ {
+ verbose = false;
+ }
+
+ void set_epsilon (
+ double eps_
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(eps_ > 0,
+ "\t void lspi::set_epsilon(eps_)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t eps_: " << eps_
+ );
+ eps = eps_;
+ }
+
+ double get_epsilon (
+ ) const
+ {
+ return eps;
+ }
+
+ void set_lambda (
+ double lambda_
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(lambda_ >= 0,
+ "\t void lspi::set_lambda(lambda_)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t lambda_: " << lambda_
+ );
+ lambda = lambda_;
+ }
+
+ double get_lambda (
+ ) const
+ {
+ return lambda;
+ }
+
+ void set_max_iterations (
+ unsigned long max_iter
+ ) { max_iterations = max_iter; }
+
+ unsigned long get_max_iterations (
+ ) { return max_iterations; }
+
+ template <typename vector_type>
+ policy<feature_extractor> train (
+ const vector_type& samples
+ ) const
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(samples.size() > 0,
+ "\t policy lspi::train(samples)"
+ << "\n\t invalid inputs were given to this function"
+ );
+
+ matrix<double,0,1> w(fe.num_features());
+ w = 0;
+ matrix<double,0,1> prev_w, b, f1, f2;
+
+ matrix<double> A;
+
+ double change;
+ unsigned long iter = 0;
+ do
+ {
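+                // Build the LSTDQ system A*w = b for the policy that is greedy with respect
+                // to the current w.  The lambda*identity_matrix term acts as a ridge
+                // regularizer on the least squares solve below.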
+ A = identity_matrix<double>(fe.num_features())*lambda;
+ b = 0;
+ for (unsigned long i = 0; i < samples.size(); ++i)
+ {
+ fe.get_features(samples[i].state, samples[i].action, f1);
+ fe.get_features(samples[i].next_state,
+ fe.find_best_action(samples[i].next_state,w),
+ f2);
+ A += f1*trans(f1 - discount*f2);
+ b += f1*samples[i].reward;
+ }
+
+ prev_w = w;
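+                // When the last weight is pinned to 1 we move the last column of A over to
+                // the right hand side and solve the reduced system for the remaining weights.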
+ if (feature_extractor::force_last_weight_to_1)
+ w = join_cols(pinv(colm(A,range(0,A.nc()-2)))*(b-colm(A,A.nc()-1)),mat(1.0));
+ else
+ w = pinv(A)*b;
+
+ change = length(w-prev_w);
+ ++iter;
+
+ if (verbose)
+ std::cout << "iteration: " << iter << "\tchange: " << change << std::endl;
+
+ } while(change > eps && iter < max_iterations);
+
+ return policy<feature_extractor>(w,fe);
+ }
+
+
+ private:
+
+ void init()
+ {
+ lambda = 0.01;
+ discount = 0.8;
+ eps = 0.01;
+ verbose = false;
+ max_iterations = 100;
+ }
+
+ double lambda;
+ double discount;
+ double eps;
+ bool verbose;
+ unsigned long max_iterations;
+ feature_extractor fe;
+ };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_LSPI_Hh_
+
diff --git a/ml/dlib/dlib/control/lspi_abstract.h b/ml/dlib/dlib/control/lspi_abstract.h
new file mode 100644
index 000000000..f262d16f4
--- /dev/null
+++ b/ml/dlib/dlib/control/lspi_abstract.h
@@ -0,0 +1,193 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_LSPI_ABSTRACT_Hh_
+#ifdef DLIB_LSPI_ABSTRACT_Hh_
+
+#include "approximate_linear_models_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename feature_extractor
+ >
+ class lspi
+ {
+ /*!
+ REQUIREMENTS ON feature_extractor
+ feature_extractor should implement the example_feature_extractor interface
+ defined at the top of dlib/control/approximate_linear_models_abstract.h
+
+ WHAT THIS OBJECT REPRESENTS
+ This object is an implementation of the reinforcement learning algorithm
+ described in the following paper:
+ Lagoudakis, Michail G., and Ronald Parr. "Least-squares policy
+ iteration." The Journal of Machine Learning Research 4 (2003):
+ 1107-1149.
+
+ This means that it takes a bunch of training data in the form of
+ process_samples and outputs a policy that hopefully performs well when run
+ on the process that generated those samples.
+ !*/
+
+ public:
+ typedef feature_extractor feature_extractor_type;
+ typedef typename feature_extractor::state_type state_type;
+ typedef typename feature_extractor::action_type action_type;
+
+ explicit lspi(
+ const feature_extractor& fe_
+ );
+ /*!
+ ensures
+ - #get_feature_extractor() == fe_
+ - #get_lambda() == 0.01
+                - #get_discount() == 0.8
+ - #get_epsilon() == 0.01
+ - is not verbose
+ - #get_max_iterations() == 100
+ !*/
+
+ lspi(
+ );
+ /*!
+ ensures
+ - #get_feature_extractor() == feature_extractor()
+ (i.e. it will have its default value)
+ - #get_lambda() == 0.01
+                - #get_discount() == 0.8
+ - #get_epsilon() == 0.01
+ - is not verbose
+ - #get_max_iterations() == 100
+ !*/
+
+ double get_discount (
+ ) const;
+ /*!
+ ensures
+ - returns the discount applied to the sum of rewards in the Bellman
+ equation.
+ !*/
+
+ void set_discount (
+ double value
+ );
+ /*!
+ requires
+ - 0 < value <= 1
+ ensures
+ - #get_discount() == value
+ !*/
+
+ const feature_extractor& get_feature_extractor (
+ ) const;
+ /*!
+ ensures
+ - returns the feature extractor used by this object
+ !*/
+
+ void be_verbose (
+ );
+ /*!
+ ensures
+ - This object will print status messages to standard out so that a
+ user can observe the progress of the algorithm.
+ !*/
+
+ void be_quiet (
+ );
+ /*!
+ ensures
+ - this object will not print anything to standard out
+ !*/
+
+ void set_epsilon (
+ double eps
+ );
+ /*!
+ requires
+ - eps > 0
+ ensures
+ - #get_epsilon() == eps
+ !*/
+
+ double get_epsilon (
+ ) const;
+ /*!
+ ensures
+ - returns the error epsilon that determines when training should stop.
+ Smaller values may result in a more accurate solution but take longer to
+ train.
+ !*/
+
+ void set_lambda (
+ double lambda_
+ );
+ /*!
+ requires
+                - lambda_ >= 0
+            ensures
+                - #get_lambda() == lambda_
+ !*/
+
+ double get_lambda (
+ ) const;
+ /*!
+ ensures
+                - returns the regularization parameter.  It is the parameter that
+                  determines the trade-off between trying to fit the training data
+                  exactly and allowing more errors in the hope of improving the
+                  generalization ability of the resulting function.  Smaller values
+ encourage exact fitting while larger values of lambda may encourage
+ better generalization.
+ !*/
+
+ void set_max_iterations (
+ unsigned long max_iter
+ );
+ /*!
+ ensures
+ - #get_max_iterations() == max_iter
+ !*/
+
+ unsigned long get_max_iterations (
+ );
+ /*!
+ ensures
+                - returns the maximum number of iterations the LSPI algorithm is allowed to
+                  run before it is required to stop and return a result.
+ !*/
+
+ template <
+ typename vector_type
+ >
+ policy<feature_extractor> train (
+ const vector_type& samples
+ ) const;
+ /*!
+ requires
+ - samples.size() > 0
+ - samples is something with an interface that looks like
+ std::vector<process_sample<feature_extractor>>. That is, it should
+ be some kind of array of process_sample objects.
+ ensures
+ - Trains a policy based on the given data and returns the results. The
+ idea is to find a policy that will obtain the largest possible reward
+ when run on the process that generated the samples. In particular,
+ if the returned policy is P then:
+ - P(S) == the best action to take when in state S.
+ - if (feature_extractor::force_last_weight_to_1) then
+ - The last element of P.get_weights() is 1.
+ !*/
+
+ };
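+
+    // A hedged usage sketch: train a policy from recorded process samples.  The
+    // toy_feature_extractor type is the hypothetical example sketched in
+    // approximate_linear_models_abstract.h, example_lspi_usage() is not a dlib function,
+    // and how the samples are collected is left out entirely.
+    template <typename sample_vector_type>
+    policy<toy_feature_extractor> example_lspi_usage (
+        const sample_vector_type& samples  // e.g. a std::vector<process_sample<toy_feature_extractor>>
+    )
+    {
+        lspi<toy_feature_extractor> trainer;
+        trainer.set_discount(0.9);   // value future rewards a bit more than the 0.8 default
+        trainer.be_verbose();        // print the per-iteration change in the weight vector
+        return trainer.train(samples);
+        // The returned policy can then be run as:  action = returned_policy(state);
+    }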
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_LSPI_ABSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/control/mpc.h b/ml/dlib/dlib/control/mpc.h
new file mode 100644
index 000000000..48ef2b72d
--- /dev/null
+++ b/ml/dlib/dlib/control/mpc.h
@@ -0,0 +1,370 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_MPC_Hh_
+#define DLIB_MPC_Hh_
+
+#include "mpc_abstract.h"
+#include "../matrix.h"
+#include "../algs.h"
+
+
+namespace dlib
+{
+ template <
+ long S_,
+ long I_,
+ unsigned long horizon_
+ >
+ class mpc
+ {
+
+ public:
+
+ const static long S = S_;
+ const static long I = I_;
+ const static unsigned long horizon = horizon_;
+
+ mpc(
+ )
+ {
+ A = 0;
+ B = 0;
+ C = 0;
+ Q = 0;
+ R = 0;
+ lower = 0;
+ upper = 0;
+
+ max_iterations = 0;
+ eps = 0.01;
+ for (unsigned long i = 0; i < horizon; ++i)
+ {
+ target[i].set_size(A.nr());
+ target[i] = 0;
+
+ controls[i].set_size(B.nc());
+ controls[i] = 0;
+ }
+ lambda = 0;
+ }
+
+ mpc (
+ const matrix<double,S,S>& A_,
+ const matrix<double,S,I>& B_,
+ const matrix<double,S,1>& C_,
+ const matrix<double,S,1>& Q_,
+ const matrix<double,I,1>& R_,
+ const matrix<double,I,1>& lower_,
+ const matrix<double,I,1>& upper_
+ ) : A(A_), B(B_), C(C_), Q(Q_), R(R_), lower(lower_), upper(upper_)
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(A.nr() > 0 && B.nc() > 0,
+ "\t mpc::mpc()"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t A.nr(): " << A.nr()
+ << "\n\t B.nc(): " << B.nc()
+ );
+
+ DLIB_ASSERT(A.nr() == A.nc() &&
+ A.nr() == B.nr() &&
+ A.nr() == C.nr() &&
+ A.nr() == Q.nr(),
+ "\t mpc::mpc()"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t A.nr(): " << A.nr()
+ << "\n\t A.nc(): " << A.nc()
+ << "\n\t B.nr(): " << B.nr()
+ << "\n\t C.nr(): " << C.nr()
+ << "\n\t Q.nr(): " << Q.nr()
+ );
+ DLIB_ASSERT(
+ B.nc() == R.nr() &&
+ B.nc() == lower.nr() &&
+ B.nc() == upper.nr() ,
+ "\t mpc::mpc()"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t B.nr(): " << B.nr()
+ << "\n\t B.nc(): " << B.nc()
+ << "\n\t lower.nr(): " << lower.nr()
+ << "\n\t upper.nr(): " << upper.nr()
+ );
+ DLIB_ASSERT(min(Q) >= 0 &&
+ min(R) > 0 &&
+ min(upper-lower) >= 0,
+ "\t mpc::mpc()"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t min(Q): " << min(Q)
+ << "\n\t min(R): " << min(R)
+ << "\n\t min(upper-lower): " << min(upper-lower)
+ );
+
+
+ max_iterations = 10000;
+ eps = 0.01;
+ for (unsigned long i = 0; i < horizon; ++i)
+ {
+ target[i].set_size(A.nr());
+ target[i] = 0;
+
+ controls[i].set_size(B.nc());
+ controls[i] = 0;
+ }
+
+ // Bound the maximum eigenvalue of the hessian by computing the trace of the
+ // hessian matrix.
+ lambda = sum(R)*horizon;
+ matrix<double,S,S> temp = diagm(Q);
+ for (unsigned long c = 0; c < horizon; ++c)
+ {
+ lambda += trace(trans(B)*temp*B);
+ Q_diag[horizon-c-1] = diag(trans(B)*temp*B);
+ temp = trans(A)*temp*A + diagm(Q);
+ }
+
+ }
+
+ const matrix<double,S,S>& get_A (
+ ) const { return A; }
+ const matrix<double,S,I>& get_B (
+ ) const { return B; }
+ const matrix<double,S,1>& get_C (
+ ) const { return C; }
+ const matrix<double,S,1>& get_Q (
+ ) const { return Q; }
+ const matrix<double,I,1>& get_R (
+ ) const { return R; }
+ const matrix<double,I,1>& get_lower_constraints (
+ ) const { return lower; }
+ const matrix<double,I,1>& get_upper_constraints (
+ ) const { return upper; }
+
+ void set_target (
+ const matrix<double,S,1>& val,
+ const unsigned long time
+ )
+ {
+ DLIB_ASSERT(time < horizon,
+                "\t void mpc::set_target(val, time)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t time: " << time
+ << "\n\t horizon: " << horizon
+ );
+
+ target[time] = val;
+ }
+
+ void set_target (
+ const matrix<double,S,1>& val
+ )
+ {
+ for (unsigned long i = 0; i < horizon; ++i)
+ target[i] = val;
+ }
+
+ void set_last_target (
+ const matrix<double,S,1>& val
+ )
+ {
+ set_target(val, horizon-1);
+ }
+
+ const matrix<double,S,1>& get_target (
+ const unsigned long time
+ ) const
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(time < horizon,
+                "\t matrix mpc::get_target(time)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t time: " << time
+ << "\n\t horizon: " << horizon
+ );
+
+ return target[time];
+ }
+
+ unsigned long get_max_iterations (
+ ) const { return max_iterations; }
+
+ void set_max_iterations (
+ unsigned long max_iter
+ )
+ {
+ max_iterations = max_iter;
+ }
+
+ void set_epsilon (
+ double eps_
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(eps_ > 0,
+ "\t void mpc::set_epsilon(eps_)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t eps_: " << eps_
+ );
+ eps = eps_;
+ }
+
+ double get_epsilon (
+ ) const
+ {
+ return eps;
+ }
+
+ matrix<double,I,1> operator() (
+ const matrix<double,S,1>& current_state
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(min(R) > 0 && A.nr() == current_state.size(),
+                "\t matrix mpc::operator()(current_state)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t min(R): " << min(R)
+ << "\n\t A.nr(): " << A.nr()
+ << "\n\t current_state.size(): " << current_state.size()
+ );
+
+ // Shift the inputs over by one time step so we can use them to warm start the
+ // optimizer.
+ for (unsigned long i = 1; i < horizon; ++i)
+ controls[i-1] = controls[i];
+
+ solve_linear_mpc(current_state);
+
+ for (unsigned long i = 1; i < horizon; ++i)
+ target[i-1] = target[i];
+
+ return controls[0];
+ }
+
+ private:
+
+
+        // These temporary variables are here just to avoid reallocating them on each call
+        // to operator().
+ matrix<double,S,1> M[horizon];
+ matrix<double,I,1> MM[horizon];
+ matrix<double,I,1> df[horizon];
+ matrix<double,I,1> v[horizon];
+ matrix<double,I,1> v_old[horizon];
+
+ void solve_linear_mpc (
+ const matrix<double,S,1>& initial_state
+ )
+ {
+ // make it so MM == trans(K)*Q*(M-target)
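+            // (Here K denotes the block matrix that maps the stacked control sequence to
+            // the stacked state sequence, so MM ends up being the constant,
+            // control-independent part of the gradient of the quadratic objective.)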
+ M[0] = A*initial_state + C;
+ for (unsigned long i = 1; i < horizon; ++i)
+ M[i] = A*M[i-1] + C;
+ for (unsigned long i = 0; i < horizon; ++i)
+ M[i] = diagm(Q)*(M[i]-target[i]);
+ for (long i = (long)horizon-2; i >= 0; --i)
+ M[i] += trans(A)*M[i+1];
+ for (unsigned long i = 0; i < horizon; ++i)
+ MM[i] = trans(B)*M[i];
+
+
+
+ unsigned long iter = 0;
+ for (; iter < max_iterations; ++iter)
+ {
+ // compute current gradient and put it into df.
+ // df == H*controls + MM;
+ M[0] = B*controls[0];
+ for (unsigned long i = 1; i < horizon; ++i)
+ M[i] = A*M[i-1] + B*controls[i];
+ for (unsigned long i = 0; i < horizon; ++i)
+ M[i] = diagm(Q)*M[i];
+ for (long i = (long)horizon-2; i >= 0; --i)
+ M[i] += trans(A)*M[i+1];
+ for (unsigned long i = 0; i < horizon; ++i)
+ df[i] = MM[i] + trans(B)*M[i] + diagm(R)*controls[i];
+
+
+
+ // Check the stopping condition, which is the magnitude of the largest element
+ // of the gradient.
+ double max_df = 0;
+ unsigned long max_t = 0;
+ long max_v = 0;
+ for (unsigned long i = 0; i < horizon; ++i)
+ {
+ for (long j = 0; j < controls[i].size(); ++j)
+ {
+                        // if this variable isn't an active constraint then we care about its
+                        // derivative.
+ if (!((controls[i](j) <= lower(j) && df[i](j) > 0) ||
+ (controls[i](j) >= upper(j) && df[i](j) < 0)))
+ {
+ if (std::abs(df[i](j)) > max_df)
+ {
+ max_df = std::abs(df[i](j));
+ max_t = i;
+ max_v = j;
+ }
+ }
+ }
+ }
+ if (max_df < eps)
+ break;
+
+
+
+ // We will start out by doing a little bit of coordinate descent because it
+ // allows us to optimize individual variables exactly. Since we are warm
+ // starting each iteration with a really good solution this helps speed
+ // things up a lot.
+ const unsigned long smo_iters = 50;
+ if (iter < smo_iters)
+ {
+ if (Q_diag[max_t](max_v) == 0) continue;
+
+ // Take the optimal step but just for one variable.
+ controls[max_t](max_v) = -(df[max_t](max_v)-Q_diag[max_t](max_v)*controls[max_t](max_v))/Q_diag[max_t](max_v);
+ controls[max_t](max_v) = put_in_range(lower(max_v), upper(max_v), controls[max_t](max_v));
+
+ // If this is the last SMO iteration then don't forget to initialize v
+ // for the gradient steps.
+ if (iter+1 == smo_iters)
+ {
+ for (unsigned long i = 0; i < horizon; ++i)
+ v[i] = controls[i];
+ }
+ }
+ else
+ {
+ // Take a projected gradient step.
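+                    // (lambda is an upper bound on the largest eigenvalue of the Hessian, so
+                    // 1.0/lambda is a safe step size, and the (sqrt(lambda)-1)/(sqrt(lambda)+1)
+                    // factor plays the role of the momentum term in the fast gradient method
+                    // referenced in mpc_abstract.h.)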
+ for (unsigned long i = 0; i < horizon; ++i)
+ {
+ v_old[i] = v[i];
+ v[i] = dlib::clamp(controls[i] - 1.0/lambda * df[i], lower, upper);
+ controls[i] = dlib::clamp(v[i] + (std::sqrt(lambda)-1)/(std::sqrt(lambda)+1)*(v[i]-v_old[i]), lower, upper);
+ }
+ }
+ }
+ }
+
+ unsigned long max_iterations;
+ double eps;
+
+ matrix<double,S,S> A;
+ matrix<double,S,I> B;
+ matrix<double,S,1> C;
+ matrix<double,S,1> Q;
+ matrix<double,I,1> R;
+ matrix<double,I,1> lower;
+ matrix<double,I,1> upper;
+ matrix<double,S,1> target[horizon];
+
+        double lambda; // a bound on the largest eigenvalue of the Hessian matrix.
+ matrix<double,I,1> Q_diag[horizon];
+ matrix<double,I,1> controls[horizon];
+
+ };
+
+}
+
+#endif // DLIB_MPC_Hh_
+
diff --git a/ml/dlib/dlib/control/mpc_abstract.h b/ml/dlib/dlib/control/mpc_abstract.h
new file mode 100644
index 000000000..b4421c076
--- /dev/null
+++ b/ml/dlib/dlib/control/mpc_abstract.h
@@ -0,0 +1,276 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_MPC_ABSTRACT_Hh_
+#ifdef DLIB_MPC_ABSTRACT_Hh_
+
+#include "../matrix.h"
+
+namespace dlib
+{
+ template <
+ long S_,
+ long I_,
+ unsigned long horizon_
+ >
+ class mpc
+ {
+ /*!
+ REQUIREMENTS ON horizon_
+ horizon_ > 0
+
+ REQUIREMENTS ON S_
+ S_ >= 0
+
+ REQUIREMENTS ON I_
+ I_ >= 0
+
+ WHAT THIS OBJECT REPRESENTS
+ This object implements a linear model predictive controller. To explain
+ what that means, suppose you have some process you want to control and the
+ process dynamics are described by the linear equation:
+ x_{i+1} = A*x_i + B*u_i + C
+ That is, the next state the system goes into is a linear function of its
+ current state (x_i) and the current control (u_i) plus some constant bias
+ or disturbance.
+
+ A model predictive controller can find the control (u) you should apply to
+ drive the state (x) to some reference value, or alternatively to make the
+ state track some reference time-varying sequence. It does this by
+ simulating the process for horizon_ time steps and selecting the control
+ that leads to the best performance over the next horizon_ steps.
+
+ To be precise, each time you ask this object for a control, it solves the
+ following quadratic program:
+
+ min sum_i trans(x_i-target_i)*Q*(x_i-target_i) + trans(u_i)*R*u_i
+ x_i,u_i
+
+ such that: x_0 == current_state
+ x_{i+1} == A*x_i + B*u_i + C
+ lower <= u_i <= upper
+ 0 <= i < horizon_
+
+ and reports u_0 as the control you should take given that you are currently
+ in current_state. Q and R are user supplied matrices that define how we
+ penalize variations away from the target state as well as how much we want
+ to avoid generating large control signals.
+
+ Finally, the algorithm we use to solve this quadratic program is based
+ largely on the method described in:
+ A Fast Gradient method for embedded linear predictive control (2011)
+ by Markus Kogel and Rolf Findeisen
+ !*/
+
+ public:
+
+ const static long S = S_;
+ const static long I = I_;
+ const static unsigned long horizon = horizon_;
+
+ mpc(
+ );
+ /*!
+ ensures
+                - #get_max_iterations() == 0
+                - #get_epsilon() == 0.01
+ - The A,B,C,Q,R,lower, and upper parameter matrices are filled with zeros.
+ Therefore, to use this object you must initialize it via the constructor
+ that supplies these parameters.
+ !*/
+
+ mpc (
+ const matrix<double,S,S>& A,
+ const matrix<double,S,I>& B,
+ const matrix<double,S,1>& C,
+ const matrix<double,S,1>& Q,
+ const matrix<double,I,1>& R,
+ const matrix<double,I,1>& lower,
+ const matrix<double,I,1>& upper
+ );
+ /*!
+ requires
+ - A.nr() > 0
+ - B.nc() > 0
+ - A.nr() == A.nc() == B.nr() == C.nr() == Q.nr()
+ - B.nc() == R.nr() == lower.nr() == upper.nr()
+ - min(Q) >= 0
+ - min(R) > 0
+ - min(upper-lower) >= 0
+ ensures
+ - #get_A() == A
+ - #get_B() == B
+ - #get_C() == C
+ - #get_Q() == Q
+ - #get_R() == R
+ - #get_lower_constraints() == lower
+ - #get_upper_constraints() == upper
+ - for all valid i:
+ - get_target(i) == a vector of all zeros
+ - get_target(i).size() == A.nr()
+ - #get_max_iterations() == 10000
+ - #get_epsilon() == 0.01
+ !*/
+
+ const matrix<double,S,S>& get_A (
+ ) const;
+ /*!
+ ensures
+ - returns the A matrix from the quadratic program defined above.
+ !*/
+
+ const matrix<double,S,I>& get_B (
+ ) const;
+ /*!
+ ensures
+ - returns the B matrix from the quadratic program defined above.
+ !*/
+
+ const matrix<double,S,1>& get_C (
+ ) const;
+ /*!
+ ensures
+ - returns the C matrix from the quadratic program defined above.
+ !*/
+
+ const matrix<double,S,1>& get_Q (
+ ) const;
+ /*!
+ ensures
+ - returns the diagonal of the Q matrix from the quadratic program defined
+ above.
+ !*/
+
+ const matrix<double,I,1>& get_R (
+ ) const;
+ /*!
+ ensures
+ - returns the diagonal of the R matrix from the quadratic program defined
+ above.
+ !*/
+
+ const matrix<double,I,1>& get_lower_constraints (
+ ) const;
+ /*!
+ ensures
+ - returns the lower matrix from the quadratic program defined above. All
+ controls generated by this object will have values no less than this
+ lower bound. That is, any control u will satisfy min(u-lower) >= 0.
+ !*/
+
+ const matrix<double,I,1>& get_upper_constraints (
+ ) const;
+ /*!
+ ensures
+ - returns the upper matrix from the quadratic program defined above. All
+ controls generated by this object will have values no larger than this
+ upper bound. That is, any control u will satisfy min(upper-u) >= 0.
+ !*/
+
+ const matrix<double,S,1>& get_target (
+ const unsigned long time
+ ) const;
+ /*!
+ requires
+ - time < horizon
+ ensures
+ - This object will try to find the control sequence that results in the
+ process obtaining get_target(time) state at the indicated time. Note
+                  process reaching the get_target(time) state at the indicated time.  Note
+ !*/
+
+ void set_target (
+ const matrix<double,S,1>& val,
+ const unsigned long time
+ );
+ /*!
+ requires
+ - time < horizon
+ ensures
+ - #get_target(time) == val
+ !*/
+
+ void set_target (
+ const matrix<double,S,1>& val
+ );
+ /*!
+ ensures
+ - for all valid t:
+ - #get_target(t) == val
+ !*/
+
+ void set_last_target (
+ const matrix<double,S,1>& val
+ );
+ /*!
+ ensures
+ - performs: set_target(val, horizon-1)
+ !*/
+
+ unsigned long get_max_iterations (
+ ) const;
+ /*!
+ ensures
+ - When operator() is called it solves an optimization problem to
+ get_epsilon() precision to determine the next control action. In
+ particular, we run the optimizer until the magnitude of each element of
+ the gradient vector is less than get_epsilon() or until
+ get_max_iterations() solver iterations have been executed.
+ !*/
+
+ void set_max_iterations (
+ unsigned long max_iter
+ );
+ /*!
+ ensures
+ - #get_max_iterations() == max_iter
+ !*/
+
+ void set_epsilon (
+ double eps
+ );
+ /*!
+ requires
+ - eps > 0
+ ensures
+ - #get_epsilon() == eps
+ !*/
+
+ double get_epsilon (
+ ) const;
+ /*!
+ ensures
+ - When operator() is called it solves an optimization problem to
+ get_epsilon() precision to determine the next control action. In
+ particular, we run the optimizer until the magnitude of each element of
+ the gradient vector is less than get_epsilon() or until
+ get_max_iterations() solver iterations have been executed. This means
+ that smaller epsilon values will give more accurate outputs but may take
+ longer to compute.
+ !*/
+
+ matrix<double,I,1> operator() (
+ const matrix<double,S,1>& current_state
+ );
+ /*!
+ requires
+ - min(R) > 0
+ - A.nr() == current_state.size()
+ ensures
+ - Solves the model predictive control problem defined by the arguments to
+ this objects constructor, assuming that the starting state is given by
+ current_state. Then we return the control that should be taken in the
+ current state that best optimizes the quadratic objective function
+ defined above.
+                - We also shift over the target states so that you only need to update the
+                  last one (if you are using non-zero target states) via a call to
+                  set_last_target().  In particular, it will be the case that:
+                    - for all t < horizon-1: #get_target(t) == get_target(t+1)
+                    - #get_target(horizon-1) is unchanged.
+ !*/
+
+ };
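+
+    // A hedged usage sketch: control a point mass (state = [position, velocity], one
+    // input, 0.1 second time steps) toward a goal position.  The particular numbers, the
+    // 30 step horizon, and the name example_mpc_step() are illustrative assumptions
+    // rather than anything mandated by this interface.
+    inline matrix<double,1,1> example_mpc_step (
+        const matrix<double,2,1>& current_state
+    )
+    {
+        matrix<double,2,2> A;
+        A = 1, 0.1,
+            0, 1;                 // x_{i+1} = A*x_i + B*u_i + C
+        matrix<double,2,1> B;
+        B = 0,
+            0.1;
+        matrix<double,2,1> C;
+        C = 0, 0;                 // no constant disturbance
+        matrix<double,2,1> Q;
+        Q = 1, 0;                 // penalize position error, ignore velocity error
+        matrix<double,1,1> R;
+        R = 0.1;                  // small penalty on control effort
+        matrix<double,1,1> lower, upper;
+        lower = -1;               // actuator limits
+        upper = 1;
+
+        mpc<2,1,30> controller(A, B, C, Q, R, lower, upper);
+
+        matrix<double,2,1> goal;
+        goal = 5, 0;              // drive the position to 5 with zero final velocity
+        controller.set_target(goal);
+
+        // In a real application you would construct the controller once and call it every
+        // time step; constructing it here just keeps the sketch self-contained.
+        return controller(current_state);
+    }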
+
+}
+
+#endif // DLIB_MPC_ABSTRACT_Hh_
+