diff options
Diffstat (limited to 'ml/dlib/dlib/control')
-rw-r--r-- | ml/dlib/dlib/control/approximate_linear_models.h | 128 | ||||
-rw-r--r-- | ml/dlib/dlib/control/approximate_linear_models_abstract.h | 213 | ||||
-rw-r--r-- | ml/dlib/dlib/control/lspi.h | 188 | ||||
-rw-r--r-- | ml/dlib/dlib/control/lspi_abstract.h | 193 | ||||
-rw-r--r-- | ml/dlib/dlib/control/mpc.h | 370 | ||||
-rw-r--r-- | ml/dlib/dlib/control/mpc_abstract.h | 276 |
6 files changed, 1368 insertions, 0 deletions
diff --git a/ml/dlib/dlib/control/approximate_linear_models.h b/ml/dlib/dlib/control/approximate_linear_models.h new file mode 100644 index 000000000..9732d71e9 --- /dev/null +++ b/ml/dlib/dlib/control/approximate_linear_models.h @@ -0,0 +1,128 @@ +// Copyright (C) 2015 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_APPROXIMATE_LINEAR_MODELS_Hh_ +#define DLIB_APPROXIMATE_LINEAR_MODELS_Hh_ + +#include "approximate_linear_models_abstract.h" +#include "../matrix.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + struct process_sample + { + typedef feature_extractor feature_extractor_type; + typedef typename feature_extractor::state_type state_type; + typedef typename feature_extractor::action_type action_type; + + process_sample(){} + + process_sample( + const state_type& s, + const action_type& a, + const state_type& n, + const double& r + ) : state(s), action(a), next_state(n), reward(r) {} + + state_type state; + action_type action; + state_type next_state; + double reward; + }; + + template < typename feature_extractor > + void serialize (const process_sample<feature_extractor>& item, std::ostream& out) + { + serialize(item.state, out); + serialize(item.action, out); + serialize(item.next_state, out); + serialize(item.reward, out); + } + + template < typename feature_extractor > + void deserialize (process_sample<feature_extractor>& item, std::istream& in) + { + deserialize(item.state, in); + deserialize(item.action, in); + deserialize(item.next_state, in); + deserialize(item.reward, in); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class policy + { + public: + + typedef feature_extractor feature_extractor_type; + typedef typename feature_extractor::state_type state_type; + typedef typename feature_extractor::action_type action_type; + + + policy ( + ) + { + w.set_size(fe.num_features()); + w = 0; + } + + policy ( + const matrix<double,0,1>& weights_, + const feature_extractor& fe_ + ) : w(weights_), fe(fe_) {} + + action_type operator() ( + const state_type& state + ) const + { + return fe.find_best_action(state,w); + } + + const feature_extractor& get_feature_extractor ( + ) const { return fe; } + + const matrix<double,0,1>& get_weights ( + ) const { return w; } + + + private: + matrix<double,0,1> w; + feature_extractor fe; + }; + + template < typename feature_extractor > + inline void serialize(const policy<feature_extractor>& item, std::ostream& out) + { + int version = 1; + serialize(version, out); + serialize(item.get_feature_extractor(), out); + serialize(item.get_weights(), out); + } + template < typename feature_extractor > + inline void deserialize(policy<feature_extractor>& item, std::istream& in) + { + int version = 0; + deserialize(version, in); + if (version != 1) + throw serialization_error("Unexpected version found while deserializing dlib::policy object."); + feature_extractor fe; + matrix<double,0,1> w; + deserialize(fe, in); + deserialize(w, in); + item = policy<feature_extractor>(w,fe); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_APPROXIMATE_LINEAR_MODELS_Hh_ + diff --git a/ml/dlib/dlib/control/approximate_linear_models_abstract.h b/ml/dlib/dlib/control/approximate_linear_models_abstract.h new file mode 100644 index 000000000..59dac4276 --- /dev/null +++ b/ml/dlib/dlib/control/approximate_linear_models_abstract.h @@ -0,0 +1,213 @@ +// Copyright (C) 2015 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_ +#ifdef DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_ + +#include "../matrix.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + struct example_feature_extractor + { + /*! + WHAT THIS OBJECT REPRESENTS + This object defines the interface a feature extractor must implement if it + is to be used with the process_sample and policy objects defined at the + bottom of this file. Moreover, it is meant to represent the core part + of a model used in a reinforcement learning algorithm. + + In particular, this object models a Q(state,action) function where + Q(state,action) == dot(w, PSI(state,action)) + where PSI(state,action) is a feature vector and w is a parameter + vector. + + Therefore, a feature extractor defines how the PSI(x,y) feature vector is + calculated. It also defines the types used to represent the state and + action objects. + + + THREAD SAFETY + Instances of this object are required to be threadsafe, that is, it should + be safe for multiple threads to make concurrent calls to the member + functions of this object. + !*/ + + // The state and actions can be any types so long as you provide typedefs for them. + typedef T state_type; + typedef U action_type; + // We can also say that the last element in the weight vector w must be 1. This + // can be useful for including a prior into your model. + const static bool force_last_weight_to_1 = false; + + example_feature_extractor( + ); + /*! + ensures + - this object is properly initialized. + !*/ + + unsigned long num_features( + ) const; + /*! + ensures + - returns the dimensionality of the PSI() feature vector. + !*/ + + action_type find_best_action ( + const state_type& state, + const matrix<double,0,1>& w + ) const; + /*! + ensures + - returns the action A that maximizes Q(state,A) = dot(w,PSI(state,A)). + That is, this function finds the best action to take in the given state + when our model is parameterized by the given weight vector w. + !*/ + + void get_features ( + const state_type& state, + const action_type& action, + matrix<double,0,1>& feats + ) const; + /*! + ensures + - #feats.size() == num_features() + - #feats == PSI(state,action) + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + struct process_sample + { + /*! + REQUIREMENTS ON feature_extractor + feature_extractor should implement the example_feature_extractor interface + defined at the top of this file. + + WHAT THIS OBJECT REPRESENTS + This object holds a training sample for a reinforcement learning algorithm. + In particular, it should be a sample from some process where the process + was in state this->state, then took this->action action which resulted in + receiving this->reward and ending up in the state this->next_state. + !*/ + + typedef feature_extractor feature_extractor_type; + typedef typename feature_extractor::state_type state_type; + typedef typename feature_extractor::action_type action_type; + + process_sample(){} + + process_sample( + const state_type& s, + const action_type& a, + const state_type& n, + const double& r + ) : state(s), action(a), next_state(n), reward(r) {} + + state_type state; + action_type action; + state_type next_state; + double reward; + }; + + template < typename feature_extractor > + void serialize (const process_sample<feature_extractor>& item, std::ostream& out); + template < typename feature_extractor > + void deserialize (process_sample<feature_extractor>& item, std::istream& in); + /*! + provides serialization support. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class policy + { + /*! + REQUIREMENTS ON feature_extractor + feature_extractor should implement the example_feature_extractor interface + defined at the top of this file. + + WHAT THIS OBJECT REPRESENTS + This is a policy based on the supplied feature_extractor model. In + particular, it maps from feature_extractor::state_type to the best action + to take in that state. + !*/ + + public: + + typedef feature_extractor feature_extractor_type; + typedef typename feature_extractor::state_type state_type; + typedef typename feature_extractor::action_type action_type; + + + policy ( + ); + /*! + ensures + - #get_feature_extractor() == feature_extractor() + (i.e. it will have its default value) + - #get_weights().size() == #get_feature_extractor().num_features() + - #get_weights() == 0 + !*/ + + policy ( + const matrix<double,0,1>& weights, + const feature_extractor& fe + ); + /*! + requires + - fe.num_features() == weights.size() + ensures + - #get_feature_extractor() == fe + - #get_weights() == weights + !*/ + + action_type operator() ( + const state_type& state + ) const; + /*! + ensures + - returns get_feature_extractor().find_best_action(state,w); + !*/ + + const feature_extractor& get_feature_extractor ( + ) const; + /*! + ensures + - returns the feature extractor used by this object + !*/ + + const matrix<double,0,1>& get_weights ( + ) const; + /*! + ensures + - returns the parameter vector (w) associated with this object. The length + of the vector is get_feature_extractor().num_features(). + !*/ + + }; + + template < typename feature_extractor > + void serialize(const policy<feature_extractor>& item, std::ostream& out); + template < typename feature_extractor > + void deserialize(policy<feature_extractor>& item, std::istream& in); + /*! + provides serialization support. + !*/ + +// ---------------------------------------------------------------------------------------- + + +#endif // DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_ + diff --git a/ml/dlib/dlib/control/lspi.h b/ml/dlib/dlib/control/lspi.h new file mode 100644 index 000000000..b21a501d2 --- /dev/null +++ b/ml/dlib/dlib/control/lspi.h @@ -0,0 +1,188 @@ +// Copyright (C) 2015 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_LSPI_Hh_ +#define DLIB_LSPI_Hh_ + +#include "lspi_abstract.h" +#include "approximate_linear_models.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class lspi + { + public: + typedef feature_extractor feature_extractor_type; + typedef typename feature_extractor::state_type state_type; + typedef typename feature_extractor::action_type action_type; + + explicit lspi( + const feature_extractor& fe_ + ) : fe(fe_) + { + init(); + } + + lspi( + ) + { + init(); + } + + double get_discount ( + ) const { return discount; } + + void set_discount ( + double value + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < value && value <= 1, + "\t void lspi::set_discount(value)" + << "\n\t invalid inputs were given to this function" + << "\n\t value: " << value + ); + discount = value; + } + + const feature_extractor& get_feature_extractor ( + ) const { return fe; } + + void be_verbose ( + ) + { + verbose = true; + } + + void be_quiet ( + ) + { + verbose = false; + } + + void set_epsilon ( + double eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void lspi::set_epsilon(eps_)" + << "\n\t invalid inputs were given to this function" + << "\n\t eps_: " << eps_ + ); + eps = eps_; + } + + double get_epsilon ( + ) const + { + return eps; + } + + void set_lambda ( + double lambda_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(lambda_ >= 0, + "\t void lspi::set_lambda(lambda_)" + << "\n\t invalid inputs were given to this function" + << "\n\t lambda_: " << lambda_ + ); + lambda = lambda_; + } + + double get_lambda ( + ) const + { + return lambda; + } + + void set_max_iterations ( + unsigned long max_iter + ) { max_iterations = max_iter; } + + unsigned long get_max_iterations ( + ) { return max_iterations; } + + template <typename vector_type> + policy<feature_extractor> train ( + const vector_type& samples + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(samples.size() > 0, + "\t policy lspi::train(samples)" + << "\n\t invalid inputs were given to this function" + ); + + matrix<double,0,1> w(fe.num_features()); + w = 0; + matrix<double,0,1> prev_w, b, f1, f2; + + matrix<double> A; + + double change; + unsigned long iter = 0; + do + { + A = identity_matrix<double>(fe.num_features())*lambda; + b = 0; + for (unsigned long i = 0; i < samples.size(); ++i) + { + fe.get_features(samples[i].state, samples[i].action, f1); + fe.get_features(samples[i].next_state, + fe.find_best_action(samples[i].next_state,w), + f2); + A += f1*trans(f1 - discount*f2); + b += f1*samples[i].reward; + } + + prev_w = w; + if (feature_extractor::force_last_weight_to_1) + w = join_cols(pinv(colm(A,range(0,A.nc()-2)))*(b-colm(A,A.nc()-1)),mat(1.0)); + else + w = pinv(A)*b; + + change = length(w-prev_w); + ++iter; + + if (verbose) + std::cout << "iteration: " << iter << "\tchange: " << change << std::endl; + + } while(change > eps && iter < max_iterations); + + return policy<feature_extractor>(w,fe); + } + + + private: + + void init() + { + lambda = 0.01; + discount = 0.8; + eps = 0.01; + verbose = false; + max_iterations = 100; + } + + double lambda; + double discount; + double eps; + bool verbose; + unsigned long max_iterations; + feature_extractor fe; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_LSPI_Hh_ + diff --git a/ml/dlib/dlib/control/lspi_abstract.h b/ml/dlib/dlib/control/lspi_abstract.h new file mode 100644 index 000000000..f262d16f4 --- /dev/null +++ b/ml/dlib/dlib/control/lspi_abstract.h @@ -0,0 +1,193 @@ +// Copyright (C) 2015 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_LSPI_ABSTRACT_Hh_ +#ifdef DLIB_LSPI_ABSTRACT_Hh_ + +#include "approximate_linear_models_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class lspi + { + /*! + REQUIREMENTS ON feature_extractor + feature_extractor should implement the example_feature_extractor interface + defined at the top of dlib/control/approximate_linear_models_abstract.h + + WHAT THIS OBJECT REPRESENTS + This object is an implementation of the reinforcement learning algorithm + described in the following paper: + Lagoudakis, Michail G., and Ronald Parr. "Least-squares policy + iteration." The Journal of Machine Learning Research 4 (2003): + 1107-1149. + + This means that it takes a bunch of training data in the form of + process_samples and outputs a policy that hopefully performs well when run + on the process that generated those samples. + !*/ + + public: + typedef feature_extractor feature_extractor_type; + typedef typename feature_extractor::state_type state_type; + typedef typename feature_extractor::action_type action_type; + + explicit lspi( + const feature_extractor& fe_ + ); + /*! + ensures + - #get_feature_extractor() == fe_ + - #get_lambda() == 0.01 + - #get_discount == 0.8 + - #get_epsilon() == 0.01 + - is not verbose + - #get_max_iterations() == 100 + !*/ + + lspi( + ); + /*! + ensures + - #get_feature_extractor() == feature_extractor() + (i.e. it will have its default value) + - #get_lambda() == 0.01 + - #get_discount == 0.8 + - #get_epsilon() == 0.01 + - is not verbose + - #get_max_iterations() == 100 + !*/ + + double get_discount ( + ) const; + /*! + ensures + - returns the discount applied to the sum of rewards in the Bellman + equation. + !*/ + + void set_discount ( + double value + ); + /*! + requires + - 0 < value <= 1 + ensures + - #get_discount() == value + !*/ + + const feature_extractor& get_feature_extractor ( + ) const; + /*! + ensures + - returns the feature extractor used by this object + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a + user can observe the progress of the algorithm. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + void set_epsilon ( + double eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + double get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Smaller values may result in a more accurate solution but take longer to + train. + !*/ + + void set_lambda ( + double lambda_ + ); + /*! + requires + - lambda >= 0 + ensures + - #get_lambda() == lambda + !*/ + + double get_lambda ( + ) const; + /*! + ensures + - returns the regularization parameter. It is the parameter that + determines the trade off between trying to fit the training data + exactly or allowing more errors but hopefully improving the + generalization ability of the resulting function. Smaller values + encourage exact fitting while larger values of lambda may encourage + better generalization. + !*/ + + void set_max_iterations ( + unsigned long max_iter + ); + /*! + ensures + - #get_max_iterations() == max_iter + !*/ + + unsigned long get_max_iterations ( + ); + /*! + ensures + - returns the maximum number of iterations the SVM optimizer is allowed to + run before it is required to stop and return a result. + !*/ + + template < + typename vector_type + > + policy<feature_extractor> train ( + const vector_type& samples + ) const; + /*! + requires + - samples.size() > 0 + - samples is something with an interface that looks like + std::vector<process_sample<feature_extractor>>. That is, it should + be some kind of array of process_sample objects. + ensures + - Trains a policy based on the given data and returns the results. The + idea is to find a policy that will obtain the largest possible reward + when run on the process that generated the samples. In particular, + if the returned policy is P then: + - P(S) == the best action to take when in state S. + - if (feature_extractor::force_last_weight_to_1) then + - The last element of P.get_weights() is 1. + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_LSPI_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/control/mpc.h b/ml/dlib/dlib/control/mpc.h new file mode 100644 index 000000000..48ef2b72d --- /dev/null +++ b/ml/dlib/dlib/control/mpc.h @@ -0,0 +1,370 @@ +// Copyright (C) 2015 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_MPC_Hh_ +#define DLIB_MPC_Hh_ + +#include "mpc_abstract.h" +#include "../matrix.h" +#include "../algs.h" + + +namespace dlib +{ + template < + long S_, + long I_, + unsigned long horizon_ + > + class mpc + { + + public: + + const static long S = S_; + const static long I = I_; + const static unsigned long horizon = horizon_; + + mpc( + ) + { + A = 0; + B = 0; + C = 0; + Q = 0; + R = 0; + lower = 0; + upper = 0; + + max_iterations = 0; + eps = 0.01; + for (unsigned long i = 0; i < horizon; ++i) + { + target[i].set_size(A.nr()); + target[i] = 0; + + controls[i].set_size(B.nc()); + controls[i] = 0; + } + lambda = 0; + } + + mpc ( + const matrix<double,S,S>& A_, + const matrix<double,S,I>& B_, + const matrix<double,S,1>& C_, + const matrix<double,S,1>& Q_, + const matrix<double,I,1>& R_, + const matrix<double,I,1>& lower_, + const matrix<double,I,1>& upper_ + ) : A(A_), B(B_), C(C_), Q(Q_), R(R_), lower(lower_), upper(upper_) + { + // make sure requires clause is not broken + DLIB_ASSERT(A.nr() > 0 && B.nc() > 0, + "\t mpc::mpc()" + << "\n\t invalid inputs were given to this function" + << "\n\t A.nr(): " << A.nr() + << "\n\t B.nc(): " << B.nc() + ); + + DLIB_ASSERT(A.nr() == A.nc() && + A.nr() == B.nr() && + A.nr() == C.nr() && + A.nr() == Q.nr(), + "\t mpc::mpc()" + << "\n\t invalid inputs were given to this function" + << "\n\t A.nr(): " << A.nr() + << "\n\t A.nc(): " << A.nc() + << "\n\t B.nr(): " << B.nr() + << "\n\t C.nr(): " << C.nr() + << "\n\t Q.nr(): " << Q.nr() + ); + DLIB_ASSERT( + B.nc() == R.nr() && + B.nc() == lower.nr() && + B.nc() == upper.nr() , + "\t mpc::mpc()" + << "\n\t invalid inputs were given to this function" + << "\n\t B.nr(): " << B.nr() + << "\n\t B.nc(): " << B.nc() + << "\n\t lower.nr(): " << lower.nr() + << "\n\t upper.nr(): " << upper.nr() + ); + DLIB_ASSERT(min(Q) >= 0 && + min(R) > 0 && + min(upper-lower) >= 0, + "\t mpc::mpc()" + << "\n\t invalid inputs were given to this function" + << "\n\t min(Q): " << min(Q) + << "\n\t min(R): " << min(R) + << "\n\t min(upper-lower): " << min(upper-lower) + ); + + + max_iterations = 10000; + eps = 0.01; + for (unsigned long i = 0; i < horizon; ++i) + { + target[i].set_size(A.nr()); + target[i] = 0; + + controls[i].set_size(B.nc()); + controls[i] = 0; + } + + // Bound the maximum eigenvalue of the hessian by computing the trace of the + // hessian matrix. + lambda = sum(R)*horizon; + matrix<double,S,S> temp = diagm(Q); + for (unsigned long c = 0; c < horizon; ++c) + { + lambda += trace(trans(B)*temp*B); + Q_diag[horizon-c-1] = diag(trans(B)*temp*B); + temp = trans(A)*temp*A + diagm(Q); + } + + } + + const matrix<double,S,S>& get_A ( + ) const { return A; } + const matrix<double,S,I>& get_B ( + ) const { return B; } + const matrix<double,S,1>& get_C ( + ) const { return C; } + const matrix<double,S,1>& get_Q ( + ) const { return Q; } + const matrix<double,I,1>& get_R ( + ) const { return R; } + const matrix<double,I,1>& get_lower_constraints ( + ) const { return lower; } + const matrix<double,I,1>& get_upper_constraints ( + ) const { return upper; } + + void set_target ( + const matrix<double,S,1>& val, + const unsigned long time + ) + { + DLIB_ASSERT(time < horizon, + "\t void mpc::set_target(eps_)" + << "\n\t invalid inputs were given to this function" + << "\n\t time: " << time + << "\n\t horizon: " << horizon + ); + + target[time] = val; + } + + void set_target ( + const matrix<double,S,1>& val + ) + { + for (unsigned long i = 0; i < horizon; ++i) + target[i] = val; + } + + void set_last_target ( + const matrix<double,S,1>& val + ) + { + set_target(val, horizon-1); + } + + const matrix<double,S,1>& get_target ( + const unsigned long time + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(time < horizon, + "\t matrix mpc::get_target(eps_)" + << "\n\t invalid inputs were given to this function" + << "\n\t time: " << time + << "\n\t horizon: " << horizon + ); + + return target[time]; + } + + unsigned long get_max_iterations ( + ) const { return max_iterations; } + + void set_max_iterations ( + unsigned long max_iter + ) + { + max_iterations = max_iter; + } + + void set_epsilon ( + double eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void mpc::set_epsilon(eps_)" + << "\n\t invalid inputs were given to this function" + << "\n\t eps_: " << eps_ + ); + eps = eps_; + } + + double get_epsilon ( + ) const + { + return eps; + } + + matrix<double,I,1> operator() ( + const matrix<double,S,1>& current_state + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(min(R) > 0 && A.nr() == current_state.size(), + "\t matrix mpc::operator(current_state)" + << "\n\t invalid inputs were given to this function" + << "\n\t min(R): " << min(R) + << "\n\t A.nr(): " << A.nr() + << "\n\t current_state.size(): " << current_state.size() + ); + + // Shift the inputs over by one time step so we can use them to warm start the + // optimizer. + for (unsigned long i = 1; i < horizon; ++i) + controls[i-1] = controls[i]; + + solve_linear_mpc(current_state); + + for (unsigned long i = 1; i < horizon; ++i) + target[i-1] = target[i]; + + return controls[0]; + } + + private: + + + // These temporary variables here just to avoid reallocating them on each call to + // operator(). + matrix<double,S,1> M[horizon]; + matrix<double,I,1> MM[horizon]; + matrix<double,I,1> df[horizon]; + matrix<double,I,1> v[horizon]; + matrix<double,I,1> v_old[horizon]; + + void solve_linear_mpc ( + const matrix<double,S,1>& initial_state + ) + { + // make it so MM == trans(K)*Q*(M-target) + M[0] = A*initial_state + C; + for (unsigned long i = 1; i < horizon; ++i) + M[i] = A*M[i-1] + C; + for (unsigned long i = 0; i < horizon; ++i) + M[i] = diagm(Q)*(M[i]-target[i]); + for (long i = (long)horizon-2; i >= 0; --i) + M[i] += trans(A)*M[i+1]; + for (unsigned long i = 0; i < horizon; ++i) + MM[i] = trans(B)*M[i]; + + + + unsigned long iter = 0; + for (; iter < max_iterations; ++iter) + { + // compute current gradient and put it into df. + // df == H*controls + MM; + M[0] = B*controls[0]; + for (unsigned long i = 1; i < horizon; ++i) + M[i] = A*M[i-1] + B*controls[i]; + for (unsigned long i = 0; i < horizon; ++i) + M[i] = diagm(Q)*M[i]; + for (long i = (long)horizon-2; i >= 0; --i) + M[i] += trans(A)*M[i+1]; + for (unsigned long i = 0; i < horizon; ++i) + df[i] = MM[i] + trans(B)*M[i] + diagm(R)*controls[i]; + + + + // Check the stopping condition, which is the magnitude of the largest element + // of the gradient. + double max_df = 0; + unsigned long max_t = 0; + long max_v = 0; + for (unsigned long i = 0; i < horizon; ++i) + { + for (long j = 0; j < controls[i].size(); ++j) + { + // if this variable isn't an active constraint then we care about it's + // derivative. + if (!((controls[i](j) <= lower(j) && df[i](j) > 0) || + (controls[i](j) >= upper(j) && df[i](j) < 0))) + { + if (std::abs(df[i](j)) > max_df) + { + max_df = std::abs(df[i](j)); + max_t = i; + max_v = j; + } + } + } + } + if (max_df < eps) + break; + + + + // We will start out by doing a little bit of coordinate descent because it + // allows us to optimize individual variables exactly. Since we are warm + // starting each iteration with a really good solution this helps speed + // things up a lot. + const unsigned long smo_iters = 50; + if (iter < smo_iters) + { + if (Q_diag[max_t](max_v) == 0) continue; + + // Take the optimal step but just for one variable. + controls[max_t](max_v) = -(df[max_t](max_v)-Q_diag[max_t](max_v)*controls[max_t](max_v))/Q_diag[max_t](max_v); + controls[max_t](max_v) = put_in_range(lower(max_v), upper(max_v), controls[max_t](max_v)); + + // If this is the last SMO iteration then don't forget to initialize v + // for the gradient steps. + if (iter+1 == smo_iters) + { + for (unsigned long i = 0; i < horizon; ++i) + v[i] = controls[i]; + } + } + else + { + // Take a projected gradient step. + for (unsigned long i = 0; i < horizon; ++i) + { + v_old[i] = v[i]; + v[i] = dlib::clamp(controls[i] - 1.0/lambda * df[i], lower, upper); + controls[i] = dlib::clamp(v[i] + (std::sqrt(lambda)-1)/(std::sqrt(lambda)+1)*(v[i]-v_old[i]), lower, upper); + } + } + } + } + + unsigned long max_iterations; + double eps; + + matrix<double,S,S> A; + matrix<double,S,I> B; + matrix<double,S,1> C; + matrix<double,S,1> Q; + matrix<double,I,1> R; + matrix<double,I,1> lower; + matrix<double,I,1> upper; + matrix<double,S,1> target[horizon]; + + double lambda; // abound on the largest eigenvalue of the hessian matrix. + matrix<double,I,1> Q_diag[horizon]; + matrix<double,I,1> controls[horizon]; + + }; + +} + +#endif // DLIB_MPC_Hh_ + diff --git a/ml/dlib/dlib/control/mpc_abstract.h b/ml/dlib/dlib/control/mpc_abstract.h new file mode 100644 index 000000000..b4421c076 --- /dev/null +++ b/ml/dlib/dlib/control/mpc_abstract.h @@ -0,0 +1,276 @@ +// Copyright (C) 2015 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_MPC_ABSTRACT_Hh_ +#ifdef DLIB_MPC_ABSTRACT_Hh_ + +#include "../matrix.h" + +namespace dlib +{ + template < + long S_, + long I_, + unsigned long horizon_ + > + class mpc + { + /*! + REQUIREMENTS ON horizon_ + horizon_ > 0 + + REQUIREMENTS ON S_ + S_ >= 0 + + REQUIREMENTS ON I_ + I_ >= 0 + + WHAT THIS OBJECT REPRESENTS + This object implements a linear model predictive controller. To explain + what that means, suppose you have some process you want to control and the + process dynamics are described by the linear equation: + x_{i+1} = A*x_i + B*u_i + C + That is, the next state the system goes into is a linear function of its + current state (x_i) and the current control (u_i) plus some constant bias + or disturbance. + + A model predictive controller can find the control (u) you should apply to + drive the state (x) to some reference value, or alternatively to make the + state track some reference time-varying sequence. It does this by + simulating the process for horizon_ time steps and selecting the control + that leads to the best performance over the next horizon_ steps. + + To be precise, each time you ask this object for a control, it solves the + following quadratic program: + + min sum_i trans(x_i-target_i)*Q*(x_i-target_i) + trans(u_i)*R*u_i + x_i,u_i + + such that: x_0 == current_state + x_{i+1} == A*x_i + B*u_i + C + lower <= u_i <= upper + 0 <= i < horizon_ + + and reports u_0 as the control you should take given that you are currently + in current_state. Q and R are user supplied matrices that define how we + penalize variations away from the target state as well as how much we want + to avoid generating large control signals. + + Finally, the algorithm we use to solve this quadratic program is based + largely on the method described in: + A Fast Gradient method for embedded linear predictive control (2011) + by Markus Kogel and Rolf Findeisen + !*/ + + public: + + const static long S = S_; + const static long I = I_; + const static unsigned long horizon = horizon_; + + mpc( + ); + /*! + ensures + - #get_max_iterations() == 0 + - The A,B,C,Q,R,lower, and upper parameter matrices are filled with zeros. + Therefore, to use this object you must initialize it via the constructor + that supplies these parameters. + !*/ + + mpc ( + const matrix<double,S,S>& A, + const matrix<double,S,I>& B, + const matrix<double,S,1>& C, + const matrix<double,S,1>& Q, + const matrix<double,I,1>& R, + const matrix<double,I,1>& lower, + const matrix<double,I,1>& upper + ); + /*! + requires + - A.nr() > 0 + - B.nc() > 0 + - A.nr() == A.nc() == B.nr() == C.nr() == Q.nr() + - B.nc() == R.nr() == lower.nr() == upper.nr() + - min(Q) >= 0 + - min(R) > 0 + - min(upper-lower) >= 0 + ensures + - #get_A() == A + - #get_B() == B + - #get_C() == C + - #get_Q() == Q + - #get_R() == R + - #get_lower_constraints() == lower + - #get_upper_constraints() == upper + - for all valid i: + - get_target(i) == a vector of all zeros + - get_target(i).size() == A.nr() + - #get_max_iterations() == 10000 + - #get_epsilon() == 0.01 + !*/ + + const matrix<double,S,S>& get_A ( + ) const; + /*! + ensures + - returns the A matrix from the quadratic program defined above. + !*/ + + const matrix<double,S,I>& get_B ( + ) const; + /*! + ensures + - returns the B matrix from the quadratic program defined above. + !*/ + + const matrix<double,S,1>& get_C ( + ) const; + /*! + ensures + - returns the C matrix from the quadratic program defined above. + !*/ + + const matrix<double,S,1>& get_Q ( + ) const; + /*! + ensures + - returns the diagonal of the Q matrix from the quadratic program defined + above. + !*/ + + const matrix<double,I,1>& get_R ( + ) const; + /*! + ensures + - returns the diagonal of the R matrix from the quadratic program defined + above. + !*/ + + const matrix<double,I,1>& get_lower_constraints ( + ) const; + /*! + ensures + - returns the lower matrix from the quadratic program defined above. All + controls generated by this object will have values no less than this + lower bound. That is, any control u will satisfy min(u-lower) >= 0. + !*/ + + const matrix<double,I,1>& get_upper_constraints ( + ) const; + /*! + ensures + - returns the upper matrix from the quadratic program defined above. All + controls generated by this object will have values no larger than this + upper bound. That is, any control u will satisfy min(upper-u) >= 0. + !*/ + + const matrix<double,S,1>& get_target ( + const unsigned long time + ) const; + /*! + requires + - time < horizon + ensures + - This object will try to find the control sequence that results in the + process obtaining get_target(time) state at the indicated time. Note + that the next time instant after "right now" is time 0. + !*/ + + void set_target ( + const matrix<double,S,1>& val, + const unsigned long time + ); + /*! + requires + - time < horizon + ensures + - #get_target(time) == val + !*/ + + void set_target ( + const matrix<double,S,1>& val + ); + /*! + ensures + - for all valid t: + - #get_target(t) == val + !*/ + + void set_last_target ( + const matrix<double,S,1>& val + ); + /*! + ensures + - performs: set_target(val, horizon-1) + !*/ + + unsigned long get_max_iterations ( + ) const; + /*! + ensures + - When operator() is called it solves an optimization problem to + get_epsilon() precision to determine the next control action. In + particular, we run the optimizer until the magnitude of each element of + the gradient vector is less than get_epsilon() or until + get_max_iterations() solver iterations have been executed. + !*/ + + void set_max_iterations ( + unsigned long max_iter + ); + /*! + ensures + - #get_max_iterations() == max_iter + !*/ + + void set_epsilon ( + double eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + double get_epsilon ( + ) const; + /*! + ensures + - When operator() is called it solves an optimization problem to + get_epsilon() precision to determine the next control action. In + particular, we run the optimizer until the magnitude of each element of + the gradient vector is less than get_epsilon() or until + get_max_iterations() solver iterations have been executed. This means + that smaller epsilon values will give more accurate outputs but may take + longer to compute. + !*/ + + matrix<double,I,1> operator() ( + const matrix<double,S,1>& current_state + ); + /*! + requires + - min(R) > 0 + - A.nr() == current_state.size() + ensures + - Solves the model predictive control problem defined by the arguments to + this objects constructor, assuming that the starting state is given by + current_state. Then we return the control that should be taken in the + current state that best optimizes the quadratic objective function + defined above. + - We also shift over the target states so that you only need to update the + last one (if you are using non-zero target states) via a call to + set_last_target()). In particular, for all valid t, it will be the case + that: + - #get_target(t) == get_target(t+1) + - #get_target(horizon-1) == get_target(horizon-1) + !*/ + + }; + +} + +#endif // DLIB_MPC_ABSTRACT_Hh_ + |