diff options
Diffstat (limited to 'ml/dlib/dlib/control')
-rw-r--r-- | ml/dlib/dlib/control/approximate_linear_models.h | 128 | ||||
-rw-r--r-- | ml/dlib/dlib/control/approximate_linear_models_abstract.h | 213 | ||||
-rw-r--r-- | ml/dlib/dlib/control/lspi.h | 188 | ||||
-rw-r--r-- | ml/dlib/dlib/control/lspi_abstract.h | 193 | ||||
-rw-r--r-- | ml/dlib/dlib/control/mpc.h | 370 | ||||
-rw-r--r-- | ml/dlib/dlib/control/mpc_abstract.h | 276 |
6 files changed, 0 insertions, 1368 deletions
diff --git a/ml/dlib/dlib/control/approximate_linear_models.h b/ml/dlib/dlib/control/approximate_linear_models.h deleted file mode 100644 index 9732d71e9..000000000 --- a/ml/dlib/dlib/control/approximate_linear_models.h +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (C) 2015 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_APPROXIMATE_LINEAR_MODELS_Hh_ -#define DLIB_APPROXIMATE_LINEAR_MODELS_Hh_ - -#include "approximate_linear_models_abstract.h" -#include "../matrix.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - struct process_sample - { - typedef feature_extractor feature_extractor_type; - typedef typename feature_extractor::state_type state_type; - typedef typename feature_extractor::action_type action_type; - - process_sample(){} - - process_sample( - const state_type& s, - const action_type& a, - const state_type& n, - const double& r - ) : state(s), action(a), next_state(n), reward(r) {} - - state_type state; - action_type action; - state_type next_state; - double reward; - }; - - template < typename feature_extractor > - void serialize (const process_sample<feature_extractor>& item, std::ostream& out) - { - serialize(item.state, out); - serialize(item.action, out); - serialize(item.next_state, out); - serialize(item.reward, out); - } - - template < typename feature_extractor > - void deserialize (process_sample<feature_extractor>& item, std::istream& in) - { - deserialize(item.state, in); - deserialize(item.action, in); - deserialize(item.next_state, in); - deserialize(item.reward, in); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class policy - { - public: - - typedef feature_extractor feature_extractor_type; - typedef typename feature_extractor::state_type state_type; - typedef typename feature_extractor::action_type action_type; - - - policy ( - ) - { - w.set_size(fe.num_features()); - w = 0; - } - - policy ( - const matrix<double,0,1>& weights_, - const feature_extractor& fe_ - ) : w(weights_), fe(fe_) {} - - action_type operator() ( - const state_type& state - ) const - { - return fe.find_best_action(state,w); - } - - const feature_extractor& get_feature_extractor ( - ) const { return fe; } - - const matrix<double,0,1>& get_weights ( - ) const { return w; } - - - private: - matrix<double,0,1> w; - feature_extractor fe; - }; - - template < typename feature_extractor > - inline void serialize(const policy<feature_extractor>& item, std::ostream& out) - { - int version = 1; - serialize(version, out); - serialize(item.get_feature_extractor(), out); - serialize(item.get_weights(), out); - } - template < typename feature_extractor > - inline void deserialize(policy<feature_extractor>& item, std::istream& in) - { - int version = 0; - deserialize(version, in); - if (version != 1) - throw serialization_error("Unexpected version found while deserializing dlib::policy object."); - feature_extractor fe; - matrix<double,0,1> w; - deserialize(fe, in); - deserialize(w, in); - item = policy<feature_extractor>(w,fe); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_APPROXIMATE_LINEAR_MODELS_Hh_ - diff --git a/ml/dlib/dlib/control/approximate_linear_models_abstract.h b/ml/dlib/dlib/control/approximate_linear_models_abstract.h deleted file mode 100644 index 59dac4276..000000000 --- a/ml/dlib/dlib/control/approximate_linear_models_abstract.h +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright (C) 2015 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_ -#ifdef DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_ - -#include "../matrix.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - struct example_feature_extractor - { - /*! - WHAT THIS OBJECT REPRESENTS - This object defines the interface a feature extractor must implement if it - is to be used with the process_sample and policy objects defined at the - bottom of this file. Moreover, it is meant to represent the core part - of a model used in a reinforcement learning algorithm. - - In particular, this object models a Q(state,action) function where - Q(state,action) == dot(w, PSI(state,action)) - where PSI(state,action) is a feature vector and w is a parameter - vector. - - Therefore, a feature extractor defines how the PSI(x,y) feature vector is - calculated. It also defines the types used to represent the state and - action objects. - - - THREAD SAFETY - Instances of this object are required to be threadsafe, that is, it should - be safe for multiple threads to make concurrent calls to the member - functions of this object. - !*/ - - // The state and actions can be any types so long as you provide typedefs for them. - typedef T state_type; - typedef U action_type; - // We can also say that the last element in the weight vector w must be 1. This - // can be useful for including a prior into your model. - const static bool force_last_weight_to_1 = false; - - example_feature_extractor( - ); - /*! - ensures - - this object is properly initialized. - !*/ - - unsigned long num_features( - ) const; - /*! - ensures - - returns the dimensionality of the PSI() feature vector. - !*/ - - action_type find_best_action ( - const state_type& state, - const matrix<double,0,1>& w - ) const; - /*! - ensures - - returns the action A that maximizes Q(state,A) = dot(w,PSI(state,A)). - That is, this function finds the best action to take in the given state - when our model is parameterized by the given weight vector w. - !*/ - - void get_features ( - const state_type& state, - const action_type& action, - matrix<double,0,1>& feats - ) const; - /*! - ensures - - #feats.size() == num_features() - - #feats == PSI(state,action) - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - struct process_sample - { - /*! - REQUIREMENTS ON feature_extractor - feature_extractor should implement the example_feature_extractor interface - defined at the top of this file. - - WHAT THIS OBJECT REPRESENTS - This object holds a training sample for a reinforcement learning algorithm. - In particular, it should be a sample from some process where the process - was in state this->state, then took this->action action which resulted in - receiving this->reward and ending up in the state this->next_state. - !*/ - - typedef feature_extractor feature_extractor_type; - typedef typename feature_extractor::state_type state_type; - typedef typename feature_extractor::action_type action_type; - - process_sample(){} - - process_sample( - const state_type& s, - const action_type& a, - const state_type& n, - const double& r - ) : state(s), action(a), next_state(n), reward(r) {} - - state_type state; - action_type action; - state_type next_state; - double reward; - }; - - template < typename feature_extractor > - void serialize (const process_sample<feature_extractor>& item, std::ostream& out); - template < typename feature_extractor > - void deserialize (process_sample<feature_extractor>& item, std::istream& in); - /*! - provides serialization support. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class policy - { - /*! - REQUIREMENTS ON feature_extractor - feature_extractor should implement the example_feature_extractor interface - defined at the top of this file. - - WHAT THIS OBJECT REPRESENTS - This is a policy based on the supplied feature_extractor model. In - particular, it maps from feature_extractor::state_type to the best action - to take in that state. - !*/ - - public: - - typedef feature_extractor feature_extractor_type; - typedef typename feature_extractor::state_type state_type; - typedef typename feature_extractor::action_type action_type; - - - policy ( - ); - /*! - ensures - - #get_feature_extractor() == feature_extractor() - (i.e. it will have its default value) - - #get_weights().size() == #get_feature_extractor().num_features() - - #get_weights() == 0 - !*/ - - policy ( - const matrix<double,0,1>& weights, - const feature_extractor& fe - ); - /*! - requires - - fe.num_features() == weights.size() - ensures - - #get_feature_extractor() == fe - - #get_weights() == weights - !*/ - - action_type operator() ( - const state_type& state - ) const; - /*! - ensures - - returns get_feature_extractor().find_best_action(state,w); - !*/ - - const feature_extractor& get_feature_extractor ( - ) const; - /*! - ensures - - returns the feature extractor used by this object - !*/ - - const matrix<double,0,1>& get_weights ( - ) const; - /*! - ensures - - returns the parameter vector (w) associated with this object. The length - of the vector is get_feature_extractor().num_features(). - !*/ - - }; - - template < typename feature_extractor > - void serialize(const policy<feature_extractor>& item, std::ostream& out); - template < typename feature_extractor > - void deserialize(policy<feature_extractor>& item, std::istream& in); - /*! - provides serialization support. - !*/ - -// ---------------------------------------------------------------------------------------- - - -#endif // DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_ - diff --git a/ml/dlib/dlib/control/lspi.h b/ml/dlib/dlib/control/lspi.h deleted file mode 100644 index b21a501d2..000000000 --- a/ml/dlib/dlib/control/lspi.h +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright (C) 2015 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_LSPI_Hh_ -#define DLIB_LSPI_Hh_ - -#include "lspi_abstract.h" -#include "approximate_linear_models.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class lspi - { - public: - typedef feature_extractor feature_extractor_type; - typedef typename feature_extractor::state_type state_type; - typedef typename feature_extractor::action_type action_type; - - explicit lspi( - const feature_extractor& fe_ - ) : fe(fe_) - { - init(); - } - - lspi( - ) - { - init(); - } - - double get_discount ( - ) const { return discount; } - - void set_discount ( - double value - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < value && value <= 1, - "\t void lspi::set_discount(value)" - << "\n\t invalid inputs were given to this function" - << "\n\t value: " << value - ); - discount = value; - } - - const feature_extractor& get_feature_extractor ( - ) const { return fe; } - - void be_verbose ( - ) - { - verbose = true; - } - - void be_quiet ( - ) - { - verbose = false; - } - - void set_epsilon ( - double eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void lspi::set_epsilon(eps_)" - << "\n\t invalid inputs were given to this function" - << "\n\t eps_: " << eps_ - ); - eps = eps_; - } - - double get_epsilon ( - ) const - { - return eps; - } - - void set_lambda ( - double lambda_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(lambda_ >= 0, - "\t void lspi::set_lambda(lambda_)" - << "\n\t invalid inputs were given to this function" - << "\n\t lambda_: " << lambda_ - ); - lambda = lambda_; - } - - double get_lambda ( - ) const - { - return lambda; - } - - void set_max_iterations ( - unsigned long max_iter - ) { max_iterations = max_iter; } - - unsigned long get_max_iterations ( - ) { return max_iterations; } - - template <typename vector_type> - policy<feature_extractor> train ( - const vector_type& samples - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(samples.size() > 0, - "\t policy lspi::train(samples)" - << "\n\t invalid inputs were given to this function" - ); - - matrix<double,0,1> w(fe.num_features()); - w = 0; - matrix<double,0,1> prev_w, b, f1, f2; - - matrix<double> A; - - double change; - unsigned long iter = 0; - do - { - A = identity_matrix<double>(fe.num_features())*lambda; - b = 0; - for (unsigned long i = 0; i < samples.size(); ++i) - { - fe.get_features(samples[i].state, samples[i].action, f1); - fe.get_features(samples[i].next_state, - fe.find_best_action(samples[i].next_state,w), - f2); - A += f1*trans(f1 - discount*f2); - b += f1*samples[i].reward; - } - - prev_w = w; - if (feature_extractor::force_last_weight_to_1) - w = join_cols(pinv(colm(A,range(0,A.nc()-2)))*(b-colm(A,A.nc()-1)),mat(1.0)); - else - w = pinv(A)*b; - - change = length(w-prev_w); - ++iter; - - if (verbose) - std::cout << "iteration: " << iter << "\tchange: " << change << std::endl; - - } while(change > eps && iter < max_iterations); - - return policy<feature_extractor>(w,fe); - } - - - private: - - void init() - { - lambda = 0.01; - discount = 0.8; - eps = 0.01; - verbose = false; - max_iterations = 100; - } - - double lambda; - double discount; - double eps; - bool verbose; - unsigned long max_iterations; - feature_extractor fe; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_LSPI_Hh_ - diff --git a/ml/dlib/dlib/control/lspi_abstract.h b/ml/dlib/dlib/control/lspi_abstract.h deleted file mode 100644 index f262d16f4..000000000 --- a/ml/dlib/dlib/control/lspi_abstract.h +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright (C) 2015 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_LSPI_ABSTRACT_Hh_ -#ifdef DLIB_LSPI_ABSTRACT_Hh_ - -#include "approximate_linear_models_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class lspi - { - /*! - REQUIREMENTS ON feature_extractor - feature_extractor should implement the example_feature_extractor interface - defined at the top of dlib/control/approximate_linear_models_abstract.h - - WHAT THIS OBJECT REPRESENTS - This object is an implementation of the reinforcement learning algorithm - described in the following paper: - Lagoudakis, Michail G., and Ronald Parr. "Least-squares policy - iteration." The Journal of Machine Learning Research 4 (2003): - 1107-1149. - - This means that it takes a bunch of training data in the form of - process_samples and outputs a policy that hopefully performs well when run - on the process that generated those samples. - !*/ - - public: - typedef feature_extractor feature_extractor_type; - typedef typename feature_extractor::state_type state_type; - typedef typename feature_extractor::action_type action_type; - - explicit lspi( - const feature_extractor& fe_ - ); - /*! - ensures - - #get_feature_extractor() == fe_ - - #get_lambda() == 0.01 - - #get_discount == 0.8 - - #get_epsilon() == 0.01 - - is not verbose - - #get_max_iterations() == 100 - !*/ - - lspi( - ); - /*! - ensures - - #get_feature_extractor() == feature_extractor() - (i.e. it will have its default value) - - #get_lambda() == 0.01 - - #get_discount == 0.8 - - #get_epsilon() == 0.01 - - is not verbose - - #get_max_iterations() == 100 - !*/ - - double get_discount ( - ) const; - /*! - ensures - - returns the discount applied to the sum of rewards in the Bellman - equation. - !*/ - - void set_discount ( - double value - ); - /*! - requires - - 0 < value <= 1 - ensures - - #get_discount() == value - !*/ - - const feature_extractor& get_feature_extractor ( - ) const; - /*! - ensures - - returns the feature extractor used by this object - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out so that a - user can observe the progress of the algorithm. - !*/ - - void be_quiet ( - ); - /*! - ensures - - this object will not print anything to standard out - !*/ - - void set_epsilon ( - double eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - double get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Smaller values may result in a more accurate solution but take longer to - train. - !*/ - - void set_lambda ( - double lambda_ - ); - /*! - requires - - lambda >= 0 - ensures - - #get_lambda() == lambda - !*/ - - double get_lambda ( - ) const; - /*! - ensures - - returns the regularization parameter. It is the parameter that - determines the trade off between trying to fit the training data - exactly or allowing more errors but hopefully improving the - generalization ability of the resulting function. Smaller values - encourage exact fitting while larger values of lambda may encourage - better generalization. - !*/ - - void set_max_iterations ( - unsigned long max_iter - ); - /*! - ensures - - #get_max_iterations() == max_iter - !*/ - - unsigned long get_max_iterations ( - ); - /*! - ensures - - returns the maximum number of iterations the SVM optimizer is allowed to - run before it is required to stop and return a result. - !*/ - - template < - typename vector_type - > - policy<feature_extractor> train ( - const vector_type& samples - ) const; - /*! - requires - - samples.size() > 0 - - samples is something with an interface that looks like - std::vector<process_sample<feature_extractor>>. That is, it should - be some kind of array of process_sample objects. - ensures - - Trains a policy based on the given data and returns the results. The - idea is to find a policy that will obtain the largest possible reward - when run on the process that generated the samples. In particular, - if the returned policy is P then: - - P(S) == the best action to take when in state S. - - if (feature_extractor::force_last_weight_to_1) then - - The last element of P.get_weights() is 1. - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_LSPI_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/control/mpc.h b/ml/dlib/dlib/control/mpc.h deleted file mode 100644 index 48ef2b72d..000000000 --- a/ml/dlib/dlib/control/mpc.h +++ /dev/null @@ -1,370 +0,0 @@ -// Copyright (C) 2015 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_MPC_Hh_ -#define DLIB_MPC_Hh_ - -#include "mpc_abstract.h" -#include "../matrix.h" -#include "../algs.h" - - -namespace dlib -{ - template < - long S_, - long I_, - unsigned long horizon_ - > - class mpc - { - - public: - - const static long S = S_; - const static long I = I_; - const static unsigned long horizon = horizon_; - - mpc( - ) - { - A = 0; - B = 0; - C = 0; - Q = 0; - R = 0; - lower = 0; - upper = 0; - - max_iterations = 0; - eps = 0.01; - for (unsigned long i = 0; i < horizon; ++i) - { - target[i].set_size(A.nr()); - target[i] = 0; - - controls[i].set_size(B.nc()); - controls[i] = 0; - } - lambda = 0; - } - - mpc ( - const matrix<double,S,S>& A_, - const matrix<double,S,I>& B_, - const matrix<double,S,1>& C_, - const matrix<double,S,1>& Q_, - const matrix<double,I,1>& R_, - const matrix<double,I,1>& lower_, - const matrix<double,I,1>& upper_ - ) : A(A_), B(B_), C(C_), Q(Q_), R(R_), lower(lower_), upper(upper_) - { - // make sure requires clause is not broken - DLIB_ASSERT(A.nr() > 0 && B.nc() > 0, - "\t mpc::mpc()" - << "\n\t invalid inputs were given to this function" - << "\n\t A.nr(): " << A.nr() - << "\n\t B.nc(): " << B.nc() - ); - - DLIB_ASSERT(A.nr() == A.nc() && - A.nr() == B.nr() && - A.nr() == C.nr() && - A.nr() == Q.nr(), - "\t mpc::mpc()" - << "\n\t invalid inputs were given to this function" - << "\n\t A.nr(): " << A.nr() - << "\n\t A.nc(): " << A.nc() - << "\n\t B.nr(): " << B.nr() - << "\n\t C.nr(): " << C.nr() - << "\n\t Q.nr(): " << Q.nr() - ); - DLIB_ASSERT( - B.nc() == R.nr() && - B.nc() == lower.nr() && - B.nc() == upper.nr() , - "\t mpc::mpc()" - << "\n\t invalid inputs were given to this function" - << "\n\t B.nr(): " << B.nr() - << "\n\t B.nc(): " << B.nc() - << "\n\t lower.nr(): " << lower.nr() - << "\n\t upper.nr(): " << upper.nr() - ); - DLIB_ASSERT(min(Q) >= 0 && - min(R) > 0 && - min(upper-lower) >= 0, - "\t mpc::mpc()" - << "\n\t invalid inputs were given to this function" - << "\n\t min(Q): " << min(Q) - << "\n\t min(R): " << min(R) - << "\n\t min(upper-lower): " << min(upper-lower) - ); - - - max_iterations = 10000; - eps = 0.01; - for (unsigned long i = 0; i < horizon; ++i) - { - target[i].set_size(A.nr()); - target[i] = 0; - - controls[i].set_size(B.nc()); - controls[i] = 0; - } - - // Bound the maximum eigenvalue of the hessian by computing the trace of the - // hessian matrix. - lambda = sum(R)*horizon; - matrix<double,S,S> temp = diagm(Q); - for (unsigned long c = 0; c < horizon; ++c) - { - lambda += trace(trans(B)*temp*B); - Q_diag[horizon-c-1] = diag(trans(B)*temp*B); - temp = trans(A)*temp*A + diagm(Q); - } - - } - - const matrix<double,S,S>& get_A ( - ) const { return A; } - const matrix<double,S,I>& get_B ( - ) const { return B; } - const matrix<double,S,1>& get_C ( - ) const { return C; } - const matrix<double,S,1>& get_Q ( - ) const { return Q; } - const matrix<double,I,1>& get_R ( - ) const { return R; } - const matrix<double,I,1>& get_lower_constraints ( - ) const { return lower; } - const matrix<double,I,1>& get_upper_constraints ( - ) const { return upper; } - - void set_target ( - const matrix<double,S,1>& val, - const unsigned long time - ) - { - DLIB_ASSERT(time < horizon, - "\t void mpc::set_target(eps_)" - << "\n\t invalid inputs were given to this function" - << "\n\t time: " << time - << "\n\t horizon: " << horizon - ); - - target[time] = val; - } - - void set_target ( - const matrix<double,S,1>& val - ) - { - for (unsigned long i = 0; i < horizon; ++i) - target[i] = val; - } - - void set_last_target ( - const matrix<double,S,1>& val - ) - { - set_target(val, horizon-1); - } - - const matrix<double,S,1>& get_target ( - const unsigned long time - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(time < horizon, - "\t matrix mpc::get_target(eps_)" - << "\n\t invalid inputs were given to this function" - << "\n\t time: " << time - << "\n\t horizon: " << horizon - ); - - return target[time]; - } - - unsigned long get_max_iterations ( - ) const { return max_iterations; } - - void set_max_iterations ( - unsigned long max_iter - ) - { - max_iterations = max_iter; - } - - void set_epsilon ( - double eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void mpc::set_epsilon(eps_)" - << "\n\t invalid inputs were given to this function" - << "\n\t eps_: " << eps_ - ); - eps = eps_; - } - - double get_epsilon ( - ) const - { - return eps; - } - - matrix<double,I,1> operator() ( - const matrix<double,S,1>& current_state - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(min(R) > 0 && A.nr() == current_state.size(), - "\t matrix mpc::operator(current_state)" - << "\n\t invalid inputs were given to this function" - << "\n\t min(R): " << min(R) - << "\n\t A.nr(): " << A.nr() - << "\n\t current_state.size(): " << current_state.size() - ); - - // Shift the inputs over by one time step so we can use them to warm start the - // optimizer. - for (unsigned long i = 1; i < horizon; ++i) - controls[i-1] = controls[i]; - - solve_linear_mpc(current_state); - - for (unsigned long i = 1; i < horizon; ++i) - target[i-1] = target[i]; - - return controls[0]; - } - - private: - - - // These temporary variables here just to avoid reallocating them on each call to - // operator(). - matrix<double,S,1> M[horizon]; - matrix<double,I,1> MM[horizon]; - matrix<double,I,1> df[horizon]; - matrix<double,I,1> v[horizon]; - matrix<double,I,1> v_old[horizon]; - - void solve_linear_mpc ( - const matrix<double,S,1>& initial_state - ) - { - // make it so MM == trans(K)*Q*(M-target) - M[0] = A*initial_state + C; - for (unsigned long i = 1; i < horizon; ++i) - M[i] = A*M[i-1] + C; - for (unsigned long i = 0; i < horizon; ++i) - M[i] = diagm(Q)*(M[i]-target[i]); - for (long i = (long)horizon-2; i >= 0; --i) - M[i] += trans(A)*M[i+1]; - for (unsigned long i = 0; i < horizon; ++i) - MM[i] = trans(B)*M[i]; - - - - unsigned long iter = 0; - for (; iter < max_iterations; ++iter) - { - // compute current gradient and put it into df. - // df == H*controls + MM; - M[0] = B*controls[0]; - for (unsigned long i = 1; i < horizon; ++i) - M[i] = A*M[i-1] + B*controls[i]; - for (unsigned long i = 0; i < horizon; ++i) - M[i] = diagm(Q)*M[i]; - for (long i = (long)horizon-2; i >= 0; --i) - M[i] += trans(A)*M[i+1]; - for (unsigned long i = 0; i < horizon; ++i) - df[i] = MM[i] + trans(B)*M[i] + diagm(R)*controls[i]; - - - - // Check the stopping condition, which is the magnitude of the largest element - // of the gradient. - double max_df = 0; - unsigned long max_t = 0; - long max_v = 0; - for (unsigned long i = 0; i < horizon; ++i) - { - for (long j = 0; j < controls[i].size(); ++j) - { - // if this variable isn't an active constraint then we care about it's - // derivative. - if (!((controls[i](j) <= lower(j) && df[i](j) > 0) || - (controls[i](j) >= upper(j) && df[i](j) < 0))) - { - if (std::abs(df[i](j)) > max_df) - { - max_df = std::abs(df[i](j)); - max_t = i; - max_v = j; - } - } - } - } - if (max_df < eps) - break; - - - - // We will start out by doing a little bit of coordinate descent because it - // allows us to optimize individual variables exactly. Since we are warm - // starting each iteration with a really good solution this helps speed - // things up a lot. - const unsigned long smo_iters = 50; - if (iter < smo_iters) - { - if (Q_diag[max_t](max_v) == 0) continue; - - // Take the optimal step but just for one variable. - controls[max_t](max_v) = -(df[max_t](max_v)-Q_diag[max_t](max_v)*controls[max_t](max_v))/Q_diag[max_t](max_v); - controls[max_t](max_v) = put_in_range(lower(max_v), upper(max_v), controls[max_t](max_v)); - - // If this is the last SMO iteration then don't forget to initialize v - // for the gradient steps. - if (iter+1 == smo_iters) - { - for (unsigned long i = 0; i < horizon; ++i) - v[i] = controls[i]; - } - } - else - { - // Take a projected gradient step. - for (unsigned long i = 0; i < horizon; ++i) - { - v_old[i] = v[i]; - v[i] = dlib::clamp(controls[i] - 1.0/lambda * df[i], lower, upper); - controls[i] = dlib::clamp(v[i] + (std::sqrt(lambda)-1)/(std::sqrt(lambda)+1)*(v[i]-v_old[i]), lower, upper); - } - } - } - } - - unsigned long max_iterations; - double eps; - - matrix<double,S,S> A; - matrix<double,S,I> B; - matrix<double,S,1> C; - matrix<double,S,1> Q; - matrix<double,I,1> R; - matrix<double,I,1> lower; - matrix<double,I,1> upper; - matrix<double,S,1> target[horizon]; - - double lambda; // abound on the largest eigenvalue of the hessian matrix. - matrix<double,I,1> Q_diag[horizon]; - matrix<double,I,1> controls[horizon]; - - }; - -} - -#endif // DLIB_MPC_Hh_ - diff --git a/ml/dlib/dlib/control/mpc_abstract.h b/ml/dlib/dlib/control/mpc_abstract.h deleted file mode 100644 index b4421c076..000000000 --- a/ml/dlib/dlib/control/mpc_abstract.h +++ /dev/null @@ -1,276 +0,0 @@ -// Copyright (C) 2015 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_MPC_ABSTRACT_Hh_ -#ifdef DLIB_MPC_ABSTRACT_Hh_ - -#include "../matrix.h" - -namespace dlib -{ - template < - long S_, - long I_, - unsigned long horizon_ - > - class mpc - { - /*! - REQUIREMENTS ON horizon_ - horizon_ > 0 - - REQUIREMENTS ON S_ - S_ >= 0 - - REQUIREMENTS ON I_ - I_ >= 0 - - WHAT THIS OBJECT REPRESENTS - This object implements a linear model predictive controller. To explain - what that means, suppose you have some process you want to control and the - process dynamics are described by the linear equation: - x_{i+1} = A*x_i + B*u_i + C - That is, the next state the system goes into is a linear function of its - current state (x_i) and the current control (u_i) plus some constant bias - or disturbance. - - A model predictive controller can find the control (u) you should apply to - drive the state (x) to some reference value, or alternatively to make the - state track some reference time-varying sequence. It does this by - simulating the process for horizon_ time steps and selecting the control - that leads to the best performance over the next horizon_ steps. - - To be precise, each time you ask this object for a control, it solves the - following quadratic program: - - min sum_i trans(x_i-target_i)*Q*(x_i-target_i) + trans(u_i)*R*u_i - x_i,u_i - - such that: x_0 == current_state - x_{i+1} == A*x_i + B*u_i + C - lower <= u_i <= upper - 0 <= i < horizon_ - - and reports u_0 as the control you should take given that you are currently - in current_state. Q and R are user supplied matrices that define how we - penalize variations away from the target state as well as how much we want - to avoid generating large control signals. - - Finally, the algorithm we use to solve this quadratic program is based - largely on the method described in: - A Fast Gradient method for embedded linear predictive control (2011) - by Markus Kogel and Rolf Findeisen - !*/ - - public: - - const static long S = S_; - const static long I = I_; - const static unsigned long horizon = horizon_; - - mpc( - ); - /*! - ensures - - #get_max_iterations() == 0 - - The A,B,C,Q,R,lower, and upper parameter matrices are filled with zeros. - Therefore, to use this object you must initialize it via the constructor - that supplies these parameters. - !*/ - - mpc ( - const matrix<double,S,S>& A, - const matrix<double,S,I>& B, - const matrix<double,S,1>& C, - const matrix<double,S,1>& Q, - const matrix<double,I,1>& R, - const matrix<double,I,1>& lower, - const matrix<double,I,1>& upper - ); - /*! - requires - - A.nr() > 0 - - B.nc() > 0 - - A.nr() == A.nc() == B.nr() == C.nr() == Q.nr() - - B.nc() == R.nr() == lower.nr() == upper.nr() - - min(Q) >= 0 - - min(R) > 0 - - min(upper-lower) >= 0 - ensures - - #get_A() == A - - #get_B() == B - - #get_C() == C - - #get_Q() == Q - - #get_R() == R - - #get_lower_constraints() == lower - - #get_upper_constraints() == upper - - for all valid i: - - get_target(i) == a vector of all zeros - - get_target(i).size() == A.nr() - - #get_max_iterations() == 10000 - - #get_epsilon() == 0.01 - !*/ - - const matrix<double,S,S>& get_A ( - ) const; - /*! - ensures - - returns the A matrix from the quadratic program defined above. - !*/ - - const matrix<double,S,I>& get_B ( - ) const; - /*! - ensures - - returns the B matrix from the quadratic program defined above. - !*/ - - const matrix<double,S,1>& get_C ( - ) const; - /*! - ensures - - returns the C matrix from the quadratic program defined above. - !*/ - - const matrix<double,S,1>& get_Q ( - ) const; - /*! - ensures - - returns the diagonal of the Q matrix from the quadratic program defined - above. - !*/ - - const matrix<double,I,1>& get_R ( - ) const; - /*! - ensures - - returns the diagonal of the R matrix from the quadratic program defined - above. - !*/ - - const matrix<double,I,1>& get_lower_constraints ( - ) const; - /*! - ensures - - returns the lower matrix from the quadratic program defined above. All - controls generated by this object will have values no less than this - lower bound. That is, any control u will satisfy min(u-lower) >= 0. - !*/ - - const matrix<double,I,1>& get_upper_constraints ( - ) const; - /*! - ensures - - returns the upper matrix from the quadratic program defined above. All - controls generated by this object will have values no larger than this - upper bound. That is, any control u will satisfy min(upper-u) >= 0. - !*/ - - const matrix<double,S,1>& get_target ( - const unsigned long time - ) const; - /*! - requires - - time < horizon - ensures - - This object will try to find the control sequence that results in the - process obtaining get_target(time) state at the indicated time. Note - that the next time instant after "right now" is time 0. - !*/ - - void set_target ( - const matrix<double,S,1>& val, - const unsigned long time - ); - /*! - requires - - time < horizon - ensures - - #get_target(time) == val - !*/ - - void set_target ( - const matrix<double,S,1>& val - ); - /*! - ensures - - for all valid t: - - #get_target(t) == val - !*/ - - void set_last_target ( - const matrix<double,S,1>& val - ); - /*! - ensures - - performs: set_target(val, horizon-1) - !*/ - - unsigned long get_max_iterations ( - ) const; - /*! - ensures - - When operator() is called it solves an optimization problem to - get_epsilon() precision to determine the next control action. In - particular, we run the optimizer until the magnitude of each element of - the gradient vector is less than get_epsilon() or until - get_max_iterations() solver iterations have been executed. - !*/ - - void set_max_iterations ( - unsigned long max_iter - ); - /*! - ensures - - #get_max_iterations() == max_iter - !*/ - - void set_epsilon ( - double eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - double get_epsilon ( - ) const; - /*! - ensures - - When operator() is called it solves an optimization problem to - get_epsilon() precision to determine the next control action. In - particular, we run the optimizer until the magnitude of each element of - the gradient vector is less than get_epsilon() or until - get_max_iterations() solver iterations have been executed. This means - that smaller epsilon values will give more accurate outputs but may take - longer to compute. - !*/ - - matrix<double,I,1> operator() ( - const matrix<double,S,1>& current_state - ); - /*! - requires - - min(R) > 0 - - A.nr() == current_state.size() - ensures - - Solves the model predictive control problem defined by the arguments to - this objects constructor, assuming that the starting state is given by - current_state. Then we return the control that should be taken in the - current state that best optimizes the quadratic objective function - defined above. - - We also shift over the target states so that you only need to update the - last one (if you are using non-zero target states) via a call to - set_last_target()). In particular, for all valid t, it will be the case - that: - - #get_target(t) == get_target(t+1) - - #get_target(horizon-1) == get_target(horizon-1) - !*/ - - }; - -} - -#endif // DLIB_MPC_ABSTRACT_Hh_ - |