Diffstat (limited to 'ml/dlib/dlib/control')
-rw-r--r--  ml/dlib/dlib/control/approximate_linear_models.h           | 128
-rw-r--r--  ml/dlib/dlib/control/approximate_linear_models_abstract.h  | 213
-rw-r--r--  ml/dlib/dlib/control/lspi.h                                | 188
-rw-r--r--  ml/dlib/dlib/control/lspi_abstract.h                       | 193
-rw-r--r--  ml/dlib/dlib/control/mpc.h                                 | 370
-rw-r--r--  ml/dlib/dlib/control/mpc_abstract.h                        | 276
6 files changed, 0 insertions, 1368 deletions
diff --git a/ml/dlib/dlib/control/approximate_linear_models.h b/ml/dlib/dlib/control/approximate_linear_models.h
deleted file mode 100644
index 9732d71e9..000000000
--- a/ml/dlib/dlib/control/approximate_linear_models.h
+++ /dev/null
@@ -1,128 +0,0 @@
-// Copyright (C) 2015 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_APPROXIMATE_LINEAR_MODELS_Hh_
-#define DLIB_APPROXIMATE_LINEAR_MODELS_Hh_
-
-#include "approximate_linear_models_abstract.h"
-#include "../matrix.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename feature_extractor
- >
- struct process_sample
- {
- typedef feature_extractor feature_extractor_type;
- typedef typename feature_extractor::state_type state_type;
- typedef typename feature_extractor::action_type action_type;
-
- process_sample(){}
-
- process_sample(
- const state_type& s,
- const action_type& a,
- const state_type& n,
- const double& r
- ) : state(s), action(a), next_state(n), reward(r) {}
-
- state_type state;
- action_type action;
- state_type next_state;
- double reward;
- };
-
- template < typename feature_extractor >
- void serialize (const process_sample<feature_extractor>& item, std::ostream& out)
- {
- serialize(item.state, out);
- serialize(item.action, out);
- serialize(item.next_state, out);
- serialize(item.reward, out);
- }
-
- template < typename feature_extractor >
- void deserialize (process_sample<feature_extractor>& item, std::istream& in)
- {
- deserialize(item.state, in);
- deserialize(item.action, in);
- deserialize(item.next_state, in);
- deserialize(item.reward, in);
- }
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename feature_extractor
- >
- class policy
- {
- public:
-
- typedef feature_extractor feature_extractor_type;
- typedef typename feature_extractor::state_type state_type;
- typedef typename feature_extractor::action_type action_type;
-
-
- policy (
- )
- {
- w.set_size(fe.num_features());
- w = 0;
- }
-
- policy (
- const matrix<double,0,1>& weights_,
- const feature_extractor& fe_
- ) : w(weights_), fe(fe_) {}
-
- action_type operator() (
- const state_type& state
- ) const
- {
- return fe.find_best_action(state,w);
- }
-
- const feature_extractor& get_feature_extractor (
- ) const { return fe; }
-
- const matrix<double,0,1>& get_weights (
- ) const { return w; }
-
-
- private:
- matrix<double,0,1> w;
- feature_extractor fe;
- };
-
- template < typename feature_extractor >
- inline void serialize(const policy<feature_extractor>& item, std::ostream& out)
- {
- int version = 1;
- serialize(version, out);
- serialize(item.get_feature_extractor(), out);
- serialize(item.get_weights(), out);
- }
- template < typename feature_extractor >
- inline void deserialize(policy<feature_extractor>& item, std::istream& in)
- {
- int version = 0;
- deserialize(version, in);
- if (version != 1)
- throw serialization_error("Unexpected version found while deserializing dlib::policy object.");
- feature_extractor fe;
- matrix<double,0,1> w;
- deserialize(fe, in);
- deserialize(w, in);
- item = policy<feature_extractor>(w,fe);
- }
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_APPROXIMATE_LINEAR_MODELS_Hh_
-
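For orientation, here is a minimal sketch of the feature extractor contract that the process_sample and policy templates above were written against. Everything below is hypothetical and invented for illustration; toy_extractor is not part of the deleted sources. It models Q(state,action) = dot(w, PSI(state,action)) for a 1-D state and a binary action:

#include <dlib/control.h>

struct toy_extractor
{
    typedef double state_type;
    typedef int    action_type;   // the two actions are -1 and +1
    const static bool force_last_weight_to_1 = false;

    unsigned long num_features() const { return 2; }

    action_type find_best_action (
        const state_type& state,
        const dlib::matrix<double,0,1>& w
    ) const
    {
        // Evaluate Q(state,a) for both actions and return the argmax.
        dlib::matrix<double,0,1> f;
        get_features(state, +1, f);
        const double q_pos = dlib::dot(w, f);
        get_features(state, -1, f);
        const double q_neg = dlib::dot(w, f);
        return q_pos > q_neg ? +1 : -1;
    }

    void get_features (
        const state_type& state,
        const action_type& action,
        dlib::matrix<double,0,1>& feats
    ) const
    {
        feats.set_size(2);
        feats = state*action, 1.0;   // PSI(state,action), comma-initialized
    }
};

// Given such a type, policy<toy_extractor>(w, toy_extractor()) is callable:
// invoking it with a state simply forwards to find_best_action(state, w).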
diff --git a/ml/dlib/dlib/control/approximate_linear_models_abstract.h b/ml/dlib/dlib/control/approximate_linear_models_abstract.h
deleted file mode 100644
index 59dac4276..000000000
--- a/ml/dlib/dlib/control/approximate_linear_models_abstract.h
+++ /dev/null
@@ -1,213 +0,0 @@
-// Copyright (C) 2015 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#undef DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_
-#ifdef DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_
-
-#include "../matrix.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- struct example_feature_extractor
- {
- /*!
- WHAT THIS OBJECT REPRESENTS
- This object defines the interface a feature extractor must implement if it
- is to be used with the process_sample and policy objects defined at the
- bottom of this file. Moreover, it is meant to represent the core part
- of a model used in a reinforcement learning algorithm.
-
- In particular, this object models a Q(state,action) function where
- Q(state,action) == dot(w, PSI(state,action))
- where PSI(state,action) is a feature vector and w is a parameter
- vector.
-
- Therefore, a feature extractor defines how the PSI(x,y) feature vector is
- calculated. It also defines the types used to represent the state and
- action objects.
-
-
- THREAD SAFETY
- Instances of this object are required to be threadsafe, that is, it should
- be safe for multiple threads to make concurrent calls to the member
- functions of this object.
- !*/
-
- // The state and actions can be any types so long as you provide typedefs for them.
- typedef T state_type;
- typedef U action_type;
- // A feature extractor can also require the last element of the weight vector w
- // to be 1, which can be useful for including a prior in your model.
- const static bool force_last_weight_to_1 = false;
-
- example_feature_extractor(
- );
- /*!
- ensures
- - this object is properly initialized.
- !*/
-
- unsigned long num_features(
- ) const;
- /*!
- ensures
- - returns the dimensionality of the PSI() feature vector.
- !*/
-
- action_type find_best_action (
- const state_type& state,
- const matrix<double,0,1>& w
- ) const;
- /*!
- ensures
- - returns the action A that maximizes Q(state,A) = dot(w,PSI(state,A)).
- That is, this function finds the best action to take in the given state
- when our model is parameterized by the given weight vector w.
- !*/
-
- void get_features (
- const state_type& state,
- const action_type& action,
- matrix<double,0,1>& feats
- ) const;
- /*!
- ensures
- - #feats.size() == num_features()
- - #feats == PSI(state,action)
- !*/
-
- };
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename feature_extractor
- >
- struct process_sample
- {
- /*!
- REQUIREMENTS ON feature_extractor
- feature_extractor should implement the example_feature_extractor interface
- defined at the top of this file.
-
- WHAT THIS OBJECT REPRESENTS
- This object holds a training sample for a reinforcement learning algorithm.
- In particular, it should be a sample from some process where the process
- was in state this->state, then took this->action action which resulted in
- receiving this->reward and ending up in the state this->next_state.
- !*/
-
- typedef feature_extractor feature_extractor_type;
- typedef typename feature_extractor::state_type state_type;
- typedef typename feature_extractor::action_type action_type;
-
- process_sample(){}
-
- process_sample(
- const state_type& s,
- const action_type& a,
- const state_type& n,
- const double& r
- ) : state(s), action(a), next_state(n), reward(r) {}
-
- state_type state;
- action_type action;
- state_type next_state;
- double reward;
- };
-
- template < typename feature_extractor >
- void serialize (const process_sample<feature_extractor>& item, std::ostream& out);
- template < typename feature_extractor >
- void deserialize (process_sample<feature_extractor>& item, std::istream& in);
- /*!
- provides serialization support.
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename feature_extractor
- >
- class policy
- {
- /*!
- REQUIREMENTS ON feature_extractor
- feature_extractor should implement the example_feature_extractor interface
- defined at the top of this file.
-
- WHAT THIS OBJECT REPRESENTS
- This is a policy based on the supplied feature_extractor model. In
- particular, it maps from feature_extractor::state_type to the best action
- to take in that state.
- !*/
-
- public:
-
- typedef feature_extractor feature_extractor_type;
- typedef typename feature_extractor::state_type state_type;
- typedef typename feature_extractor::action_type action_type;
-
-
- policy (
- );
- /*!
- ensures
- - #get_feature_extractor() == feature_extractor()
- (i.e. it will have its default value)
- - #get_weights().size() == #get_feature_extractor().num_features()
- - #get_weights() == 0
- !*/
-
- policy (
- const matrix<double,0,1>& weights,
- const feature_extractor& fe
- );
- /*!
- requires
- - fe.num_features() == weights.size()
- ensures
- - #get_feature_extractor() == fe
- - #get_weights() == weights
- !*/
-
- action_type operator() (
- const state_type& state
- ) const;
- /*!
- ensures
- - returns get_feature_extractor().find_best_action(state,get_weights());
- !*/
-
- const feature_extractor& get_feature_extractor (
- ) const;
- /*!
- ensures
- - returns the feature extractor used by this object
- !*/
-
- const matrix<double,0,1>& get_weights (
- ) const;
- /*!
- ensures
- - returns the parameter vector (w) associated with this object. The length
- of the vector is get_feature_extractor().num_features().
- !*/
-
- };
-
- template < typename feature_extractor >
- void serialize(const policy<feature_extractor>& item, std::ostream& out);
- template < typename feature_extractor >
- void deserialize(policy<feature_extractor>& item, std::istream& in);
- /*!
- provides serialization support.
- !*/
-
-// ----------------------------------------------------------------------------------------
-
-
-#endif // DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_
-
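Continuing the hypothetical toy_extractor sketch from above, the serialization contract documented here can be exercised as follows; the no-op overloads are needed because serializing a policy also writes its (here stateless) extractor:

#include <fstream>

inline void serialize  (const toy_extractor&, std::ostream&) {}  // no state
inline void deserialize(toy_extractor&, std::istream&) {}

void save_and_reload()
{
    toy_extractor fe;
    dlib::matrix<double,0,1> w(fe.num_features());
    w = 0;
    dlib::policy<toy_extractor> p(w, fe);

    std::ofstream fout("policy.dat", std::ios::binary);
    dlib::serialize(p, fout);    // writes a version tag, then fe, then w
    fout.close();

    std::ifstream fin("policy.dat", std::ios::binary);
    dlib::deserialize(p, fin);   // throws serialization_error on a bad version
}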
diff --git a/ml/dlib/dlib/control/lspi.h b/ml/dlib/dlib/control/lspi.h
deleted file mode 100644
index b21a501d2..000000000
--- a/ml/dlib/dlib/control/lspi.h
+++ /dev/null
@@ -1,188 +0,0 @@
-// Copyright (C) 2015 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_LSPI_Hh_
-#define DLIB_LSPI_Hh_
-
-#include "lspi_abstract.h"
-#include "approximate_linear_models.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename feature_extractor
- >
- class lspi
- {
- public:
- typedef feature_extractor feature_extractor_type;
- typedef typename feature_extractor::state_type state_type;
- typedef typename feature_extractor::action_type action_type;
-
- explicit lspi(
- const feature_extractor& fe_
- ) : fe(fe_)
- {
- init();
- }
-
- lspi(
- )
- {
- init();
- }
-
- double get_discount (
- ) const { return discount; }
-
- void set_discount (
- double value
- )
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(0 < value && value <= 1,
- "\t void lspi::set_discount(value)"
- << "\n\t invalid inputs were given to this function"
- << "\n\t value: " << value
- );
- discount = value;
- }
-
- const feature_extractor& get_feature_extractor (
- ) const { return fe; }
-
- void be_verbose (
- )
- {
- verbose = true;
- }
-
- void be_quiet (
- )
- {
- verbose = false;
- }
-
- void set_epsilon (
- double eps_
- )
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(eps_ > 0,
- "\t void lspi::set_epsilon(eps_)"
- << "\n\t invalid inputs were given to this function"
- << "\n\t eps_: " << eps_
- );
- eps = eps_;
- }
-
- double get_epsilon (
- ) const
- {
- return eps;
- }
-
- void set_lambda (
- double lambda_
- )
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(lambda_ >= 0,
- "\t void lspi::set_lambda(lambda_)"
- << "\n\t invalid inputs were given to this function"
- << "\n\t lambda_: " << lambda_
- );
- lambda = lambda_;
- }
-
- double get_lambda (
- ) const
- {
- return lambda;
- }
-
- void set_max_iterations (
- unsigned long max_iter
- ) { max_iterations = max_iter; }
-
- unsigned long get_max_iterations (
- ) { return max_iterations; }
-
- template <typename vector_type>
- policy<feature_extractor> train (
- const vector_type& samples
- ) const
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(samples.size() > 0,
- "\t policy lspi::train(samples)"
- << "\n\t invalid inputs were given to this function"
- );
-
- matrix<double,0,1> w(fe.num_features());
- w = 0;
- matrix<double,0,1> prev_w, b, f1, f2;
-
- matrix<double> A;
-
- double change;
- unsigned long iter = 0;
- do
- {
- A = identity_matrix<double>(fe.num_features())*lambda;
- b = 0;
- for (unsigned long i = 0; i < samples.size(); ++i)
- {
- fe.get_features(samples[i].state, samples[i].action, f1);
- fe.get_features(samples[i].next_state,
- fe.find_best_action(samples[i].next_state,w),
- f2);
- A += f1*trans(f1 - discount*f2);
- b += f1*samples[i].reward;
- }
-
- prev_w = w;
- if (feature_extractor::force_last_weight_to_1)
- w = join_cols(pinv(colm(A,range(0,A.nc()-2)))*(b-colm(A,A.nc()-1)),mat(1.0));
- else
- w = pinv(A)*b;
-
- change = length(w-prev_w);
- ++iter;
-
- if (verbose)
- std::cout << "iteration: " << iter << "\tchange: " << change << std::endl;
-
- } while(change > eps && iter < max_iterations);
-
- return policy<feature_extractor>(w,fe);
- }
-
-
- private:
-
- void init()
- {
- lambda = 0.01;
- discount = 0.8;
- eps = 0.01;
- verbose = false;
- max_iterations = 100;
- }
-
- double lambda;
- double discount;
- double eps;
- bool verbose;
- unsigned long max_iterations;
- feature_extractor fe;
- };
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_LSPI_Hh_
-
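In the terms of the Lagoudakis-Parr paper cited below, the do/while loop in train() above is repeated LSTDQ: with PSI the feature map, gamma the discount, and pi_w the greedy policy for the current weights, each pass accumulates

    A = lambda*I + sum_i PSI(s_i,a_i) * trans(PSI(s_i,a_i) - gamma*PSI(s'_i, pi_w(s'_i)))
    b = sum_i PSI(s_i,a_i) * r_i

then re-solves w = pinv(A)*b and stops once length(w - prev_w) drops below eps or max_iterations passes have run.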
diff --git a/ml/dlib/dlib/control/lspi_abstract.h b/ml/dlib/dlib/control/lspi_abstract.h
deleted file mode 100644
index f262d16f4..000000000
--- a/ml/dlib/dlib/control/lspi_abstract.h
+++ /dev/null
@@ -1,193 +0,0 @@
-// Copyright (C) 2015 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#undef DLIB_LSPI_ABSTRACT_Hh_
-#ifdef DLIB_LSPI_ABSTRACT_Hh_
-
-#include "approximate_linear_models_abstract.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename feature_extractor
- >
- class lspi
- {
- /*!
- REQUIREMENTS ON feature_extractor
- feature_extractor should implement the example_feature_extractor interface
- defined at the top of dlib/control/approximate_linear_models_abstract.h
-
- WHAT THIS OBJECT REPRESENTS
- This object is an implementation of the reinforcement learning algorithm
- described in the following paper:
- Lagoudakis, Michail G., and Ronald Parr. "Least-squares policy
- iteration." The Journal of Machine Learning Research 4 (2003):
- 1107-1149.
-
- This means that it takes a bunch of training data in the form of
- process_samples and outputs a policy that hopefully performs well when run
- on the process that generated those samples.
- !*/
-
- public:
- typedef feature_extractor feature_extractor_type;
- typedef typename feature_extractor::state_type state_type;
- typedef typename feature_extractor::action_type action_type;
-
- explicit lspi(
- const feature_extractor& fe_
- );
- /*!
- ensures
- - #get_feature_extractor() == fe_
- - #get_lambda() == 0.01
- - #get_discount() == 0.8
- - #get_epsilon() == 0.01
- - is not verbose
- - #get_max_iterations() == 100
- !*/
-
- lspi(
- );
- /*!
- ensures
- - #get_feature_extractor() == feature_extractor()
- (i.e. it will have its default value)
- - #get_lambda() == 0.01
- - #get_discount() == 0.8
- - #get_epsilon() == 0.01
- - is not verbose
- - #get_max_iterations() == 100
- !*/
-
- double get_discount (
- ) const;
- /*!
- ensures
- - returns the discount applied to the sum of rewards in the Bellman
- equation.
- !*/
-
- void set_discount (
- double value
- );
- /*!
- requires
- - 0 < value <= 1
- ensures
- - #get_discount() == value
- !*/
-
- const feature_extractor& get_feature_extractor (
- ) const;
- /*!
- ensures
- - returns the feature extractor used by this object
- !*/
-
- void be_verbose (
- );
- /*!
- ensures
- - This object will print status messages to standard out so that a
- user can observe the progress of the algorithm.
- !*/
-
- void be_quiet (
- );
- /*!
- ensures
- - this object will not print anything to standard out
- !*/
-
- void set_epsilon (
- double eps
- );
- /*!
- requires
- - eps > 0
- ensures
- - #get_epsilon() == eps
- !*/
-
- double get_epsilon (
- ) const;
- /*!
- ensures
- - returns the error epsilon that determines when training should stop.
- Smaller values may result in a more accurate solution but take longer to
- train.
- !*/
-
- void set_lambda (
- double lambda_
- );
- /*!
- requires
- - lambda_ >= 0
- ensures
- - #get_lambda() == lambda_
- !*/
-
- double get_lambda (
- ) const;
- /*!
- ensures
- - returns the regularization parameter. It is the parameter that
- determines the trade off between trying to fit the training data
- exactly or allowing more errors but hopefully improving the
- generalization ability of the resulting function. Smaller values
- encourage exact fitting while larger values of lambda may encourage
- better generalization.
- !*/
-
- void set_max_iterations (
- unsigned long max_iter
- );
- /*!
- ensures
- - #get_max_iterations() == max_iter
- !*/
-
- unsigned long get_max_iterations (
- );
- /*!
- ensures
- - returns the maximum number of iterations the LSPI optimizer is allowed to
- run before it is required to stop and return a result.
- !*/
-
- template <
- typename vector_type
- >
- policy<feature_extractor> train (
- const vector_type& samples
- ) const;
- /*!
- requires
- - samples.size() > 0
- - samples is something with an interface that looks like
- std::vector<process_sample<feature_extractor>>. That is, it should
- be some kind of array of process_sample objects.
- ensures
- - Trains a policy based on the given data and returns the results. The
- idea is to find a policy that will obtain the largest possible reward
- when run on the process that generated the samples. In particular,
- if the returned policy is P then:
- - P(S) == the best action to take when in state S.
- - if (feature_extractor::force_last_weight_to_1) then
- - The last element of P.get_weights() is 1.
- !*/
-
- };
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_LSPI_ABSTRACT_Hh_
-
-
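A hypothetical end-to-end sketch of the workflow documented above, reusing the toy_extractor stand-in from earlier (the process and its rewards are invented for illustration):

#include <dlib/control.h>
#include <cmath>
#include <vector>

void train_toy_policy()
{
    // Record (state, action, next_state, reward) tuples from a made-up
    // process in which moving toward 0 is rewarded.
    std::vector<dlib::process_sample<toy_extractor> > samples;
    for (int i = 0; i < 100; ++i)
    {
        const double s      = (i % 11) - 5.0;
        const int    a      = (i % 2 == 0) ? +1 : -1;
        const double next   = s + a;
        const double reward = -std::abs(next);
        samples.push_back(dlib::process_sample<toy_extractor>(s, a, next, reward));
    }

    dlib::lspi<toy_extractor> trainer;
    trainer.set_discount(0.9);
    trainer.set_epsilon(0.01);
    trainer.be_verbose();                         // print per-iteration change
    dlib::policy<toy_extractor> best = trainer.train(samples);

    const int action = best(2.5);                 // greedy action for state 2.5
    (void)action;
}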
diff --git a/ml/dlib/dlib/control/mpc.h b/ml/dlib/dlib/control/mpc.h
deleted file mode 100644
index 48ef2b72d..000000000
--- a/ml/dlib/dlib/control/mpc.h
+++ /dev/null
@@ -1,370 +0,0 @@
-// Copyright (C) 2015 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_MPC_Hh_
-#define DLIB_MPC_Hh_
-
-#include "mpc_abstract.h"
-#include "../matrix.h"
-#include "../algs.h"
-
-
-namespace dlib
-{
- template <
- long S_,
- long I_,
- unsigned long horizon_
- >
- class mpc
- {
-
- public:
-
- const static long S = S_;
- const static long I = I_;
- const static unsigned long horizon = horizon_;
-
- mpc(
- )
- {
- A = 0;
- B = 0;
- C = 0;
- Q = 0;
- R = 0;
- lower = 0;
- upper = 0;
-
- max_iterations = 0;
- eps = 0.01;
- for (unsigned long i = 0; i < horizon; ++i)
- {
- target[i].set_size(A.nr());
- target[i] = 0;
-
- controls[i].set_size(B.nc());
- controls[i] = 0;
- }
- lambda = 0;
- }
-
- mpc (
- const matrix<double,S,S>& A_,
- const matrix<double,S,I>& B_,
- const matrix<double,S,1>& C_,
- const matrix<double,S,1>& Q_,
- const matrix<double,I,1>& R_,
- const matrix<double,I,1>& lower_,
- const matrix<double,I,1>& upper_
- ) : A(A_), B(B_), C(C_), Q(Q_), R(R_), lower(lower_), upper(upper_)
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(A.nr() > 0 && B.nc() > 0,
- "\t mpc::mpc()"
- << "\n\t invalid inputs were given to this function"
- << "\n\t A.nr(): " << A.nr()
- << "\n\t B.nc(): " << B.nc()
- );
-
- DLIB_ASSERT(A.nr() == A.nc() &&
- A.nr() == B.nr() &&
- A.nr() == C.nr() &&
- A.nr() == Q.nr(),
- "\t mpc::mpc()"
- << "\n\t invalid inputs were given to this function"
- << "\n\t A.nr(): " << A.nr()
- << "\n\t A.nc(): " << A.nc()
- << "\n\t B.nr(): " << B.nr()
- << "\n\t C.nr(): " << C.nr()
- << "\n\t Q.nr(): " << Q.nr()
- );
- DLIB_ASSERT(
- B.nc() == R.nr() &&
- B.nc() == lower.nr() &&
- B.nc() == upper.nr() ,
- "\t mpc::mpc()"
- << "\n\t invalid inputs were given to this function"
- << "\n\t B.nr(): " << B.nr()
- << "\n\t B.nc(): " << B.nc()
- << "\n\t lower.nr(): " << lower.nr()
- << "\n\t upper.nr(): " << upper.nr()
- );
- DLIB_ASSERT(min(Q) >= 0 &&
- min(R) > 0 &&
- min(upper-lower) >= 0,
- "\t mpc::mpc()"
- << "\n\t invalid inputs were given to this function"
- << "\n\t min(Q): " << min(Q)
- << "\n\t min(R): " << min(R)
- << "\n\t min(upper-lower): " << min(upper-lower)
- );
-
-
- max_iterations = 10000;
- eps = 0.01;
- for (unsigned long i = 0; i < horizon; ++i)
- {
- target[i].set_size(A.nr());
- target[i] = 0;
-
- controls[i].set_size(B.nc());
- controls[i] = 0;
- }
-
- // Bound the maximum eigenvalue of the hessian by computing the trace of the
- // hessian matrix.
- lambda = sum(R)*horizon;
- matrix<double,S,S> temp = diagm(Q);
- for (unsigned long c = 0; c < horizon; ++c)
- {
- lambda += trace(trans(B)*temp*B);
- Q_diag[horizon-c-1] = diag(trans(B)*temp*B);
- temp = trans(A)*temp*A + diagm(Q);
- }
-
- }
-
- const matrix<double,S,S>& get_A (
- ) const { return A; }
- const matrix<double,S,I>& get_B (
- ) const { return B; }
- const matrix<double,S,1>& get_C (
- ) const { return C; }
- const matrix<double,S,1>& get_Q (
- ) const { return Q; }
- const matrix<double,I,1>& get_R (
- ) const { return R; }
- const matrix<double,I,1>& get_lower_constraints (
- ) const { return lower; }
- const matrix<double,I,1>& get_upper_constraints (
- ) const { return upper; }
-
- void set_target (
- const matrix<double,S,1>& val,
- const unsigned long time
- )
- {
- DLIB_ASSERT(time < horizon,
- "\t void mpc::set_target(eps_)"
- << "\n\t invalid inputs were given to this function"
- << "\n\t time: " << time
- << "\n\t horizon: " << horizon
- );
-
- target[time] = val;
- }
-
- void set_target (
- const matrix<double,S,1>& val
- )
- {
- for (unsigned long i = 0; i < horizon; ++i)
- target[i] = val;
- }
-
- void set_last_target (
- const matrix<double,S,1>& val
- )
- {
- set_target(val, horizon-1);
- }
-
- const matrix<double,S,1>& get_target (
- const unsigned long time
- ) const
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(time < horizon,
- "\t matrix mpc::get_target(eps_)"
- << "\n\t invalid inputs were given to this function"
- << "\n\t time: " << time
- << "\n\t horizon: " << horizon
- );
-
- return target[time];
- }
-
- unsigned long get_max_iterations (
- ) const { return max_iterations; }
-
- void set_max_iterations (
- unsigned long max_iter
- )
- {
- max_iterations = max_iter;
- }
-
- void set_epsilon (
- double eps_
- )
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(eps_ > 0,
- "\t void mpc::set_epsilon(eps_)"
- << "\n\t invalid inputs were given to this function"
- << "\n\t eps_: " << eps_
- );
- eps = eps_;
- }
-
- double get_epsilon (
- ) const
- {
- return eps;
- }
-
- matrix<double,I,1> operator() (
- const matrix<double,S,1>& current_state
- )
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(min(R) > 0 && A.nr() == current_state.size(),
- "\t matrix mpc::operator(current_state)"
- << "\n\t invalid inputs were given to this function"
- << "\n\t min(R): " << min(R)
- << "\n\t A.nr(): " << A.nr()
- << "\n\t current_state.size(): " << current_state.size()
- );
-
- // Shift the inputs over by one time step so we can use them to warm start the
- // optimizer.
- for (unsigned long i = 1; i < horizon; ++i)
- controls[i-1] = controls[i];
-
- solve_linear_mpc(current_state);
-
- for (unsigned long i = 1; i < horizon; ++i)
- target[i-1] = target[i];
-
- return controls[0];
- }
-
- private:
-
-
- // These temporary variables are here just to avoid reallocating them on each call to
- // operator().
- matrix<double,S,1> M[horizon];
- matrix<double,I,1> MM[horizon];
- matrix<double,I,1> df[horizon];
- matrix<double,I,1> v[horizon];
- matrix<double,I,1> v_old[horizon];
-
- void solve_linear_mpc (
- const matrix<double,S,1>& initial_state
- )
- {
- // make it so MM == trans(K)*Q*(M-target)
- M[0] = A*initial_state + C;
- for (unsigned long i = 1; i < horizon; ++i)
- M[i] = A*M[i-1] + C;
- for (unsigned long i = 0; i < horizon; ++i)
- M[i] = diagm(Q)*(M[i]-target[i]);
- for (long i = (long)horizon-2; i >= 0; --i)
- M[i] += trans(A)*M[i+1];
- for (unsigned long i = 0; i < horizon; ++i)
- MM[i] = trans(B)*M[i];
-
-
-
- unsigned long iter = 0;
- for (; iter < max_iterations; ++iter)
- {
- // compute current gradient and put it into df.
- // df == H*controls + MM;
- M[0] = B*controls[0];
- for (unsigned long i = 1; i < horizon; ++i)
- M[i] = A*M[i-1] + B*controls[i];
- for (unsigned long i = 0; i < horizon; ++i)
- M[i] = diagm(Q)*M[i];
- for (long i = (long)horizon-2; i >= 0; --i)
- M[i] += trans(A)*M[i+1];
- for (unsigned long i = 0; i < horizon; ++i)
- df[i] = MM[i] + trans(B)*M[i] + diagm(R)*controls[i];
-
-
-
- // Check the stopping condition, which is the magnitude of the largest element
- // of the gradient.
- double max_df = 0;
- unsigned long max_t = 0;
- long max_v = 0;
- for (unsigned long i = 0; i < horizon; ++i)
- {
- for (long j = 0; j < controls[i].size(); ++j)
- {
- // if this variable isn't an active constraint then we care about its
- // derivative.
- if (!((controls[i](j) <= lower(j) && df[i](j) > 0) ||
- (controls[i](j) >= upper(j) && df[i](j) < 0)))
- {
- if (std::abs(df[i](j)) > max_df)
- {
- max_df = std::abs(df[i](j));
- max_t = i;
- max_v = j;
- }
- }
- }
- }
- if (max_df < eps)
- break;
-
-
-
- // We will start out by doing a little bit of coordinate descent because it
- // allows us to optimize individual variables exactly. Since we are warm
- // starting each iteration with a really good solution this helps speed
- // things up a lot.
- const unsigned long smo_iters = 50;
- if (iter < smo_iters)
- {
- if (Q_diag[max_t](max_v) == 0) continue;
-
- // Take the optimal step but just for one variable.
- controls[max_t](max_v) = -(df[max_t](max_v)-Q_diag[max_t](max_v)*controls[max_t](max_v))/Q_diag[max_t](max_v);
- controls[max_t](max_v) = put_in_range(lower(max_v), upper(max_v), controls[max_t](max_v));
-
- // If this is the last SMO iteration then don't forget to initialize v
- // for the gradient steps.
- if (iter+1 == smo_iters)
- {
- for (unsigned long i = 0; i < horizon; ++i)
- v[i] = controls[i];
- }
- }
- else
- {
- // Take a projected gradient step.
- for (unsigned long i = 0; i < horizon; ++i)
- {
- v_old[i] = v[i];
- v[i] = dlib::clamp(controls[i] - 1.0/lambda * df[i], lower, upper);
- controls[i] = dlib::clamp(v[i] + (std::sqrt(lambda)-1)/(std::sqrt(lambda)+1)*(v[i]-v_old[i]), lower, upper);
- }
- }
- }
- }
-
- unsigned long max_iterations;
- double eps;
-
- matrix<double,S,S> A;
- matrix<double,S,I> B;
- matrix<double,S,1> C;
- matrix<double,S,1> Q;
- matrix<double,I,1> R;
- matrix<double,I,1> lower;
- matrix<double,I,1> upper;
- matrix<double,S,1> target[horizon];
-
- double lambda; // a bound on the largest eigenvalue of the Hessian matrix.
- matrix<double,I,1> Q_diag[horizon];
- matrix<double,I,1> controls[horizon];
-
- };
-
-}
-
-#endif // DLIB_MPC_Hh_
-
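The projected gradient branch of solve_linear_mpc() above follows the fast (Nesterov-style) gradient recursion of the Kogel-Findeisen paper cited in mpc_abstract.h. Stripped down to a single box-constrained scalar quadratic (illustrative only, no dlib types, and assuming h >= 1 so the momentum coefficient is nonnegative), the same iteration reads:

#include <algorithm>
#include <cmath>

// Minimize f(u) = 0.5*h*u^2 + g*u subject to lo <= u <= hi, with h > 0.
// L plays the role of lambda in mpc.h: a bound on the Hessian (exact here).
double solve_box_quadratic(double h, double g, double lo, double hi)
{
    const double L = h;
    const double momentum = (std::sqrt(L) - 1) / (std::sqrt(L) + 1);
    double u = 0, v = 0;
    for (int k = 0; k < 10000; ++k)
    {
        const double df = h*u + g;                    // gradient at u
        // Same stopping idea as above: ignore the gradient at active bounds.
        if (std::abs(df) < 1e-10 || (u <= lo && df > 0) || (u >= hi && df < 0))
            break;
        const double v_old = v;
        v = std::min(hi, std::max(lo, u - df/L));     // projected step
        u = std::min(hi, std::max(lo, v + momentum*(v - v_old)));
    }
    return u;
}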
diff --git a/ml/dlib/dlib/control/mpc_abstract.h b/ml/dlib/dlib/control/mpc_abstract.h
deleted file mode 100644
index b4421c076..000000000
--- a/ml/dlib/dlib/control/mpc_abstract.h
+++ /dev/null
@@ -1,276 +0,0 @@
-// Copyright (C) 2015 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#undef DLIB_MPC_ABSTRACT_Hh_
-#ifdef DLIB_MPC_ABSTRACT_Hh_
-
-#include "../matrix.h"
-
-namespace dlib
-{
- template <
- long S_,
- long I_,
- unsigned long horizon_
- >
- class mpc
- {
- /*!
- REQUIREMENTS ON horizon_
- horizon_ > 0
-
- REQUIREMENTS ON S_
- S_ >= 0
-
- REQUIREMENTS ON I_
- I_ >= 0
-
- WHAT THIS OBJECT REPRESENTS
- This object implements a linear model predictive controller. To explain
- what that means, suppose you have some process you want to control and the
- process dynamics are described by the linear equation:
- x_{i+1} = A*x_i + B*u_i + C
- That is, the next state the system goes into is a linear function of its
- current state (x_i) and the current control (u_i) plus some constant bias
- or disturbance.
-
- A model predictive controller can find the control (u) you should apply to
- drive the state (x) to some reference value, or alternatively to make the
- state track some reference time-varying sequence. It does this by
- simulating the process for horizon_ time steps and selecting the control
- that leads to the best performance over the next horizon_ steps.
-
- To be precise, each time you ask this object for a control, it solves the
- following quadratic program:
-
- min sum_i trans(x_i-target_i)*Q*(x_i-target_i) + trans(u_i)*R*u_i
- x_i,u_i
-
- such that: x_0 == current_state
- x_{i+1} == A*x_i + B*u_i + C
- lower <= u_i <= upper
- 0 <= i < horizon_
-
- and reports u_0 as the control you should take given that you are currently
- in current_state. Q and R are user supplied matrices that define how we
- penalize variations away from the target state as well as how much we want
- to avoid generating large control signals.
-
- Finally, the algorithm we use to solve this quadratic program is based
- largely on the method described in:
- A Fast Gradient method for embedded linear predictive control (2011)
- by Markus Kogel and Rolf Findeisen
- !*/
-
- public:
-
- const static long S = S_;
- const static long I = I_;
- const static unsigned long horizon = horizon_;
-
- mpc(
- );
- /*!
- ensures
- - #get_max_iterations() == 0
- - The A,B,C,Q,R,lower, and upper parameter matrices are filled with zeros.
- Therefore, to use this object you must initialize it via the constructor
- that supplies these parameters.
- !*/
-
- mpc (
- const matrix<double,S,S>& A,
- const matrix<double,S,I>& B,
- const matrix<double,S,1>& C,
- const matrix<double,S,1>& Q,
- const matrix<double,I,1>& R,
- const matrix<double,I,1>& lower,
- const matrix<double,I,1>& upper
- );
- /*!
- requires
- - A.nr() > 0
- - B.nc() > 0
- - A.nr() == A.nc() == B.nr() == C.nr() == Q.nr()
- - B.nc() == R.nr() == lower.nr() == upper.nr()
- - min(Q) >= 0
- - min(R) > 0
- - min(upper-lower) >= 0
- ensures
- - #get_A() == A
- - #get_B() == B
- - #get_C() == C
- - #get_Q() == Q
- - #get_R() == R
- - #get_lower_constraints() == lower
- - #get_upper_constraints() == upper
- - for all valid i:
- - get_target(i) == a vector of all zeros
- - get_target(i).size() == A.nr()
- - #get_max_iterations() == 10000
- - #get_epsilon() == 0.01
- !*/
-
- const matrix<double,S,S>& get_A (
- ) const;
- /*!
- ensures
- - returns the A matrix from the quadratic program defined above.
- !*/
-
- const matrix<double,S,I>& get_B (
- ) const;
- /*!
- ensures
- - returns the B matrix from the quadratic program defined above.
- !*/
-
- const matrix<double,S,1>& get_C (
- ) const;
- /*!
- ensures
- - returns the C matrix from the quadratic program defined above.
- !*/
-
- const matrix<double,S,1>& get_Q (
- ) const;
- /*!
- ensures
- - returns the diagonal of the Q matrix from the quadratic program defined
- above.
- !*/
-
- const matrix<double,I,1>& get_R (
- ) const;
- /*!
- ensures
- - returns the diagonal of the R matrix from the quadratic program defined
- above.
- !*/
-
- const matrix<double,I,1>& get_lower_constraints (
- ) const;
- /*!
- ensures
- - returns the lower matrix from the quadratic program defined above. All
- controls generated by this object will have values no less than this
- lower bound. That is, any control u will satisfy min(u-lower) >= 0.
- !*/
-
- const matrix<double,I,1>& get_upper_constraints (
- ) const;
- /*!
- ensures
- - returns the upper matrix from the quadratic program defined above. All
- controls generated by this object will have values no larger than this
- upper bound. That is, any control u will satisfy min(upper-u) >= 0.
- !*/
-
- const matrix<double,S,1>& get_target (
- const unsigned long time
- ) const;
- /*!
- requires
- - time < horizon
- ensures
- - This object will try to find the control sequence that results in the
- process obtaining get_target(time) state at the indicated time. Note
- that the next time instant after "right now" is time 0.
- !*/
-
- void set_target (
- const matrix<double,S,1>& val,
- const unsigned long time
- );
- /*!
- requires
- - time < horizon
- ensures
- - #get_target(time) == val
- !*/
-
- void set_target (
- const matrix<double,S,1>& val
- );
- /*!
- ensures
- - for all valid t:
- - #get_target(t) == val
- !*/
-
- void set_last_target (
- const matrix<double,S,1>& val
- );
- /*!
- ensures
- - performs: set_target(val, horizon-1)
- !*/
-
- unsigned long get_max_iterations (
- ) const;
- /*!
- ensures
- - When operator() is called it solves an optimization problem to
- get_epsilon() precision to determine the next control action. In
- particular, we run the optimizer until the magnitude of each element of
- the gradient vector is less than get_epsilon() or until
- get_max_iterations() solver iterations have been executed.
- !*/
-
- void set_max_iterations (
- unsigned long max_iter
- );
- /*!
- ensures
- - #get_max_iterations() == max_iter
- !*/
-
- void set_epsilon (
- double eps
- );
- /*!
- requires
- - eps > 0
- ensures
- - #get_epsilon() == eps
- !*/
-
- double get_epsilon (
- ) const;
- /*!
- ensures
- - When operator() is called it solves an optimization problem to
- get_epsilon() precision to determine the next control action. In
- particular, we run the optimizer until the magnitude of each element of
- the gradient vector is less than get_epsilon() or until
- get_max_iterations() solver iterations have been executed. This means
- that smaller epsilon values will give more accurate outputs but may take
- longer to compute.
- !*/
-
- matrix<double,I,1> operator() (
- const matrix<double,S,1>& current_state
- );
- /*!
- requires
- - min(R) > 0
- - A.nr() == current_state.size()
- ensures
- - Solves the model predictive control problem defined by the arguments to
- this objects constructor, assuming that the starting state is given by
- current_state. Then we return the control that should be taken in the
- current state that best optimizes the quadratic objective function
- defined above.
- - We also shift over the target states so that you only need to update the
- last one (if you are using non-zero target states) via a call to
- set_last_target(). In particular, for all valid t, it will be the case
- that:
- - #get_target(t) == get_target(t+1)
- - #get_target(horizon-1) == get_target(horizon-1)
- !*/
-
- };
-
-}
-
-#endif // DLIB_MPC_ABSTRACT_Hh_
-
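Taken together, a hypothetical minimal use of this interface: a 1-D double integrator (state = position and velocity) driven to the origin. The dynamics and cost values below are invented for illustration and are not from the deleted sources.

#include <dlib/control.h>
using namespace dlib;

int main()
{
    const double dt = 0.05;

    matrix<double,2,2> A;     A = 1, dt,
                                  0,  1;     // x_{i+1} = A*x_i + B*u_i + C
    matrix<double,2,1> B;     B = 0, dt;
    matrix<double,2,1> C;     C = 0, 0;      // no constant disturbance
    matrix<double,2,1> Q;     Q = 1, 0.1;    // diagonal state cost
    matrix<double,1,1> R;     R = 0.01;      // diagonal control cost
    matrix<double,1,1> lower; lower = -1;    // box constraints on the control
    matrix<double,1,1> upper; upper = +1;

    // S=2 state dims, I=1 control dim, 30-step horizon; targets default to 0.
    mpc<2,1,30> controller(A, B, C, Q, R, lower, upper);

    matrix<double,2,1> x;     x = 5, 0;      // start 5 units from the target
    for (int i = 0; i < 200; ++i)
    {
        const matrix<double,1,1> u = controller(x);  // u_0 of the solved QP
        x = A*x + B*u + C;                           // advance the process
    }
    return 0;
}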