Diffstat (limited to 'ml/dlib/dlib/dnn/trainer_abstract.h')
 ml/dlib/dlib/dnn/trainer_abstract.h | 765
 1 file changed, 765 insertions, 0 deletions
diff --git a/ml/dlib/dlib/dnn/trainer_abstract.h b/ml/dlib/dlib/dnn/trainer_abstract.h
new file mode 100644
index 000000000..3bfb6dc99
--- /dev/null
+++ b/ml/dlib/dlib/dnn/trainer_abstract.h
@@ -0,0 +1,765 @@
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_DNn_TRAINER_ABSTRACT_H_
#ifdef DLIB_DNn_TRAINER_ABSTRACT_H_

#include "core_abstract.h"
#include "solvers_abstract.h"
#include <vector>
#include <chrono>


namespace dlib
{

// ----------------------------------------------------------------------------------------

    enum class force_flush_to_disk {
        no = 0,
        yes = 1
    };

// ----------------------------------------------------------------------------------------

    template <
        typename net_type,
        typename solver_type = sgd
        >
    class dnn_trainer
    {
        /*!
            REQUIREMENTS ON net_type
                - net_type is an add_loss_layer object.

            REQUIREMENTS ON solver_type
                - solver_type is an implementation of the EXAMPLE_SOLVER interface
                  defined in solvers_abstract.h

            WHAT THIS OBJECT REPRESENTS
                This object is a tool for training a deep neural network.  To use it you
                supply a neural network type and a solver, then you call train() with
                your training data and it will output a new network instance that has
                hopefully learned something useful from your training data.

                If you are compiling with CUDA then this object will use the GPU that is
                currently selected (i.e. the one indicated by cudaGetDevice()) when
                dnn_trainer is constructed.  It will continue to use that device even if
                you later change it by a call to cudaSetDevice().

            EXCEPTIONS
                If an exception is thrown by any part of the neural network during
                training then the exception will be propagated out of the trainer to the
                user.  Moreover, the trainer instance will be unusable and should be
                destroyed.
        !*/

    public:

        typedef typename net_type::training_label_type training_label_type;
        typedef typename net_type::input_type input_type;
        const static size_t num_computational_layers = net_type::num_computational_layers;

        dnn_trainer() = delete;
        dnn_trainer(const dnn_trainer&) = delete;
        dnn_trainer& operator=(const dnn_trainer&) = delete;

        dnn_trainer(
            net_type& net,
            const solver_type& solver = solver_type(),
            const std::vector<int>& cuda_extra_devices = {}
        );
        /*!
            requires
                - for all valid i:
                    - 0 <= cuda_extra_devices[i] < dlib::cuda::get_num_devices()
            ensures
                - &#get_net() == &net
                  (i.e. The dnn_trainer holds a reference to net, it does not copy it.
                  Therefore, you must ensure net has a lifetime at least as long as the
                  dnn_trainer.)
                - #get_solvers() == a set of solvers that are all initialized with the
                  provided solver instance.
                - #get_max_num_epochs() == 10000
                - #get_mini_batch_size() == 128
                - #get_learning_rate() == 1e-2
                - #get_min_learning_rate() == 1e-5
                - #get_iterations_without_progress_threshold() == 2000
                - #get_test_iterations_without_progress_threshold() == 500
                - #get_learning_rate_shrink_factor() == 0.1
                - #get_learning_rate_schedule().size() == 0
                - #get_train_one_step_calls() == 0
                - #get_test_one_step_calls() == 0
                - #get_synchronization_file() == ""
                - if (cuda_extra_devices.size() > 0) then
                    - This object will use multiple graphics cards to run the learning
                      algorithms.  In particular, it will always use whatever device is
                      currently selected on the calling thread (the device indicated by
                      cudaGetDevice()).  In addition, you can ask to use additional
                      devices, which you do by putting their device numbers into
                      cuda_extra_devices.
        !*/
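        // A minimal construction sketch.  The network definition below is
        // hypothetical and exists only to make the example self-contained; any
        // add_loss_layer network works the same way:
        //
        //     using net_type = loss_multiclass_log<fc<10,
        //                          relu<fc<64,
        //                          input<matrix<float>>>>>>;
        //     net_type net;
        //     dnn_trainer<net_type> trainer(net, sgd());
        //     // Or, to also spread work across GPUs 1 and 2 in addition to the
        //     // currently selected device:
        //     // dnn_trainer<net_type> trainer(net, sgd(), {1,2});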
        net_type& get_net (
            force_flush_to_disk force_flush = force_flush_to_disk::yes
        );
        /*!
            ensures
                - returns the neural network object used by this trainer.  This is the
                  network that is optimized when you call train() or train_one_step().
                  Recall that the dnn_trainer doesn't contain the net_type object but
                  simply holds a reference to an external network which was provided to
                  the dnn_trainer's constructor.
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
                - If force_flush is yes, then this function will sync the trainer state
                  to disk if the current state hasn't already been synced to disk since
                  the last network modification.
        !*/

        const std::vector<solver_type>& get_solvers (
        ) const;
        /*!
            ensures
                - returns the solvers used to optimize each layer of the neural network
                  get_net().  In particular, the first layer's solver is
                  get_solvers()[0], the second layer's solver is get_solvers()[1], and
                  so on.
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        unsigned long get_mini_batch_size (
        ) const;
        /*!
            ensures
                - During training, we call the network's update() routine over and over
                  with training data.  The number of training samples we give to each
                  call to update is the "mini-batch size", which is defined by
                  get_mini_batch_size().
        !*/

        void set_mini_batch_size (
            unsigned long batch_size
        );
        /*!
            requires
                - batch_size > 0
            ensures
                - #get_mini_batch_size() == batch_size
        !*/

        unsigned long get_max_num_epochs (
        ) const;
        /*!
            ensures
                - train() will execute at most get_max_num_epochs() iterations over the
                  training data before returning.
        !*/

        void set_max_num_epochs (
            unsigned long num
        );
        /*!
            requires
                - num > 0
            ensures
                - #get_max_num_epochs() == num
        !*/

        void set_learning_rate (
            double lr
        );
        /*!
            requires
                - lr > 0
            ensures
                - #get_learning_rate() == lr
                - #get_learning_rate_schedule().size() == 0
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        double get_learning_rate(
        ) const;
        /*!
            ensures
                - During each training step, a solver tells us how to modify the
                  parameters of each layer in the network.  It does this by outputting a
                  step vector that, when added to the parameters, will hopefully result
                  in improved network performance.  The learning rate is one of the
                  inputs to the solver and influences the size of this step vector.
                  This function returns the current learning rate, that is, the learning
                  rate that will be used during the next training step.
        !*/

        void set_min_learning_rate (
            double lr
        );
        /*!
            requires
                - lr > 0
            ensures
                - #get_min_learning_rate() == lr
                - #get_learning_rate_schedule().size() == 0
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        double get_min_learning_rate (
        ) const;
        /*!
            ensures
                - During training via this->train(), this object will test if progress
                  is still being made and if it isn't then it will reduce
                  get_learning_rate() by setting it to
                  get_learning_rate()*get_learning_rate_shrink_factor().  However, it
                  will not reduce it below get_min_learning_rate().  Once this minimum
                  learning rate is crossed the training will terminate.
                - get_min_learning_rate() doesn't apply if you are using
                  train_one_step().  You can keep calling train_one_step() as many times
                  as you want and the learning rate will keep shrinking, getting
                  arbitrarily close to 0, if you run long enough.
        !*/
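        // A sketch of the typical configuration calls (the values are
        // illustrative, not recommendations):
        //
        //     trainer.set_mini_batch_size(64);
        //     trainer.set_learning_rate(1e-1);
        //     trainer.set_min_learning_rate(1e-4);   // train() stops below this
        //     trainer.set_max_num_epochs(3000);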
        template <typename EXP>
        void set_learning_rate_schedule (
            const matrix_exp<EXP>& schedule
        );
        /*!
            requires
                - schedule.size() > 0
                - min(schedule) > 0
            ensures
                - #get_learning_rate_schedule() == reshape_to_column_vector(schedule)
                - #get_learning_rate() == schedule(0,0)
                - #get_min_learning_rate() == min(schedule)
                - #get_learning_rate_shrink_factor() == 1
        !*/

        const matrix<double,0,1>& get_learning_rate_schedule (
        ) const;
        /*!
            ensures
                - if (this function returns a non-empty matrix) then
                    - This trainer will use an explicit learning rate schedule defined
                      by the learning rate values in get_learning_rate_schedule().  For
                      example, if get_learning_rate_schedule() returned {0.1, 0.09,
                      0.08, 0.07, 0.06} then the first training mini-batch would use a
                      learning rate of 0.1, then the next training mini-batch uses 0.09,
                      and then 0.08, and so on until the end of the schedule is reached.

                      If you continue to run training after the end of the schedule has
                      been reached then the learning rate will be fixed to 0.99 times
                      the final value.  So in our example, eventually the learning rate
                      would be fixed to 0.99*0.06.  This allows you to test if the end
                      of the schedule has been reached by checking if
                      get_learning_rate() < 0.06.
        !*/

        unsigned long get_steps_without_progress (
        ) const;
        /*!
            ensures
                - if (get_learning_rate_shrink_factor() != 1) then
                    - returns an estimate of how many mini-batches have executed without
                      us observing a statistically significant decrease in the training
                      error.
                - else
                    - returns 0
        !*/

        void set_iterations_without_progress_threshold (
            unsigned long thresh
        );
        /*!
            ensures
                - #get_iterations_without_progress_threshold() == thresh
                - #get_learning_rate_schedule().size() == 0
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        unsigned long get_iterations_without_progress_threshold (
        ) const;
        /*!
            ensures
                - This object monitors the progress of training and estimates if the
                  training error is being reduced.  It does this by looking at the
                  previous get_iterations_without_progress_threshold() mini-batch
                  results and applying the statistical test defined by the
                  running_gradient object to see if the training error is getting
                  smaller.  If it isn't being reduced then get_learning_rate() is made
                  smaller by a factor of get_learning_rate_shrink_factor().

                  Therefore, get_iterations_without_progress_threshold() should always
                  be set to something sensibly large so that this test can be done with
                  reasonably high confidence.  Think of this test as saying "if the loss
                  hasn't decreased for the previous
                  get_iterations_without_progress_threshold() mini-batches then shrink
                  the learning rate".
        !*/

        void set_learning_rate_shrink_factor (
            double shrink
        );
        /*!
            requires
                - 0 < shrink && shrink <= 1
            ensures
                - #get_learning_rate_shrink_factor() == shrink
                - #get_learning_rate_schedule().size() == 0
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/
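        // A sketch of driving training with an explicit schedule.  This assumes
        // dlib's logspace(), which yields logarithmically spaced values from 10^a
        // down to 10^b:
        //
        //     // 10000 learning rates decaying smoothly from 1e-1 to 1e-4.
        //     trainer.set_learning_rate_schedule(logspace(-1, -4, 10000));
        //     // Note: this also sets the shrink factor to 1, disabling the
        //     // automatic progress-based learning rate reduction.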
        double get_learning_rate_shrink_factor (
        ) const;
        /*!
            ensures
                - Whenever the training routine thinks it isn't making progress anymore
                  it will reduce get_learning_rate() by multiplying it by
                  get_learning_rate_shrink_factor().
                - You can disable the automatic learning rate reduction by setting
                  get_learning_rate_shrink_factor() to 1.
        !*/

        unsigned long long get_train_one_step_calls (
        ) const;
        /*!
            ensures
                - returns the number of times train_one_step() has been called.
        !*/

        unsigned long long get_test_one_step_calls (
        ) const;
        /*!
            ensures
                - returns the number of times test_one_step() has been called.
        !*/

        void be_verbose (
        );
        /*!
            ensures
                - This object will print status messages to standard out so that a user
                  can observe the progress of the algorithm.
        !*/

        void be_quiet (
        );
        /*!
            ensures
                - This object will not print anything to standard out.
        !*/

        void set_synchronization_file (
            const std::string& filename,
            std::chrono::seconds time_between_syncs = std::chrono::minutes(15)
        );
        /*!
            ensures
                - #get_synchronization_file() == filename
                - While training is running, either via train() or repeated calls to
                  train_one_step(), this object will save its entire state, including
                  the state of get_net(), to disk in the file named filename every
                  time_between_syncs seconds.
                - If the filename file already exists then the state of this trainer
                  will be loaded from that file by this call to
                  set_synchronization_file().  This allows you to resume a training
                  session which was previously interrupted.
                - It should be noted that when saving, the trainer will alternate
                  between saving to a file called filename and another file called
                  filename+"_".  We do this because it's possible that your computer
                  might crash (not because of dlib, just in general) before the data is
                  safely saved to disk.  This way, you will always have a backup file if
                  the write to disk gets corrupted or is incomplete.  Moreover, when
                  loading, we will always load from the newest of the two possible
                  files.
        !*/

        const std::string& get_synchronization_file (
        );
        /*!
            ensures
                - Returns the name of the file the dnn_trainer will periodically save
                  its state to.  If the return value is "" then synchronization is
                  disabled.
        !*/
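        // A sketch of enabling logging and crash-resumable training (the file
        // name is hypothetical):
        //
        //     trainer.be_verbose();
        //     // Snapshot the trainer (and network) every 5 minutes.  If
        //     // "trainer_state.dat" already exists, training resumes from it.
        //     trainer.set_synchronization_file("trainer_state.dat",
        //                                      std::chrono::minutes(5));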
        void train (
            const std::vector<input_type>& data,
            const std::vector<training_label_type>& labels
        );
        /*!
            requires
                - data.size() == labels.size()
                - data.size() > 0
                - net_type uses a supervised loss.
                  i.e. net_type::training_label_type != no_label_type.
            ensures
                - Trains a supervised neural network based on the given training data.
                  The goal of training is to find the network parameters that minimize
                  get_net().compute_loss(data.begin(), data.end(), labels.begin()).
                - The optimizer will run until get_learning_rate() <
                  get_min_learning_rate() or get_max_num_epochs() training epochs have
                  been executed.
                - Each layer in the network will be optimized by its corresponding
                  solver in get_solvers().
                - Each call to train DOES NOT reinitialize the state of get_net() or
                  get_solvers().  That is, the existing state of the solvers and network
                  is the starting point for the optimization each time train() is
                  called.  In particular, if you use the set_synchronization_file()
                  method you can resume an interrupted train() call by simply calling
                  train() again and it will pick up from the last synchronization point.
                - You can obtain the average loss value during the final training epoch
                  by calling get_average_loss().
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        void train (
            const std::vector<input_type>& data
        );
        /*!
            requires
                - data.size() > 0
                - net_type uses an unsupervised loss.
                  i.e. net_type::training_label_type == no_label_type.
            ensures
                - Trains an unsupervised neural network based on the given training
                  data.  The goal of training is to find the network parameters that
                  minimize get_net().compute_loss(data.begin(), data.end()).
                - The optimizer will run until get_learning_rate() <
                  get_min_learning_rate() or get_max_num_epochs() training epochs have
                  been executed.
                - Each layer in the network will be optimized by its corresponding
                  solver in get_solvers().
                - Each call to train DOES NOT reinitialize the state of get_net() or
                  get_solvers().  That is, the existing state of the solvers and network
                  is the starting point for the optimization each time train() is
                  called.  In particular, if you use the set_synchronization_file()
                  method you can resume an interrupted train() call by simply calling
                  train() again and it will pick up from the last synchronization point.
                - You can obtain the average loss value during the final training epoch
                  by calling get_average_loss().
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        void train_one_step (
            const std::vector<input_type>& data,
            const std::vector<training_label_type>& labels
        );
        /*!
            requires
                - data.size() == labels.size()
                - data.size() > 0
                - net_type uses a supervised loss.
                  i.e. net_type::training_label_type != no_label_type.
            ensures
                - Performs one stochastic gradient update step based on the mini-batch
                  of data and labels supplied to this function.  In particular, calling
                  train_one_step() in a loop is equivalent to calling the train() method
                  defined above.  However, train_one_step() allows you to stream data
                  from disk into the training process while train() requires you to
                  first load all the training data into RAM.  Otherwise, these training
                  methods are equivalent.
                - You can observe the current average loss value by calling
                  get_average_loss().
                - The network training will happen in another thread.  Therefore, after
                  calling this function you should call get_net() before you touch the
                  net object from the calling thread to ensure no other threads are
                  still accessing the network.
                - #get_train_one_step_calls() == get_train_one_step_calls() + 1.
        !*/
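        // A sketch of a streaming training loop built on train_one_step().
        // load_next_mini_batch() is a hypothetical helper that fills the two
        // vectors from disk:
        //
        //     std::vector<input_type> samples;
        //     std::vector<training_label_type> labels;
        //     // get_min_learning_rate() isn't enforced here, so stop manually
        //     // once the learning rate has shrunk far enough.
        //     while (trainer.get_learning_rate() >= 1e-4)
        //     {
        //         load_next_mini_batch(samples, labels);
        //         trainer.train_one_step(samples, labels);
        //     }
        //     trainer.get_net();  // block until the training threads are done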
        template <
            typename data_iterator,
            typename label_iterator
            >
        void train_one_step (
            data_iterator dbegin,
            data_iterator dend,
            label_iterator lbegin
        );
        /*!
            requires
                - std::advance(lbegin, std::distance(dbegin, dend) - 1) is
                  dereferenceable
                - std::distance(dbegin, dend) > 0
                - net_type uses a supervised loss.
                  i.e. net_type::training_label_type != no_label_type.
            ensures
                - Performs one stochastic gradient update step based on the mini-batch
                  of data and labels supplied to this function.  In particular, calling
                  train_one_step() in a loop is equivalent to calling the train() method
                  defined above.  However, train_one_step() allows you to stream data
                  from disk into the training process while train() requires you to
                  first load all the training data into RAM.  Otherwise, these training
                  methods are equivalent.
                - You can observe the current average loss value by calling
                  get_average_loss().
                - The network training will happen in another thread.  Therefore, after
                  calling this function you should call get_net() before you touch the
                  net object from the calling thread to ensure no other threads are
                  still accessing the network.
                - #get_train_one_step_calls() == get_train_one_step_calls() + 1.
        !*/

        void train_one_step (
            const std::vector<input_type>& data
        );
        /*!
            requires
                - data.size() > 0
                - net_type uses an unsupervised loss.
                  i.e. net_type::training_label_type == no_label_type.
            ensures
                - Performs one stochastic gradient update step based on the mini-batch
                  of data supplied to this function.  In particular, calling
                  train_one_step() in a loop is equivalent to calling the train() method
                  defined above.  However, train_one_step() allows you to stream data
                  from disk into the training process while train() requires you to
                  first load all the training data into RAM.  Otherwise, these training
                  methods are equivalent.
                - You can observe the current average loss value by calling
                  get_average_loss().
                - The network training will happen in another thread.  Therefore, after
                  calling this function you should call get_net() before you touch the
                  net object from the calling thread to ensure no other threads are
                  still accessing the network.
                - #get_train_one_step_calls() == get_train_one_step_calls() + 1.
        !*/

        template <
            typename data_iterator
            >
        void train_one_step (
            data_iterator dbegin,
            data_iterator dend
        );
        /*!
            requires
                - std::distance(dbegin, dend) > 0
                - net_type uses an unsupervised loss.
                  i.e. net_type::training_label_type == no_label_type.
            ensures
                - Performs one stochastic gradient update step based on the mini-batch
                  of data supplied to this function.  In particular, calling
                  train_one_step() in a loop is equivalent to calling the train() method
                  defined above.  However, train_one_step() allows you to stream data
                  from disk into the training process while train() requires you to
                  first load all the training data into RAM.  Otherwise, these training
                  methods are equivalent.
                - You can observe the current average loss value by calling
                  get_average_loss().
                - The network training will happen in another thread.  Therefore, after
                  calling this function you should call get_net() before you touch the
                  net object from the calling thread to ensure no other threads are
                  still accessing the network.
                - #get_train_one_step_calls() == get_train_one_step_calls() + 1.
        !*/

        double get_average_loss (
        ) const;
        /*!
            ensures
                - returns the average loss value observed during previous calls to
                  train_one_step() or train().  That is, the average output of
                  net_type::update() during the previous mini-batch updates.
                - Note that, if be_verbose() has been called, then this object will
                  automatically call clear_average_loss() periodically when it logs the
                  loss to the console.
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        void clear_average_loss (
        );
        /*!
            ensures
                - #get_average_loss() == 0
                - get_average_loss() uses a dlib::running_stats object to keep a running
                  average of the loss values seen during the previous mini-batch updates
                  applied during training.  Calling clear_average_loss() resets the
                  running_stats object so it forgets about all previous loss values
                  observed.
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        // ----------------------
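        // A sketch of periodic loss reporting when running quietly; the
        // step-count check is illustrative:
        //
        //     if (trainer.get_train_one_step_calls() % 100 == 0)
        //     {
        //         std::cout << "avg loss: " << trainer.get_average_loss() << "\n";
        //         trainer.clear_average_loss();  // start a fresh running average
        //     }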
        double get_average_test_loss (
        ) const;
        /*!
            ensures
                - returns the average loss value observed during previous calls to
                  test_one_step().
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        void test_one_step (
            const std::vector<input_type>& data,
            const std::vector<training_label_type>& labels
        );
        /*!
            requires
                - data.size() == labels.size()
                - data.size() > 0
                - net_type uses a supervised loss.
                  i.e. net_type::training_label_type != no_label_type.
            ensures
                - Runs the given data through the network and computes and records the
                  loss.
                - This call does not modify network parameters.  The point of
                  test_one_step() is twofold: to allow you to observe the accuracy of
                  the network on held-out data during training, and to allow the trainer
                  to automatically adjust the learning rate when the test loss stops
                  improving.  You are not required to use test_one_step() at all, but it
                  is available if you want this kind of monitoring.
                - You can observe the current average loss value by calling
                  get_average_test_loss().
                - The computation will happen in another thread.  Therefore, after
                  calling this function you should call get_net() before you touch the
                  net object from the calling thread to ensure no other threads are
                  still accessing the network.
                - #get_test_one_step_calls() == get_test_one_step_calls() + 1.
        !*/

        template <
            typename data_iterator,
            typename label_iterator
            >
        void test_one_step (
            data_iterator dbegin,
            data_iterator dend,
            label_iterator lbegin
        );
        /*!
            requires
                - std::advance(lbegin, std::distance(dbegin, dend) - 1) is
                  dereferenceable
                - std::distance(dbegin, dend) > 0
                - net_type uses a supervised loss.
                  i.e. net_type::training_label_type != no_label_type.
            ensures
                - Runs the given data through the network and computes and records the
                  loss.
                - This call does not modify network parameters.  The point of
                  test_one_step() is twofold: to allow you to observe the accuracy of
                  the network on held-out data during training, and to allow the trainer
                  to automatically adjust the learning rate when the test loss stops
                  improving.  You are not required to use test_one_step() at all, but it
                  is available if you want this kind of monitoring.
                - You can observe the current average loss value by calling
                  get_average_test_loss().
                - The computation will happen in another thread.  Therefore, after
                  calling this function you should call get_net() before you touch the
                  net object from the calling thread to ensure no other threads are
                  still accessing the network.
                - #get_test_one_step_calls() == get_test_one_step_calls() + 1.
        !*/
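        // A sketch of interleaving held-out evaluation with training so the
        // trainer can also shrink the learning rate when the test loss plateaus.
        // The helpers and the 10:1 ratio are hypothetical:
        //
        //     while (trainer.get_learning_rate() >= 1e-4)
        //     {
        //         load_next_mini_batch(samples, labels);          // hypothetical
        //         trainer.train_one_step(samples, labels);
        //         if (trainer.get_train_one_step_calls() % 10 == 0)
        //         {
        //             load_next_test_batch(tsamples, tlabels);    // hypothetical
        //             trainer.test_one_step(tsamples, tlabels);
        //         }
        //     }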
        void test_one_step (
            const std::vector<input_type>& data
        );
        /*!
            requires
                - data.size() > 0
                - net_type uses an unsupervised loss.
                  i.e. net_type::training_label_type == no_label_type.
            ensures
                - Runs the given data through the network and computes and records the
                  loss.
                - This call does not modify network parameters.  The point of
                  test_one_step() is twofold: to allow you to observe the accuracy of
                  the network on held-out data during training, and to allow the trainer
                  to automatically adjust the learning rate when the test loss stops
                  improving.  You are not required to use test_one_step() at all, but it
                  is available if you want this kind of monitoring.
                - You can observe the current average loss value by calling
                  get_average_test_loss().
                - The computation will happen in another thread.  Therefore, after
                  calling this function you should call get_net() before you touch the
                  net object from the calling thread to ensure no other threads are
                  still accessing the network.
                - #get_test_one_step_calls() == get_test_one_step_calls() + 1.
        !*/

        template <
            typename data_iterator
            >
        void test_one_step (
            data_iterator dbegin,
            data_iterator dend
        );
        /*!
            requires
                - std::distance(dbegin, dend) > 0
                - net_type uses an unsupervised loss.
                  i.e. net_type::training_label_type == no_label_type.
            ensures
                - Runs the given data through the network and computes and records the
                  loss.
                - This call does not modify network parameters.  The point of
                  test_one_step() is twofold: to allow you to observe the accuracy of
                  the network on held-out data during training, and to allow the trainer
                  to automatically adjust the learning rate when the test loss stops
                  improving.  You are not required to use test_one_step() at all, but it
                  is available if you want this kind of monitoring.
                - You can observe the current average loss value by calling
                  get_average_test_loss().
                - The computation will happen in another thread.  Therefore, after
                  calling this function you should call get_net() before you touch the
                  net object from the calling thread to ensure no other threads are
                  still accessing the network.
                - #get_test_one_step_calls() == get_test_one_step_calls() + 1.
        !*/

        void set_test_iterations_without_progress_threshold (
            unsigned long thresh
        );
        /*!
            ensures
                - #get_test_iterations_without_progress_threshold() == thresh
                - #get_learning_rate_schedule().size() == 0
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        unsigned long get_test_iterations_without_progress_threshold (
        ) const;
        /*!
            ensures
                - This object monitors the progress of training and estimates if the
                  testing error is being reduced.  It does this by looking at the
                  previous get_test_iterations_without_progress_threshold() mini-batch
                  results from test_one_step() and applying the statistical test defined
                  by the running_gradient object to see if the testing error is getting
                  smaller.  If it isn't being reduced then get_learning_rate() is made
                  smaller by a factor of get_learning_rate_shrink_factor().

                  Therefore, get_test_iterations_without_progress_threshold() should
                  always be set to something sensibly large so that this test can be
                  done with reasonably high confidence.  Think of this test as saying
                  "if the testing loss hasn't decreased for the previous
                  get_test_iterations_without_progress_threshold() calls to
                  test_one_step() then shrink the learning rate".
        !*/

        unsigned long get_test_steps_without_progress (
        ) const;
        /*!
            ensures
                - if (get_learning_rate_shrink_factor() != 1) then
                    - returns an estimate of how many mini-batches have executed without
                      us observing a statistically significant decrease in the testing
                      error (i.e. the error on the data given to the trainer via
                      test_one_step() calls).
                - else
                    - returns 0
        !*/
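        // A sketch of tuning both progress tests; the values are illustrative.
        // Larger thresholds mean more patience before the rate is shrunk:
        //
        //     trainer.set_iterations_without_progress_threshold(5000);
        //     trainer.set_test_iterations_without_progress_threshold(800);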
    };

// ----------------------------------------------------------------------------------------

    template <
        typename net_type,
        typename solver_type
        >
    std::ostream& operator<< (
        std::ostream& out,
        dnn_trainer<net_type,solver_type>& trainer
    );
    /*!
        ensures
            - Prints a log of the current parameters of trainer to out.
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_DNn_TRAINER_ABSTRACT_H_
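// Usage sketch for the stream operator, e.g. inside a monitoring loop:
//
//     std::cout << trainer << std::endl;  // logs the trainer's current parameters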