Diffstat (limited to 'ml/dlib/dlib/dnn/trainer_abstract.h')
 ml/dlib/dlib/dnn/trainer_abstract.h | 765 +++++++++++++++++++++++++++++++++++
 1 file changed, 765 insertions(+), 0 deletions(-)
diff --git a/ml/dlib/dlib/dnn/trainer_abstract.h b/ml/dlib/dlib/dnn/trainer_abstract.h
new file mode 100644
index 000000000..3bfb6dc99
--- /dev/null
+++ b/ml/dlib/dlib/dnn/trainer_abstract.h
@@ -0,0 +1,765 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License. See LICENSE.txt for the full license.
+#undef DLIB_DNn_TRAINER_ABSTRACT_H_
+#ifdef DLIB_DNn_TRAINER_ABSTRACT_H_
+
+#include "core_abstract.h"
+#include "solvers_abstract.h"
+#include <vector>
+#include <chrono>
+
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ enum class force_flush_to_disk {
+ no = 0,
+ yes = 1
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename net_type,
+ typename solver_type = sgd
+ >
+ class dnn_trainer
+ {
+ /*!
+ REQUIREMENTS ON net_type
+ - net_type is an add_loss_layer object.
+
+ REQUIREMENTS ON solver_type
+ - solver_type is an implementation of the EXAMPLE_SOLVER interface defined
+ in solvers_abstract.h
+
+ WHAT THIS OBJECT REPRESENTS
+            This object is a tool for training a deep neural network. To use it you
+            supply a neural network and a solver, then you call train() with your
+            training data and it will update the network so that it has hopefully
+            learned something useful from your training data.
+
+ If you are compiling with CUDA then this object will use the GPU that is
+ currently selected (i.e. the one indicated by cudaGetDevice()) when
+ dnn_trainer is constructed. It will continue to use that device even if
+ you later change it by a call to cudaSetDevice().
+
+ EXCEPTIONS
+ If an exception is thrown by any part of the neural network during training
+ then the exception will be propagated out of the trainer to the user.
+ Moreover, the trainer instance will be unusable and should be destroyed.
+ !*/
+
+ public:
+
+ typedef typename net_type::training_label_type training_label_type;
+ typedef typename net_type::input_type input_type;
+ const static size_t num_computational_layers = net_type::num_computational_layers;
+
+ dnn_trainer() = delete;
+ dnn_trainer(const dnn_trainer&) = delete;
+ dnn_trainer& operator=(const dnn_trainer&) = delete;
+
+ dnn_trainer(
+ net_type& net,
+ const solver_type& solver = solver_type(),
+ const std::vector<int>& cuda_extra_devices = {}
+ );
+ /*!
+ requires
+ - for all valid i:
+ - 0 <= cuda_extra_devices[i] < dlib::cuda::get_num_devices()
+ ensures
+ - &#get_net() == &net
+ (i.e. The dnn_trainer holds a reference to net, it does not copy it.
+ Therefore, you must ensure net has a lifetime at least as long as the
+ dnn_trainer).
+ - #get_solvers() == a set of solvers that are all initialized with the
+ provided solver instance.
+ - #get_max_num_epochs() == 10000
+ - #get_mini_batch_size() == 128
+ - #get_learning_rate() == 1e-2
+ - #get_min_learning_rate() == 1e-5
+ - #get_iterations_without_progress_threshold() == 2000
+ - #get_test_iterations_without_progress_threshold() == 500
+ - #get_learning_rate_shrink_factor() == 0.1
+ - #get_learning_rate_schedule().size() == 0
+ - #get_train_one_step_calls() == 0
+ - #get_test_one_step_calls() == 0
+ - #get_synchronization_file() == ""
+ - if (cuda_extra_devices.size() > 0) then
+ - This object will use multiple graphics cards to run the learning
+ algorithms. In particular, it will always use whatever device is
+ currently selected on the calling thread (the device indicated by
+ cudaGetDevice()). In addition, you can ask to use additional
+ devices, which you do by putting their device numbers into
+ cuda_extra_devices.
+ !*/
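+
+        // A minimal construction sketch (illustrative, not part of the contract).
+        // "my_net_type" stands for some user-defined add_loss_layer network:
+        //
+        //    my_net_type net;
+        //    dnn_trainer<my_net_type> trainer(net, sgd(0.0005, 0.9));
+        //
+        // To also spread training over CUDA devices 1 and 2, in addition to the
+        // currently selected device:
+        //
+        //    dnn_trainer<my_net_type> trainer(net, sgd(), {1, 2});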
+
+ net_type& get_net (
+ force_flush_to_disk force_flush = force_flush_to_disk::yes
+ );
+ /*!
+ ensures
+ - returns the neural network object used by this trainer. This is the
+ network that is optimized when you call train() or train_one_step().
+ Recall that the dnn_trainer doesn't contain the net_type object but
+ simply holds a reference to an external network which was provided to the
+ dnn_trainer's constructor.
+ - This function blocks until all threads inside the dnn_trainer have
+ stopped touching the net.
+ - If force_flush is yes, then this function will sync the trainer state to
+ disk if the current state hasn't already been synced to disk since the
+ last network modification.
+ !*/
+
+ const std::vector<solver_type>& get_solvers (
+ ) const;
+ /*!
+ ensures
+ - returns the solvers used to optimize each layer of the neural network
+ get_net(). In particular, the first layer's solver is
+ get_solvers()[0], the second layer's solver is
+ get_solvers()[1], and so on.
+ - This function blocks until all threads inside the dnn_trainer have
+ stopped touching the net.
+ !*/
+
+ unsigned long get_mini_batch_size (
+ ) const;
+ /*!
+ ensures
+ - During training, we call the network's update() routine over and over
+ with training data. The number of training samples we give to each call
+ to update is the "mini-batch size", which is defined by
+ get_mini_batch_size().
+ !*/
+
+ void set_mini_batch_size (
+ unsigned long batch_size
+ );
+ /*!
+ requires
+ - batch_size > 0
+ ensures
+ - #get_mini_batch_size() == batch_size
+ !*/
+
+ unsigned long get_max_num_epochs (
+ ) const;
+ /*!
+ ensures
+ - train() will execute at most get_max_num_epochs() iterations over the
+ training data before returning.
+ !*/
+
+ void set_max_num_epochs (
+ unsigned long num
+ );
+ /*!
+ requires
+ - num > 0
+ ensures
+ - #get_max_num_epochs() == num
+ !*/
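+
+        // For example, to train in mini-batches of 64 samples for at most 300
+        // epochs (illustrative values):
+        //
+        //    trainer.set_mini_batch_size(64);
+        //    trainer.set_max_num_epochs(300);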
+
+ void set_learning_rate (
+ double lr
+ );
+ /*!
+ requires
+ - lr > 0
+ ensures
+ - #get_learning_rate() == lr
+ - #get_learning_rate_schedule().size() == 0
+ - This function blocks until all threads inside the dnn_trainer have
+ stopped touching the net.
+ !*/
+
+ double get_learning_rate(
+ ) const;
+ /*!
+ ensures
+ - During each training step, a solver tells us how to modify the parameters
+ of each layer in the network. It does this by outputting a step vector
+ that, when added to the parameters, will hopefully result in improved
+ network performance. The learning rate is one of the inputs to the
+ solver and influences the size of this step vector. This function
+ returns the current learning rate, that is, the learning rate that will
+ be used during the next training step.
+ !*/
+
+ void set_min_learning_rate (
+ double lr
+ );
+ /*!
+ requires
+ - lr > 0
+ ensures
+ - #get_min_learning_rate() == lr
+ - #get_learning_rate_schedule().size() == 0
+ - This function blocks until all threads inside the dnn_trainer have
+ stopped touching the net.
+ !*/
+
+ double get_min_learning_rate (
+ ) const;
+ /*!
+ ensures
+ - During training via this->train(), this object will test if progress is
+ still being made and if it isn't then it will reduce get_learning_rate()
+ by setting it to get_learning_rate()*get_learning_rate_shrink_factor().
+ However, it will not reduce it below get_min_learning_rate(). Once this
+ minimum learning rate is crossed the training will terminate.
+            - get_min_learning_rate() doesn't apply if you are using train_one_step().
+              You can keep calling train_one_step() as many times as you want and, if
+              you run long enough, the learning rate will shrink arbitrarily close to 0.
+ !*/
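+
+        // For example, with the (illustrative) settings below, train() keeps
+        // running, shrinking the learning rate by 10x whenever progress stalls,
+        // and stops once the rate would drop below 1e-5:
+        //
+        //    trainer.set_learning_rate(1e-2);
+        //    trainer.set_learning_rate_shrink_factor(0.1);
+        //    trainer.set_min_learning_rate(1e-5);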
+
+ template <typename EXP>
+ void set_learning_rate_schedule (
+ const matrix_exp<EXP>& schedule
+ );
+ /*!
+ requires
+ - schedule.size() > 0
+ - min(schedule) > 0
+ ensures
+ - #get_learning_rate_schedule() == reshape_to_column_vector(schedule)
+ - #get_learning_rate() == schedule(0,0)
+ - #get_min_learning_rate() == min(schedule)
+            - #get_learning_rate_shrink_factor() == 1
+ !*/
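+
+        // For instance, this sketch (using dlib's logspace()) sets a schedule
+        // that decays the learning rate geometrically from 1e-1 down to 1e-4
+        // over 10000 mini-batches:
+        //
+        //    trainer.set_learning_rate_schedule(logspace(log10(1e-1), log10(1e-4), 10000));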
+
+ const matrix<double,0,1>& get_learning_rate_schedule (
+ ) const;
+ /*!
+ ensures
+ - if (this function returns a non-empty matrix) then
+ - This trainer will use an explicit learning rate schedule defined by
+ the learning rate values in get_learning_rate_schedule(). For
+ example, if get_learning_rate_schedule() returned {0.1, 0.09, 0.08,
+ 0.07, 0.06} then the first training mini-batch would use a learning
+                  rate of 0.1, then the next training mini-batch would use 0.09, then
+                  0.08, and so on until the end of the schedule is reached.
+
+ If you continue to run training after the end of the schedule has
+ been reached then the learning rate will be fixed to 0.99 times the
+ final value. So in our example, eventually the learning rate would
+                  be fixed to 0.99*0.06. This lets you test whether the end of the
+                  schedule has been reached by checking if get_learning_rate() < 0.06.
+ !*/
+
+ unsigned long get_steps_without_progress (
+ ) const;
+ /*!
+ ensures
+ - if (get_learning_rate_shrink_factor() != 1) then
+ - returns an estimate of how many mini-batches have executed without us
+ observing a statistically significant decrease in the training error.
+ - else
+ - returns 0
+ !*/
+
+ void set_iterations_without_progress_threshold (
+ unsigned long thresh
+ );
+ /*!
+ ensures
+ - #get_iterations_without_progress_threshold() == thresh
+ - #get_learning_rate_schedule().size() == 0
+ - This function blocks until all threads inside the dnn_trainer have
+ stopped touching the net.
+ !*/
+
+ unsigned long get_iterations_without_progress_threshold (
+ ) const;
+ /*!
+ ensures
+ - This object monitors the progress of training and estimates if the
+ training error is being reduced. It does this by looking at the previous
+ get_iterations_without_progress_threshold() mini-batch results and
+ applying the statistical test defined by the running_gradient object to
+ see if the training error is getting smaller. If it isn't being reduced
+ then get_learning_rate() is made smaller by a factor of get_learning_rate_shrink_factor().
+
+ Therefore, get_iterations_without_progress_threshold() should always be
+ set to something sensibly large so that this test can be done with
+ reasonably high confidence. Think of this test as saying "if the loss
+              hasn't decreased for the previous get_iterations_without_progress_threshold()
+              mini-batches then shrink the learning rate".
+ !*/
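+
+        // E.g. on a large, noisy dataset you might raise the threshold so the
+        // statistical test sees more mini-batches before acting (illustrative):
+        //
+        //    trainer.set_iterations_without_progress_threshold(10000);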
+
+ void set_learning_rate_shrink_factor (
+ double shrink
+ );
+ /*!
+ requires
+ - 0 < shrink && shrink <= 1
+ ensures
+ - #get_learning_rate_shrink_factor() == shrink
+ - #get_learning_rate_schedule().size() == 0
+ - This function blocks until all threads inside the dnn_trainer have
+ stopped touching the net.
+ !*/
+
+ double get_learning_rate_shrink_factor (
+ ) const;
+ /*!
+ ensures
+ - Whenever the training routine thinks it isn't making progress anymore it
+ will reduce get_learning_rate() by multiplying it by get_learning_rate_shrink_factor().
+ - You can disable the automatic learning rate reduction by setting
+ get_learning_rate_shrink_factor() to 1.
+ !*/
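+
+        // E.g. to keep the learning rate fixed at whatever you set it to:
+        //
+        //    trainer.set_learning_rate_shrink_factor(1);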
+
+ unsigned long long get_train_one_step_calls (
+ ) const;
+ /*!
+ ensures
+ - returns the number of times train_one_step() has been called.
+ !*/
+
+ unsigned long long get_test_one_step_calls (
+ ) const;
+ /*!
+ ensures
+ - returns the number of times test_one_step() has been called.
+ !*/
+
+ void be_verbose (
+ );
+ /*!
+ ensures
+ - This object will print status messages to standard out so that a
+ user can observe the progress of the algorithm.
+ !*/
+
+ void be_quiet (
+ );
+ /*!
+ ensures
+            - This object will not print anything to standard out.
+ !*/
+
+ void set_synchronization_file (
+ const std::string& filename,
+ std::chrono::seconds time_between_syncs = std::chrono::minutes(15)
+ );
+ /*!
+ ensures
+ - #get_synchronization_file() == filename
+ - While training is running, either via train() or repeated calls to
+ train_one_step(), this object will save its entire state, including the
+ state of get_net(), to disk in the file named filename every
+ time_between_syncs seconds.
+ - If the filename file already exists then the state of this trainer will
+ be loaded from that file by this call to set_synchronization_file().
+ This allows you to resume a training session which was previously
+ interrupted.
+ - It should be noted that when saving, the trainer will alternate between
+ saving to a file called filename and another file called filename+"_".
+ We do this because it's possible that your computer might crash (not
+ because of dlib, just in general) before the data is safely saved to
+ disk. This way, you will always have a backup file if the write to disk
+ gets corrupted or is incomplete. Moreover, when loading, we will always
+ load from the newest of the two possible files.
+ !*/
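+
+        // For example (the file name and interval are arbitrary):
+        //
+        //    trainer.set_synchronization_file("mnist_sync", std::chrono::minutes(5));
+        //
+        // If "mnist_sync" (or "mnist_sync_") already exists from an earlier run,
+        // this call restores the trainer from the newest of the two files.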
+
+ const std::string& get_synchronization_file (
+ );
+ /*!
+ ensures
+            - Returns the name of the file the dnn_trainer will periodically save its
+              state to. If the return value is "" then synchronization is disabled.
+ !*/
+
+ void train (
+ const std::vector<input_type>& data,
+ const std::vector<training_label_type>& labels
+ );
+ /*!
+ requires
+ - data.size() == labels.size()
+ - data.size() > 0
+ - net_type uses a supervised loss.
+ i.e. net_type::training_label_type != no_label_type.
+ ensures
+ - Trains a supervised neural network based on the given training data.
+ The goal of training is to find the network parameters that minimize
+ get_net().compute_loss(data.begin(), data.end(), labels.begin()).
+ - The optimizer will run until get_learning_rate() < get_min_learning_rate()
+ or get_max_num_epochs() training epochs have been executed.
+ - Each layer in the network will be optimized by its corresponding solver
+ in get_solvers().
+ - Each call to train DOES NOT reinitialize the state of get_net() or
+ get_solvers(). That is, the existing state of the solvers and network is
+ the starting point for the optimization each time train() is called. In
+ particular, if you use the set_synchronization_file() method you can
+ resume an interrupted train() call by simply calling train() again and it
+ will pick up from the last synchronization point.
+ - You can obtain the average loss value during the final training epoch by
+ calling get_average_loss().
+ - This function blocks until all threads inside the dnn_trainer have
+ stopped touching the net.
+ !*/
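+
+        // An end-to-end sketch, assuming a hypothetical supervised network type
+        // "my_net_type" and already loaded data/labels vectors:
+        //
+        //    my_net_type net;
+        //    dnn_trainer<my_net_type> trainer(net);
+        //    trainer.set_learning_rate(0.01);
+        //    trainer.set_min_learning_rate(0.00001);
+        //    trainer.set_mini_batch_size(128);
+        //    trainer.be_verbose();
+        //    trainer.train(data, labels);
+        //    // net now holds the trained parameters and can be saved:
+        //    net.clean();
+        //    serialize("my_network.dat") << net;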
+
+ void train (
+ const std::vector<input_type>& data
+ );
+ /*!
+ requires
+ - data.size() > 0
+ - net_type uses an unsupervised loss.
+ i.e. net_type::training_label_type == no_label_type.
+ ensures
+ - Trains an unsupervised neural network based on the given training data.
+ The goal of training is to find the network parameters that minimize
+ get_net().compute_loss(data.begin(), data.end()).
+ - The optimizer will run until get_learning_rate() < get_min_learning_rate()
+ or get_max_num_epochs() training epochs have been executed.
+ - Each layer in the network will be optimized by its corresponding solver
+ in get_solvers().
+ - Each call to train DOES NOT reinitialize the state of get_net() or
+ get_solvers(). That is, the existing state of the solvers and network is
+ the starting point for the optimization each time train() is called. In
+ particular, if you use the set_synchronization_file() method you can
+ resume an interrupted train() call by simply calling train() again and it
+ will pick up from the last synchronization point.
+ - You can obtain the average loss value during the final training epoch by
+ calling get_average_loss().
+ - This function blocks until all threads inside the dnn_trainer have
+ stopped touching the net.
+ !*/
+
+ void train_one_step (
+ const std::vector<input_type>& data,
+ const std::vector<training_label_type>& labels
+ );
+ /*!
+ requires
+ - data.size() == labels.size()
+ - data.size() > 0
+ - net_type uses a supervised loss.
+ i.e. net_type::training_label_type != no_label_type.
+ ensures
+ - Performs one stochastic gradient update step based on the mini-batch of
+ data and labels supplied to this function. In particular, calling
+ train_one_step() in a loop is equivalent to calling the train() method
+ defined above. However, train_one_step() allows you to stream data from
+ disk into the training process while train() requires you to first load
+ all the training data into RAM. Otherwise, these training methods are
+ equivalent.
+ - You can observe the current average loss value by calling get_average_loss().
+ - The network training will happen in another thread. Therefore, after
+ calling this function you should call get_net() before you touch the net
+ object from the calling thread to ensure no other threads are still
+ accessing the network.
+ - #get_train_one_step_calls() == get_train_one_step_calls() + 1.
+ !*/
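+
+        // A streaming sketch: next_mini_batch() is a hypothetical user function
+        // that fills the sample and label vectors from disk:
+        //
+        //    std::vector<input_type> samples;
+        //    std::vector<training_label_type> labels;
+        //    while (trainer.get_learning_rate() >= 1e-5)
+        //    {
+        //        next_mini_batch(samples, labels);
+        //        trainer.train_one_step(samples, labels);
+        //    }
+        //    trainer.get_net();  // wait for the training thread before touching net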
+
+ template <
+ typename data_iterator,
+ typename label_iterator
+ >
+ void train_one_step (
+ data_iterator dbegin,
+ data_iterator dend,
+ label_iterator lbegin
+ );
+ /*!
+ requires
+            - std::advance(lbegin, std::distance(dbegin, dend) - 1) is dereferenceable
+ - std::distance(dbegin, dend) > 0
+ - net_type uses a supervised loss.
+ i.e. net_type::training_label_type != no_label_type.
+ ensures
+ - Performs one stochastic gradient update step based on the mini-batch of
+ data and labels supplied to this function. In particular, calling
+ train_one_step() in a loop is equivalent to calling the train() method
+ defined above. However, train_one_step() allows you to stream data from
+ disk into the training process while train() requires you to first load
+ all the training data into RAM. Otherwise, these training methods are
+ equivalent.
+ - You can observe the current average loss value by calling get_average_loss().
+ - The network training will happen in another thread. Therefore, after
+ calling this function you should call get_net() before you touch the net
+ object from the calling thread to ensure no other threads are still
+ accessing the network.
+ - #get_train_one_step_calls() == get_train_one_step_calls() + 1.
+ !*/
+
+ void train_one_step (
+ const std::vector<input_type>& data
+ );
+ /*!
+ requires
+ - data.size() > 0
+ - net_type uses an unsupervised loss.
+ i.e. net_type::training_label_type == no_label_type.
+ ensures
+ - Performs one stochastic gradient update step based on the mini-batch of
+ data supplied to this function. In particular, calling train_one_step()
+ in a loop is equivalent to calling the train() method defined above.
+ However, train_one_step() allows you to stream data from disk into the
+ training process while train() requires you to first load all the
+ training data into RAM. Otherwise, these training methods are
+ equivalent.
+ - You can observe the current average loss value by calling get_average_loss().
+ - The network training will happen in another thread. Therefore, after
+ calling this function you should call get_net() before you touch the net
+ object from the calling thread to ensure no other threads are still
+ accessing the network.
+ - #get_train_one_step_calls() == get_train_one_step_calls() + 1.
+ !*/
+
+ template <
+ typename data_iterator
+ >
+ void train_one_step (
+ data_iterator dbegin,
+ data_iterator dend
+ );
+ /*!
+ requires
+ - std::distance(dbegin, dend) > 0
+ - net_type uses an unsupervised loss.
+ i.e. net_type::training_label_type == no_label_type.
+ ensures
+ - Performs one stochastic gradient update step based on the mini-batch of
+ data supplied to this function. In particular, calling train_one_step()
+ in a loop is equivalent to calling the train() method defined above.
+ However, train_one_step() allows you to stream data from disk into the
+ training process while train() requires you to first load all the
+ training data into RAM. Otherwise, these training methods are
+ equivalent.
+ - You can observe the current average loss value by calling get_average_loss().
+ - The network training will happen in another thread. Therefore, after
+ calling this function you should call get_net() before you touch the net
+ object from the calling thread to ensure no other threads are still
+ accessing the network.
+ - #get_train_one_step_calls() == get_train_one_step_calls() + 1.
+ !*/
+
+ double get_average_loss (
+ ) const;
+ /*!
+ ensures
+ - returns the average loss value observed during previous calls to
+ train_one_step() or train(). That is, the average output of
+ net_type::update() during the previous mini-batch updates.
+ - Note that, if be_verbose() has been called, then this object will
+ automatically call clear_average_loss() periodically when it logs the
+ loss to the console.
+ - This function blocks until all threads inside the dnn_trainer have
+ stopped touching the net.
+ !*/
+
+ void clear_average_loss (
+ );
+ /*!
+ ensures
+ - #get_average_loss() == 0
+ - get_average_loss() uses a dlib::running_stats object to keep a running
+ average of the loss values seen during the previous mini-batch updates
+ applied during training. Calling clear_average_loss() resets the
+ running_stats object so it forgets about all previous loss values
+ observed.
+ - This function blocks until all threads inside the dnn_trainer have
+ stopped touching the net.
+ !*/
+
+ // ----------------------
+
+ double get_average_test_loss (
+ ) const;
+ /*!
+ ensures
+ - returns the average loss value observed during previous calls to
+ test_one_step().
+ - This function blocks until all threads inside the dnn_trainer have
+ stopped touching the net.
+ !*/
+
+ void test_one_step (
+ const std::vector<input_type>& data,
+ const std::vector<training_label_type>& labels
+ );
+ /*!
+ requires
+ - data.size() == labels.size()
+ - data.size() > 0
+ - net_type uses a supervised loss.
+ i.e. net_type::training_label_type != no_label_type.
+ ensures
+ - Runs the given data through the network and computes and records the loss.
+            - This call does not modify network parameters. The point of
+              test_one_step() is twofold: to allow you to observe the accuracy of
+              the network on held-out data during training, and to allow the trainer to
+ automatically adjust the learning rate when the test loss stops
+ improving. It should be noted that you are not required to use
+ test_one_step() at all, but if you want to do this kind of thing it is
+ available.
+ - You can observe the current average loss value by calling get_average_test_loss().
+ - The computation will happen in another thread. Therefore, after calling
+ this function you should call get_net() before you touch the net object
+ from the calling thread to ensure no other threads are still accessing
+ the network.
+ - #get_test_one_step_calls() == get_test_one_step_calls() + 1.
+ !*/
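+
+        // A sketch of interleaving held-out evaluation with training; the data
+        // sources and the every-30-steps cadence are illustrative:
+        //
+        //    trainer.set_test_iterations_without_progress_threshold(500);
+        //    while (trainer.get_learning_rate() >= 1e-5)
+        //    {
+        //        trainer.train_one_step(train_samples, train_labels);
+        //        if (trainer.get_train_one_step_calls() % 30 == 0)
+        //            trainer.test_one_step(test_samples, test_labels);
+        //    }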
+
+ template <
+ typename data_iterator,
+ typename label_iterator
+ >
+ void test_one_step (
+ data_iterator dbegin,
+ data_iterator dend,
+ label_iterator lbegin
+ );
+ /*!
+ requires
+            - std::advance(lbegin, std::distance(dbegin, dend) - 1) is dereferenceable
+ - std::distance(dbegin, dend) > 0
+ - net_type uses a supervised loss.
+ i.e. net_type::training_label_type != no_label_type.
+ ensures
+ - Runs the given data through the network and computes and records the loss.
+            - This call does not modify network parameters. The point of
+              test_one_step() is twofold: to allow you to observe the accuracy of
+              the network on held-out data during training, and to allow the trainer to
+ automatically adjust the learning rate when the test loss stops
+ improving. It should be noted that you are not required to use
+ test_one_step() at all, but if you want to do this kind of thing it is
+ available.
+ - You can observe the current average loss value by calling get_average_test_loss().
+ - The computation will happen in another thread. Therefore, after calling
+ this function you should call get_net() before you touch the net object
+ from the calling thread to ensure no other threads are still accessing
+ the network.
+ - #get_test_one_step_calls() == get_test_one_step_calls() + 1.
+ !*/
+
+ void test_one_step (
+ const std::vector<input_type>& data
+ );
+ /*!
+ requires
+ - data.size() > 0
+ - net_type uses an unsupervised loss.
+ i.e. net_type::training_label_type == no_label_type.
+ ensures
+ - Runs the given data through the network and computes and records the loss.
+            - This call does not modify network parameters. The point of
+              test_one_step() is twofold: to allow you to observe the accuracy of
+              the network on held-out data during training, and to allow the trainer to
+ automatically adjust the learning rate when the test loss stops
+ improving. It should be noted that you are not required to use
+ test_one_step() at all, but if you want to do this kind of thing it is
+ available.
+ - You can observe the current average loss value by calling get_average_test_loss().
+ - The computation will happen in another thread. Therefore, after calling
+ this function you should call get_net() before you touch the net object
+ from the calling thread to ensure no other threads are still accessing
+ the network.
+ - #get_test_one_step_calls() == get_test_one_step_calls() + 1.
+ !*/
+
+ template <
+ typename data_iterator
+ >
+ void test_one_step (
+ data_iterator dbegin,
+ data_iterator dend
+ );
+ /*!
+ requires
+ - std::distance(dbegin, dend) > 0
+ - net_type uses an unsupervised loss.
+ i.e. net_type::training_label_type == no_label_type.
+ ensures
+ - Runs the given data through the network and computes and records the loss.
+            - This call does not modify network parameters. The point of
+              test_one_step() is twofold: to allow you to observe the accuracy of
+              the network on held-out data during training, and to allow the trainer to
+ automatically adjust the learning rate when the test loss stops
+ improving. It should be noted that you are not required to use
+ test_one_step() at all, but if you want to do this kind of thing it is
+ available.
+ - You can observe the current average loss value by calling get_average_test_loss().
+ - The computation will happen in another thread. Therefore, after calling
+ this function you should call get_net() before you touch the net object
+ from the calling thread to ensure no other threads are still accessing
+ the network.
+ - #get_test_one_step_calls() == get_test_one_step_calls() + 1.
+ !*/
+
+ void set_test_iterations_without_progress_threshold (
+ unsigned long thresh
+ );
+ /*!
+ ensures
+ - #get_test_iterations_without_progress_threshold() == thresh
+ - #get_learning_rate_schedule().size() == 0
+ - This function blocks until all threads inside the dnn_trainer have
+ stopped touching the net.
+ !*/
+
+ unsigned long get_test_iterations_without_progress_threshold (
+ ) const;
+ /*!
+ ensures
+ - This object monitors the progress of training and estimates if the
+ testing error is being reduced. It does this by looking at the previous
+ get_test_iterations_without_progress_threshold() mini-batch results from
+ test_one_step() and applying the statistical test defined by the
+ running_gradient object to see if the testing error is getting smaller.
+ If it isn't being reduced then get_learning_rate() is made smaller by a
+ factor of get_learning_rate_shrink_factor().
+
+ Therefore, get_test_iterations_without_progress_threshold() should always be
+ set to something sensibly large so that this test can be done with
+ reasonably high confidence. Think of this test as saying "if the testing loss
+ hasn't decreased for the previous get_test_iterations_without_progress_threshold()
+ calls to test_one_step() then shrink the learning rate".
+ !*/
+
+ unsigned long get_test_steps_without_progress (
+ ) const;
+ /*!
+ ensures
+ - if (get_learning_rate_shrink_factor() != 1) then
+ - returns an estimate of how many mini-batches have executed without us
+ observing a statistically significant decrease in the testing error
+ (i.e. the error on the data given to the trainer via test_one_step()
+ calls).
+ - else
+ - returns 0
+ !*/
+
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename net_type,
+ typename solver_type
+ >
+ std::ostream& operator<< (
+ std::ostream& out,
+ dnn_trainer<net_type,solver_type>& trainer
+ );
+ /*!
+ ensures
+ - Prints a log of the current parameters of trainer to out.
+ !*/
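+
+    // For example, assuming a trainer built as in the sketches above:
+    //
+    //    std::cout << trainer;  // prints mini-batch size, learning rate, etc.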
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_DNn_TRAINER_ABSTRACT_H_
+
+