Diffstat (limited to 'ml/dlib/dlib/dnn/trainer_abstract.h')
-rw-r--r--   ml/dlib/dlib/dnn/trainer_abstract.h   765
1 file changed, 0 insertions, 765 deletions
diff --git a/ml/dlib/dlib/dnn/trainer_abstract.h b/ml/dlib/dlib/dnn/trainer_abstract.h
deleted file mode 100644
index 3bfb6dc99..000000000
--- a/ml/dlib/dlib/dnn/trainer_abstract.h
+++ /dev/null
@@ -1,765 +0,0 @@
// Copyright (C) 2015  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_DNn_TRAINER_ABSTRACT_H_
#ifdef DLIB_DNn_TRAINER_ABSTRACT_H_

#include "core_abstract.h"
#include "solvers_abstract.h"
#include <vector>
#include <chrono>


namespace dlib
{

// ----------------------------------------------------------------------------------------

    enum class force_flush_to_disk {
        no = 0,
        yes = 1
    };

// ----------------------------------------------------------------------------------------

    template <
        typename net_type,
        typename solver_type = sgd
        >
    class dnn_trainer
    {
        /*!
            REQUIREMENTS ON net_type
                - net_type is an add_loss_layer object.

            REQUIREMENTS ON solver_type
                - solver_type is an implementation of the EXAMPLE_SOLVER interface
                  defined in solvers_abstract.h

            WHAT THIS OBJECT REPRESENTS
                This object is a tool for training a deep neural network.  To use it
                you supply a neural network type and a solver, then you call train()
                with your training data and it will output a new network instance that
                has hopefully learned something useful from your training data.

                If you are compiling with CUDA then this object will use the GPU that
                is currently selected (i.e. the one indicated by cudaGetDevice()) when
                dnn_trainer is constructed.  It will continue to use that device even
                if you later change it by a call to cudaSetDevice().

            EXCEPTIONS
                If an exception is thrown by any part of the neural network during
                training then the exception will be propagated out of the trainer to
                the user.  Moreover, the trainer instance will be unusable and should
                be destroyed.
        !*/

    public:

        typedef typename net_type::training_label_type training_label_type;
        typedef typename net_type::input_type input_type;
        const static size_t num_computational_layers = net_type::num_computational_layers;

        dnn_trainer() = delete;
        dnn_trainer(const dnn_trainer&) = delete;
        dnn_trainer& operator=(const dnn_trainer&) = delete;

        dnn_trainer(
            net_type& net,
            const solver_type& solver = solver_type(),
            const std::vector<int>& cuda_extra_devices = {}
        );
        /*!
            requires
                - for all valid i:
                    - 0 <= cuda_extra_devices[i] < dlib::cuda::get_num_devices()
            ensures
                - &#get_net() == &net
                  (i.e. The dnn_trainer holds a reference to net; it does not copy it.
                  Therefore, you must ensure net has a lifetime at least as long as the
                  dnn_trainer.)
                - #get_solvers() == a set of solvers that are all initialized with the
                  provided solver instance.
                - #get_max_num_epochs() == 10000
                - #get_mini_batch_size() == 128
                - #get_learning_rate() == 1e-2
                - #get_min_learning_rate() == 1e-5
                - #get_iterations_without_progress_threshold() == 2000
                - #get_test_iterations_without_progress_threshold() == 500
                - #get_learning_rate_shrink_factor() == 0.1
                - #get_learning_rate_schedule().size() == 0
                - #get_train_one_step_calls() == 0
                - #get_test_one_step_calls() == 0
                - #get_synchronization_file() == ""
                - if (cuda_extra_devices.size() > 0) then
                    - This object will use multiple graphics cards to run the learning
                      algorithms.  In particular, it will always use whatever device is
                      currently selected on the calling thread (the device indicated by
                      cudaGetDevice()).  In addition, you can ask it to use additional
                      devices by putting their device numbers into cuda_extra_devices.
        !*/
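        /*!
            EXAMPLE
                An illustrative sketch added to this documentation; it is not part of
                the original header.  It assumes the user has already defined some
                add_loss_layer network typedef called net_type:

                    net_type net;
                    // Optimize with SGD using a weight decay of 0.0005 and momentum of
                    // 0.9, and, in addition to the currently selected GPU, also run
                    // the learning algorithm on CUDA devices 1 and 2.
                    dnn_trainer<net_type> trainer(net, sgd(0.0005, 0.9), {1, 2});
        !*/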
        net_type& get_net (
            force_flush_to_disk force_flush = force_flush_to_disk::yes
        );
        /*!
            ensures
                - returns the neural network object used by this trainer.  This is the
                  network that is optimized when you call train() or train_one_step().
                  Recall that the dnn_trainer doesn't contain the net_type object but
                  simply holds a reference to an external network which was provided to
                  the dnn_trainer's constructor.
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
                - If force_flush is yes, then this function will sync the trainer state
                  to disk if the current state hasn't already been synced to disk since
                  the last network modification.
        !*/

        const std::vector<solver_type>& get_solvers (
        ) const;
        /*!
            ensures
                - returns the solvers used to optimize each layer of the neural network
                  get_net().  In particular, the first layer's solver is
                  get_solvers()[0], the second layer's solver is get_solvers()[1], and
                  so on.
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        unsigned long get_mini_batch_size (
        ) const;
        /*!
            ensures
                - During training, we call the network's update() routine over and over
                  with training data.  The number of training samples we give to each
                  call to update is the "mini-batch size", which is defined by
                  get_mini_batch_size().
        !*/

        void set_mini_batch_size (
            unsigned long batch_size
        );
        /*!
            requires
                - batch_size > 0
            ensures
                - #get_mini_batch_size() == batch_size
        !*/

        unsigned long get_max_num_epochs (
        ) const;
        /*!
            ensures
                - train() will execute at most get_max_num_epochs() iterations over the
                  training data before returning.
        !*/

        void set_max_num_epochs (
            unsigned long num
        );
        /*!
            requires
                - num > 0
            ensures
                - #get_max_num_epochs() == num
        !*/

        void set_learning_rate (
            double lr
        );
        /*!
            requires
                - lr > 0
            ensures
                - #get_learning_rate() == lr
                - #get_learning_rate_schedule().size() == 0
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        double get_learning_rate (
        ) const;
        /*!
            ensures
                - During each training step, a solver tells us how to modify the
                  parameters of each layer in the network.  It does this by outputting
                  a step vector that, when added to the parameters, will hopefully
                  result in improved network performance.  The learning rate is one of
                  the inputs to the solver and influences the size of this step vector.
                  This function returns the current learning rate, that is, the
                  learning rate that will be used during the next training step.
        !*/

        void set_min_learning_rate (
            double lr
        );
        /*!
            requires
                - lr > 0
            ensures
                - #get_min_learning_rate() == lr
                - #get_learning_rate_schedule().size() == 0
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        double get_min_learning_rate (
        ) const;
        /*!
            ensures
                - During training via this->train(), this object will test if progress
                  is still being made and if it isn't then it will reduce
                  get_learning_rate() by setting it to
                  get_learning_rate()*get_learning_rate_shrink_factor().  However, it
                  will not reduce it below get_min_learning_rate().  Once this minimum
                  learning rate is crossed the training will terminate.
                - get_min_learning_rate() doesn't apply if you are using
                  train_one_step().  You can keep calling train_one_step() as many
                  times as you want and the learning rate will keep dropping ever
                  closer to 0 the longer you run.
        !*/
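        /*!
            EXAMPLE
                An illustrative sketch (not part of the original header) showing how
                the knobs above are typically configured; "trainer" is the dnn_trainer
                from the previous example:

                    trainer.set_mini_batch_size(64);      // samples per gradient step
                    trainer.set_learning_rate(0.1);       // starting learning rate
                    trainer.set_min_learning_rate(1e-4);  // train() stops once the
                                                          // learning rate would shrink
                                                          // below this value
        !*/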
        template <typename EXP>
        void set_learning_rate_schedule (
            const matrix_exp<EXP>& schedule
        );
        /*!
            requires
                - schedule.size() > 0
                - min(schedule) > 0
            ensures
                - #get_learning_rate_schedule() == reshape_to_column_vector(schedule)
                - #get_learning_rate() == schedule(0,0)
                - #get_min_learning_rate() == min(schedule)
                - #get_learning_rate_shrink_factor() == 1
        !*/

        const matrix<double,0,1>& get_learning_rate_schedule (
        ) const;
        /*!
            ensures
                - if (this function returns a non-empty matrix) then
                    - This trainer will use an explicit learning rate schedule defined
                      by the learning rate values in get_learning_rate_schedule().  For
                      example, if get_learning_rate_schedule() returned {0.1, 0.09,
                      0.08, 0.07, 0.06} then the first training mini-batch would use a
                      learning rate of 0.1, then the next training mini-batch uses
                      0.09, and then 0.08, and so on until the end of the schedule is
                      reached.
                    - If you continue to run training after the end of the schedule has
                      been reached then the learning rate will be fixed to 0.99 times
                      the final value.  So in our example, eventually the learning rate
                      would be fixed to 0.99*0.06.  This allows you to test if we have
                      reached the end of the schedule by checking if
                      get_learning_rate() < 0.06.
        !*/

        unsigned long get_steps_without_progress (
        ) const;
        /*!
            ensures
                - if (get_learning_rate_shrink_factor() != 1) then
                    - returns an estimate of how many mini-batches have executed
                      without us observing a statistically significant decrease in the
                      training error.
                - else
                    - returns 0
        !*/

        void set_iterations_without_progress_threshold (
            unsigned long thresh
        );
        /*!
            ensures
                - #get_iterations_without_progress_threshold() == thresh
                - #get_learning_rate_schedule().size() == 0
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        unsigned long get_iterations_without_progress_threshold (
        ) const;
        /*!
            ensures
                - This object monitors the progress of training and estimates if the
                  training error is being reduced.  It does this by looking at the
                  previous get_iterations_without_progress_threshold() mini-batch
                  results and applying the statistical test defined by the
                  running_gradient object to see if the training error is getting
                  smaller.  If it isn't being reduced then get_learning_rate() is made
                  smaller by a factor of get_learning_rate_shrink_factor().
                - Therefore, get_iterations_without_progress_threshold() should always
                  be set to something sensibly large so that this test can be done with
                  reasonably high confidence.  Think of this test as saying "if the
                  loss hasn't decreased for the previous
                  get_iterations_without_progress_threshold() mini-batches then shrink
                  the learning rate".
        !*/

        void set_learning_rate_shrink_factor (
            double shrink
        );
        /*!
            requires
                - 0 < shrink && shrink <= 1
            ensures
                - #get_learning_rate_shrink_factor() == shrink
                - #get_learning_rate_schedule().size() == 0
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        double get_learning_rate_shrink_factor (
        ) const;
        /*!
            ensures
                - Whenever the training routine thinks it isn't making progress anymore
                  it will reduce get_learning_rate() by multiplying it by
                  get_learning_rate_shrink_factor().
                - You can disable the automatic learning rate reduction by setting
                  get_learning_rate_shrink_factor() to 1.
        !*/
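        /*!
            EXAMPLE
                An illustrative sketch (not part of the original header).  It assumes
                dlib's linspace() matrix utility to build a schedule that decays
                linearly from 0.1 to 0.001 over 10000 mini-batches:

                    trainer.set_learning_rate_schedule(linspace(0.1, 0.001, 10000));
                    // Automatic shrinking is now disabled
                    // (get_learning_rate_shrink_factor() == 1) and training follows
                    // the schedule exactly, one entry per mini-batch.
        !*/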
- ensures - - Whenever the training routine thinks it isn't making progress anymore it - will reduce get_learning_rate() by multiplying it by get_learning_rate_shrink_factor(). - - You can disable the automatic learning rate reduction by setting - get_learning_rate_shrink_factor() to 1. - !*/ - - unsigned long long get_train_one_step_calls ( - ) const; - /*! - ensures - - returns the number of times train_one_step() has been called. - !*/ - - unsigned long long get_test_one_step_calls ( - ) const; - /*! - ensures - - returns the number of times test_one_step() has been called. - !*/ - - void be_verbose ( - ); - /*! - ensures - - This object will print status messages to standard out so that a - user can observe the progress of the algorithm. - !*/ - - void be_quiet ( - ); - /*! - ensures - - This object will not print anything to standard out - !*/ - - void set_synchronization_file ( - const std::string& filename, - std::chrono::seconds time_between_syncs = std::chrono::minutes(15) - ); - /*! - ensures - - #get_synchronization_file() == filename - - While training is running, either via train() or repeated calls to - train_one_step(), this object will save its entire state, including the - state of get_net(), to disk in the file named filename every - time_between_syncs seconds. - - If the filename file already exists then the state of this trainer will - be loaded from that file by this call to set_synchronization_file(). - This allows you to resume a training session which was previously - interrupted. - - It should be noted that when saving, the trainer will alternate between - saving to a file called filename and another file called filename+"_". - We do this because it's possible that your computer might crash (not - because of dlib, just in general) before the data is safely saved to - disk. This way, you will always have a backup file if the write to disk - gets corrupted or is incomplete. Moreover, when loading, we will always - load from the newest of the two possible files. - !*/ - - const std::string& get_synchronization_file ( - ); - /*! - ensures - - Returns the name of the file the dnn_trainer will periodically save it's - state to. If the return value is "" then synchronization is disabled. - !*/ - - void train ( - const std::vector<input_type>& data, - const std::vector<training_label_type>& labels - ); - /*! - requires - - data.size() == labels.size() - - data.size() > 0 - - net_type uses a supervised loss. - i.e. net_type::training_label_type != no_label_type. - ensures - - Trains a supervised neural network based on the given training data. - The goal of training is to find the network parameters that minimize - get_net().compute_loss(data.begin(), data.end(), labels.begin()). - - The optimizer will run until get_learning_rate() < get_min_learning_rate() - or get_max_num_epochs() training epochs have been executed. - - Each layer in the network will be optimized by its corresponding solver - in get_solvers(). - - Each call to train DOES NOT reinitialize the state of get_net() or - get_solvers(). That is, the existing state of the solvers and network is - the starting point for the optimization each time train() is called. In - particular, if you use the set_synchronization_file() method you can - resume an interrupted train() call by simply calling train() again and it - will pick up from the last synchronization point. - - You can obtain the average loss value during the final training epoch by - calling get_average_loss(). 
        void train (
            const std::vector<input_type>& data,
            const std::vector<training_label_type>& labels
        );
        /*!
            requires
                - data.size() == labels.size()
                - data.size() > 0
                - net_type uses a supervised loss.
                  i.e. net_type::training_label_type != no_label_type.
            ensures
                - Trains a supervised neural network based on the given training data.
                  The goal of training is to find the network parameters that minimize
                  get_net().compute_loss(data.begin(), data.end(), labels.begin()).
                - The optimizer will run until
                  get_learning_rate() < get_min_learning_rate() or
                  get_max_num_epochs() training epochs have been executed.
                - Each layer in the network will be optimized by its corresponding
                  solver in get_solvers().
                - Each call to train DOES NOT reinitialize the state of get_net() or
                  get_solvers().  That is, the existing state of the solvers and
                  network is the starting point for the optimization each time train()
                  is called.  In particular, if you use the set_synchronization_file()
                  method you can resume an interrupted train() call by simply calling
                  train() again and it will pick up from the last synchronization
                  point.
                - You can obtain the average loss value during the final training epoch
                  by calling get_average_loss().
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        void train (
            const std::vector<input_type>& data
        );
        /*!
            requires
                - data.size() > 0
                - net_type uses an unsupervised loss.
                  i.e. net_type::training_label_type == no_label_type.
            ensures
                - Trains an unsupervised neural network based on the given training
                  data.  The goal of training is to find the network parameters that
                  minimize get_net().compute_loss(data.begin(), data.end()).
                - The optimizer will run until
                  get_learning_rate() < get_min_learning_rate() or
                  get_max_num_epochs() training epochs have been executed.
                - Each layer in the network will be optimized by its corresponding
                  solver in get_solvers().
                - Each call to train DOES NOT reinitialize the state of get_net() or
                  get_solvers().  That is, the existing state of the solvers and
                  network is the starting point for the optimization each time train()
                  is called.  In particular, if you use the set_synchronization_file()
                  method you can resume an interrupted train() call by simply calling
                  train() again and it will pick up from the last synchronization
                  point.
                - You can obtain the average loss value during the final training epoch
                  by calling get_average_loss().
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        void train_one_step (
            const std::vector<input_type>& data,
            const std::vector<training_label_type>& labels
        );
        /*!
            requires
                - data.size() == labels.size()
                - data.size() > 0
                - net_type uses a supervised loss.
                  i.e. net_type::training_label_type != no_label_type.
            ensures
                - Performs one stochastic gradient update step based on the mini-batch
                  of data and labels supplied to this function.  In particular, calling
                  train_one_step() in a loop is equivalent to calling the train()
                  method defined above.  However, train_one_step() allows you to stream
                  data from disk into the training process while train() requires you
                  to first load all the training data into RAM.  Otherwise, these
                  training methods are equivalent.
                - You can observe the current average loss value by calling
                  get_average_loss().
                - The network training will happen in another thread.  Therefore, after
                  calling this function you should call get_net() before you touch the
                  net object from the calling thread to ensure no other threads are
                  still accessing the network.
                - #get_train_one_step_calls() == get_train_one_step_calls() + 1.
        !*/

        template <
            typename data_iterator,
            typename label_iterator
            >
        void train_one_step (
            data_iterator dbegin,
            data_iterator dend,
            label_iterator lbegin
        );
        /*!
            requires
                - std::advance(lbegin, std::distance(dbegin, dend) - 1) is
                  dereferenceable
                - std::distance(dbegin, dend) > 0
                - net_type uses a supervised loss.
                  i.e. net_type::training_label_type != no_label_type.
            ensures
                - Performs one stochastic gradient update step based on the mini-batch
                  of data and labels supplied to this function.  In particular, calling
                  train_one_step() in a loop is equivalent to calling the train()
                  method defined above.  However, train_one_step() allows you to stream
                  data from disk into the training process while train() requires you
                  to first load all the training data into RAM.  Otherwise, these
                  training methods are equivalent.
                - You can observe the current average loss value by calling
                  get_average_loss().
                - The network training will happen in another thread.  Therefore, after
                  calling this function you should call get_net() before you touch the
                  net object from the calling thread to ensure no other threads are
                  still accessing the network.
                - #get_train_one_step_calls() == get_train_one_step_calls() + 1.
        !*/
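        /*!
            EXAMPLE
                An illustrative sketch (not part of the original header) of a streaming
                training loop; next_mini_batch() is a hypothetical helper that fills
                the two vectors from disk:

                    std::vector<input_type> samples;
                    std::vector<training_label_type> labels;
                    while (trainer.get_learning_rate() >= 1e-5)
                    {
                        next_mini_batch(samples, labels);
                        trainer.train_one_step(samples, labels);
                    }
                    // Block until the training thread is idle before touching the net.
                    trainer.get_net();
        !*/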
        void train_one_step (
            const std::vector<input_type>& data
        );
        /*!
            requires
                - data.size() > 0
                - net_type uses an unsupervised loss.
                  i.e. net_type::training_label_type == no_label_type.
            ensures
                - Performs one stochastic gradient update step based on the mini-batch
                  of data supplied to this function.  In particular, calling
                  train_one_step() in a loop is equivalent to calling the train()
                  method defined above.  However, train_one_step() allows you to stream
                  data from disk into the training process while train() requires you
                  to first load all the training data into RAM.  Otherwise, these
                  training methods are equivalent.
                - You can observe the current average loss value by calling
                  get_average_loss().
                - The network training will happen in another thread.  Therefore, after
                  calling this function you should call get_net() before you touch the
                  net object from the calling thread to ensure no other threads are
                  still accessing the network.
                - #get_train_one_step_calls() == get_train_one_step_calls() + 1.
        !*/

        template <
            typename data_iterator
            >
        void train_one_step (
            data_iterator dbegin,
            data_iterator dend
        );
        /*!
            requires
                - std::distance(dbegin, dend) > 0
                - net_type uses an unsupervised loss.
                  i.e. net_type::training_label_type == no_label_type.
            ensures
                - Performs one stochastic gradient update step based on the mini-batch
                  of data supplied to this function.  In particular, calling
                  train_one_step() in a loop is equivalent to calling the train()
                  method defined above.  However, train_one_step() allows you to stream
                  data from disk into the training process while train() requires you
                  to first load all the training data into RAM.  Otherwise, these
                  training methods are equivalent.
                - You can observe the current average loss value by calling
                  get_average_loss().
                - The network training will happen in another thread.  Therefore, after
                  calling this function you should call get_net() before you touch the
                  net object from the calling thread to ensure no other threads are
                  still accessing the network.
                - #get_train_one_step_calls() == get_train_one_step_calls() + 1.
        !*/

        double get_average_loss (
        ) const;
        /*!
            ensures
                - returns the average loss value observed during previous calls to
                  train_one_step() or train().  That is, the average output of
                  net_type::update() during the previous mini-batch updates.
                - Note that, if be_verbose() has been called, then this object will
                  automatically call clear_average_loss() periodically when it logs the
                  loss to the console.
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        void clear_average_loss (
        );
        /*!
            ensures
                - #get_average_loss() == 0
                - get_average_loss() uses a dlib::running_stats object to keep a
                  running average of the loss values seen during the previous
                  mini-batch updates applied during training.  Calling
                  clear_average_loss() resets the running_stats object so it forgets
                  about all previous loss values observed.
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/
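        /*!
            EXAMPLE
                An illustrative sketch (not part of the original header) of monitoring
                the running loss average during one-step training:

                    trainer.train_one_step(samples, labels);
                    if (trainer.get_train_one_step_calls() % 100 == 0)
                    {
                        std::cout << "avg loss: " << trainer.get_average_loss() << std::endl;
                        trainer.clear_average_loss();  // start a fresh running average
                    }
        !*/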
- ensures - - returns the average loss value observed during previous calls to - test_one_step(). - - This function blocks until all threads inside the dnn_trainer have - stopped touching the net. - !*/ - - void test_one_step ( - const std::vector<input_type>& data, - const std::vector<training_label_type>& labels - ); - /*! - requires - - data.size() == labels.size() - - data.size() > 0 - - net_type uses a supervised loss. - i.e. net_type::training_label_type != no_label_type. - ensures - - Runs the given data through the network and computes and records the loss. - - This call does not modify network parameters. The point of - test_one_step() is two fold, to allow you to observe the accuracy of the - network on hold out data during training, and to allow the trainer to - automatically adjust the learning rate when the test loss stops - improving. It should be noted that you are not required to use - test_one_step() at all, but if you want to do this kind of thing it is - available. - - You can observe the current average loss value by calling get_average_test_loss(). - - The computation will happen in another thread. Therefore, after calling - this function you should call get_net() before you touch the net object - from the calling thread to ensure no other threads are still accessing - the network. - - #get_test_one_step_calls() == get_test_one_step_calls() + 1. - !*/ - - template < - typename data_iterator, - typename label_iterator - > - void test_one_step ( - data_iterator dbegin, - data_iterator dend, - label_iterator lbegin - ); - /*! - requires - - std::advance(lbegin, std::distance(dbegin, dend) - 1) is dereferencable - - std::distance(dbegin, dend) > 0 - - net_type uses a supervised loss. - i.e. net_type::training_label_type != no_label_type. - ensures - - Runs the given data through the network and computes and records the loss. - - This call does not modify network parameters. The point of - test_one_step() is two fold, to allow you to observe the accuracy of the - network on hold out data during training, and to allow the trainer to - automatically adjust the learning rate when the test loss stops - improving. It should be noted that you are not required to use - test_one_step() at all, but if you want to do this kind of thing it is - available. - - You can observe the current average loss value by calling get_average_test_loss(). - - The computation will happen in another thread. Therefore, after calling - this function you should call get_net() before you touch the net object - from the calling thread to ensure no other threads are still accessing - the network. - - #get_test_one_step_calls() == get_test_one_step_calls() + 1. - !*/ - - void test_one_step ( - const std::vector<input_type>& data - ); - /*! - requires - - data.size() > 0 - - net_type uses an unsupervised loss. - i.e. net_type::training_label_type == no_label_type. - ensures - - Runs the given data through the network and computes and records the loss. - - This call does not modify network parameters. The point of - test_one_step() is two fold, to allow you to observe the accuracy of the - network on hold out data during training, and to allow the trainer to - automatically adjust the learning rate when the test loss stops - improving. It should be noted that you are not required to use - test_one_step() at all, but if you want to do this kind of thing it is - available. - - You can observe the current average loss value by calling get_average_test_loss(). - - The computation will happen in another thread. 
        template <
            typename data_iterator
            >
        void test_one_step (
            data_iterator dbegin,
            data_iterator dend
        );
        /*!
            requires
                - std::distance(dbegin, dend) > 0
                - net_type uses an unsupervised loss.
                  i.e. net_type::training_label_type == no_label_type.
            ensures
                - Runs the given data through the network and computes and records the
                  loss.
                - This call does not modify network parameters.  The point of
                  test_one_step() is twofold: to allow you to observe the accuracy of
                  the network on held-out data during training, and to allow the
                  trainer to automatically adjust the learning rate when the test loss
                  stops improving.  It should be noted that you are not required to use
                  test_one_step() at all, but if you want to do this kind of thing it
                  is available.
                - You can observe the current average loss value by calling
                  get_average_test_loss().
                - The computation will happen in another thread.  Therefore, after
                  calling this function you should call get_net() before you touch the
                  net object from the calling thread to ensure no other threads are
                  still accessing the network.
                - #get_test_one_step_calls() == get_test_one_step_calls() + 1.
        !*/

        void set_test_iterations_without_progress_threshold (
            unsigned long thresh
        );
        /*!
            ensures
                - #get_test_iterations_without_progress_threshold() == thresh
                - #get_learning_rate_schedule().size() == 0
                - This function blocks until all threads inside the dnn_trainer have
                  stopped touching the net.
        !*/

        unsigned long get_test_iterations_without_progress_threshold (
        ) const;
        /*!
            ensures
                - This object monitors the progress of training and estimates if the
                  testing error is being reduced.  It does this by looking at the
                  previous get_test_iterations_without_progress_threshold() mini-batch
                  results from test_one_step() and applying the statistical test
                  defined by the running_gradient object to see if the testing error is
                  getting smaller.  If it isn't being reduced then get_learning_rate()
                  is made smaller by a factor of get_learning_rate_shrink_factor().
                - Therefore, get_test_iterations_without_progress_threshold() should
                  always be set to something sensibly large so that this test can be
                  done with reasonably high confidence.  Think of this test as saying
                  "if the testing loss hasn't decreased for the previous
                  get_test_iterations_without_progress_threshold() calls to
                  test_one_step() then shrink the learning rate".
        !*/

        unsigned long get_test_steps_without_progress (
        ) const;
        /*!
            ensures
                - if (get_learning_rate_shrink_factor() != 1) then
                    - returns an estimate of how many mini-batches have executed
                      without us observing a statistically significant decrease in the
                      testing error (i.e. the error on the data given to the trainer
                      via test_one_step() calls).
                - else
                    - returns 0
        !*/

    };

// ----------------------------------------------------------------------------------------

    template <
        typename net_type,
        typename solver_type
        >
    std::ostream& operator<< (
        std::ostream& out,
        dnn_trainer<net_type,solver_type>& trainer
    );
    /*!
        ensures
            - Prints a log of the current parameters of trainer to out.
    !*/
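    /*!
        EXAMPLE
            An illustrative sketch (not part of the original header):

                std::cout << trainer << std::endl;
                // Typically prints the trainer's current parameters, such as the
                // mini-batch size and learning rate, in a human readable form.
    !*/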
// ----------------------------------------------------------------------------------------

}

#endif // DLIB_DNn_TRAINER_ABSTRACT_H_