diff options
Diffstat (limited to 'ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h')
-rw-r--r-- | ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h | 390 |
1 files changed, 390 insertions, 0 deletions
diff --git a/ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h b/ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h new file mode 100644 index 000000000..2dd799874 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h @@ -0,0 +1,390 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_ +#ifdef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_ + +#include "structural_svm_object_detection_problem_abstract.h" +#include "../image_processing/object_detector_abstract.h" +#include "../image_processing/box_overlap_testing_abstract.h" +#include "../image_processing/full_object_detection_abstract.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_scanner_type + > + class structural_object_detection_trainer : noncopyable + { + /*! + REQUIREMENTS ON image_scanner_type + image_scanner_type must be an implementation of + dlib/image_processing/scan_fhog_pyramid_abstract.h or + dlib/image_processing/scan_image_custom_abstract.h or + dlib/image_processing/scan_image_pyramid_abstract.h or + dlib/image_processing/scan_image_boxes_abstract.h + + WHAT THIS OBJECT REPRESENTS + This object is a tool for learning to detect objects in images based on a + set of labeled images. The training procedure produces an object_detector + which can be used to predict the locations of objects in new images. + + Note that this is just a convenience wrapper around the structural_svm_object_detection_problem + to make it look similar to all the other trainers in dlib. 
+ !*/ + + public: + typedef double scalar_type; + typedef default_memory_manager mem_manager_type; + typedef object_detector<image_scanner_type> trained_function_type; + + + explicit structural_object_detection_trainer ( + const image_scanner_type& scanner + ); + /*! + requires + - scanner.get_num_detection_templates() > 0 + ensures + - #get_c() == 1 + - this object isn't verbose + - #get_epsilon() == 0.1 + - #get_num_threads() == 2 + - #get_max_cache_size() == 5 + - #get_match_eps() == 0.5 + - #get_loss_per_missed_target() == 1 + - #get_loss_per_false_alarm() == 1 + - This object will attempt to learn a model for the given + scanner object when train() is called. + - #get_scanner() == scanner + (note that only the "configuration" of scanner is copied. + I.e. the copy is done using copy_configuration()) + - #auto_set_overlap_tester() == true + !*/ + + const image_scanner_type& get_scanner ( + ) const; + /*! + ensures + - returns the image scanner used by this object. + !*/ + + bool auto_set_overlap_tester ( + ) const; + /*! + ensures + - if (this object will automatically determine an appropriate + state for the overlap tester used for non-max suppression.) then + - returns true + - In this case, it is determined using the find_tight_overlap_tester() + routine based on the truth_object_detections given to the + structural_object_detection_trainer::train() method. + - else + - returns false + !*/ + + void set_overlap_tester ( + const test_box_overlap& tester + ); + /*! + ensures + - #get_overlap_tester() == tester + - #auto_set_overlap_tester() == false + !*/ + + test_box_overlap get_overlap_tester ( + ) const; + /*! + requires + - auto_set_overlap_tester() == false + ensures + - returns the overlap tester object which will be used to perform non-max suppression. + In particular, this function returns the overlap tester which will populate the + object_detector returned by train(). + !*/ + + void set_num_threads ( + unsigned long num + ); + /*! 
+ ensures + - #get_num_threads() == num + !*/ + + unsigned long get_num_threads ( + ) const; + /*! + ensures + - returns the number of threads used during training. You should + usually set this equal to the number of processing cores on your + machine. + !*/ + + void set_epsilon ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + const scalar_type get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Smaller values may result in a more accurate solution but take longer + to train. You can think of this epsilon value as saying "solve the + optimization problem until the average loss per sample is within epsilon + of its optimal value". + !*/ + + void set_max_cache_size ( + unsigned long max_size + ); + /*! + ensures + - #get_max_cache_size() == max_size + !*/ + + unsigned long get_max_cache_size ( + ) const; + /*! + ensures + - During training, this object basically runs the object detector on + each image, over and over. To speed this up, it is possible to cache + the results of these detector invocations. This function returns the + number of cache elements per training sample kept in the cache. Note + that a value of 0 means caching is not used at all. Note also that + each cache element takes up about sizeof(double)*scanner.get_num_dimensions() + memory (where scanner is the scanner given to this object's constructor). + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a + user can observe the progress of the algorithm. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + void set_oca ( + const oca& item + ); + /*! + ensures + - #get_oca() == item + !*/ + + const oca get_oca ( + ) const; + /*! + ensures + - returns a copy of the optimizer used to solve the structural SVM problem. + !*/ + + void set_c ( + scalar_type C + ); + /*! 
+ requires + - C > 0 + ensures + - #get_c() == C + !*/ + + const scalar_type get_c ( + ) const; + /*! + ensures + - returns the SVM regularization parameter. It is the parameter + that determines the trade-off between trying to fit the training + data (i.e. minimize the loss) or allowing more errors but hopefully + improving the generalization of the resulting detector. Larger + values encourage exact fitting while smaller values of C may encourage + better generalization. + !*/ + + void set_match_eps ( + double eps + ); + /*! + requires + - 0 < eps < 1 + ensures + - #get_match_eps() == eps + !*/ + + double get_match_eps ( + ) const; + /*! + ensures + - returns the amount of alignment necessary for a detection to be considered + as matching with a ground truth rectangle. If it doesn't match then + it is considered to be a false alarm. To define this precisely, let + A and B be two rectangles, then A and B match if and only if: + A.intersect(B).area()/(A+B).area() > get_match_eps() + !*/ + + double get_loss_per_missed_target ( + ) const; + /*! + ensures + - returns the amount of loss experienced for failing to detect one of the + targets. If you care more about finding targets than having a low false + alarm rate then you can increase this value. + !*/ + + void set_loss_per_missed_target ( + double loss + ); + /*! + requires + - loss > 0 + ensures + - #get_loss_per_missed_target() == loss + !*/ + + double get_loss_per_false_alarm ( + ) const; + /*! + ensures + - returns the amount of loss experienced for emitting a false alarm detection. + Or in other words, the loss for generating a detection that doesn't correspond + to one of the truth rectangles. If you care more about having a low false + alarm rate than finding all the targets then you can increase this value. + !*/ + + void set_loss_per_false_alarm ( + double loss + ); + /*! 
+ requires + - loss > 0 + ensures + - #get_loss_per_false_alarm() == loss + !*/ + + template < + typename image_array_type + > + const trained_function_type train ( + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_object_detections + ) const; + /*! + requires + - is_learning_problem(images, truth_object_detections) == true + - it must be valid to pass images[0] into the image_scanner_type::load() method. + (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h) + - for all valid i, j: + - truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template() + - all_parts_in_rect(truth_object_detections[i][j]) == true + ensures + - Uses the structural_svm_object_detection_problem to train an object_detector + on the given images and truth_object_detections. + - returns a function F with the following properties: + - F(new_image) == A prediction of what objects are present in new_image. This + is a set of rectangles indicating their positions. + !*/ + + template < + typename image_array_type + > + const trained_function_type train ( + const image_array_type& images, + const std::vector<std::vector<rectangle> >& truth_object_detections + ) const; + /*! + requires + - is_learning_problem(images, truth_object_detections) == true + - it must be valid to pass images[0] into the image_scanner_type::load() method. + (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h) + - get_scanner().get_num_movable_components_per_detection_template() == 0 + ensures + - This function is identical to the above train(), except that it converts + each element of truth_object_detections into a full_object_detection by + passing it to full_object_detection's constructor taking only a rectangle. 
+ Therefore, this version of train() is a convenience function for the + case where you don't have any movable components of the detection templates. + !*/ + + template < + typename image_array_type + > + const trained_function_type train ( + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_object_detections, + const std::vector<std::vector<rectangle> >& ignore, + const test_box_overlap& ignore_overlap_tester = test_box_overlap() + ) const; + /*! + requires + - is_learning_problem(images, truth_object_detections) == true + - it must be valid to pass images[0] into the image_scanner_type::load() method. + (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h) + - ignore.size() == images.size() + - for all valid i, j: + - truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template() + - all_parts_in_rect(truth_object_detections[i][j]) == true + ensures + - Uses the structural_svm_object_detection_problem to train an object_detector + on the given images and truth_object_detections. + - for all valid i: + - Within images[i] any detections that match against a rectangle in + ignore[i], according to ignore_overlap_tester, are ignored. That is, + the optimizer doesn't care if the detector outputs a detection that + matches any of the ignore rectangles or if it fails to output a + detection for an ignore rectangle. Therefore, if there are objects + in your dataset that you are unsure if you want to detect or otherwise + don't care if the detector gets or doesn't then you can mark them + with ignore rectangles and the optimizer will simply ignore them. + - returns a function F with the following properties: + - F(new_image) == A prediction of what objects are present in new_image. This + is a set of rectangles indicating their positions. 
+ !*/ + + template < + typename image_array_type + > + const trained_function_type train ( + const image_array_type& images, + const std::vector<std::vector<rectangle> >& truth_object_detections, + const std::vector<std::vector<rectangle> >& ignore, + const test_box_overlap& ignore_overlap_tester = test_box_overlap() + ) const; + /*! + requires + - is_learning_problem(images, truth_object_detections) == true + - ignore.size() == images.size() + - it must be valid to pass images[0] into the image_scanner_type::load() method. + (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h) + - get_scanner().get_num_movable_components_per_detection_template() == 0 + ensures + - This function is identical to the above train(), except that it converts + each element of truth_object_detections into a full_object_detection by + passing it to full_object_detection's constructor taking only a rectangle. + Therefore, this version of train() is a convenience function for the + case where you don't have any movable components of the detection templates. + !*/ + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_ + + |