summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h')
-rw-r--r--ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h390
1 files changed, 390 insertions, 0 deletions
diff --git a/ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h b/ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h
new file mode 100644
index 000000000..2dd799874
--- /dev/null
+++ b/ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h
@@ -0,0 +1,390 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_
+#ifdef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_
+
+#include "structural_svm_object_detection_problem_abstract.h"
+#include "../image_processing/object_detector_abstract.h"
+#include "../image_processing/box_overlap_testing_abstract.h"
+#include "../image_processing/full_object_detection_abstract.h"
+
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_scanner_type
+ >
+ class structural_object_detection_trainer : noncopyable
+ {
+ /*!
+ REQUIREMENTS ON image_scanner_type
+ image_scanner_type must be an implementation of
+ dlib/image_processing/scan_fhog_pyramid_abstract.h or
+ dlib/image_processing/scan_image_custom_abstract.h or
+ dlib/image_processing/scan_image_pyramid_abstract.h or
+ dlib/image_processing/scan_image_boxes_abstract.h
+
+ WHAT THIS OBJECT REPRESENTS
+ This object is a tool for learning to detect objects in images based on a
+ set of labeled images. The training procedure produces an object_detector
+ which can be used to predict the locations of objects in new images.
+
+ Note that this is just a convenience wrapper around the structural_svm_object_detection_problem
+ to make it look similar to all the other trainers in dlib.
+ !*/
+
+ public:
+ typedef double scalar_type;
+ typedef default_memory_manager mem_manager_type;
+ typedef object_detector<image_scanner_type> trained_function_type;
+
+
+ explicit structural_object_detection_trainer (
+ const image_scanner_type& scanner
+ );
+ /*!
+ requires
+ - scanner.get_num_detection_templates() > 0
+ ensures
+ - #get_c() == 1
+ - this object isn't verbose
+ - #get_epsilon() == 0.1
+ - #get_num_threads() == 2
+ - #get_max_cache_size() == 5
+ - #get_match_eps() == 0.5
+ - #get_loss_per_missed_target() == 1
+ - #get_loss_per_false_alarm() == 1
+ - This object will attempt to learn a model for the given
+ scanner object when train() is called.
+ - #get_scanner() == scanner
+ (note that only the "configuration" of scanner is copied.
+ I.e. the copy is done using copy_configuration())
+ - #auto_set_overlap_tester() == true
+ !*/
+
+ const image_scanner_type& get_scanner (
+ ) const;
+ /*!
+ ensures
+ - returns the image scanner used by this object.
+ !*/
+
+ bool auto_set_overlap_tester (
+ ) const;
+ /*!
+ ensures
+ - if (this object will automatically determine an appropriate
+ state for the overlap tester used for non-max suppression.) then
+ - returns true
+ - In this case, it is determined using the find_tight_overlap_tester()
+ routine based on the truth_object_detections given to the
+ structural_object_detection_trainer::train() method.
+ - else
+ - returns false
+ !*/
+
+ void set_overlap_tester (
+ const test_box_overlap& tester
+ );
+ /*!
+ ensures
+ - #get_overlap_tester() == tester
+ - #auto_set_overlap_tester() == false
+ !*/
+
+ test_box_overlap get_overlap_tester (
+ ) const;
+ /*!
+ requires
+ - auto_set_overlap_tester() == false
+ ensures
+ - returns the overlap tester object which will be used to perform non-max suppression.
+ In particular, this function returns the overlap tester which will populate the
+ object_detector returned by train().
+ !*/
+
+ void set_num_threads (
+ unsigned long num
+ );
+ /*!
+ ensures
+ - #get_num_threads() == num
+ !*/
+
+ unsigned long get_num_threads (
+ ) const;
+ /*!
+ ensures
+ - returns the number of threads used during training. You should
+ usually set this equal to the number of processing cores on your
+ machine.
+ !*/
+
+ void set_epsilon (
+ scalar_type eps
+ );
+ /*!
+ requires
+ - eps > 0
+ ensures
+ - #get_epsilon() == eps
+ !*/
+
+ const scalar_type get_epsilon (
+ ) const;
+ /*!
+ ensures
+ - returns the error epsilon that determines when training should stop.
+ Smaller values may result in a more accurate solution but take longer
+ to train. You can think of this epsilon value as saying "solve the
+ optimization problem until the average loss per sample is within epsilon
+ of its optimal value".
+ !*/
+
+ void set_max_cache_size (
+ unsigned long max_size
+ );
+ /*!
+ ensures
+ - #get_max_cache_size() == max_size
+ !*/
+
+ unsigned long get_max_cache_size (
+ ) const;
+ /*!
+ ensures
+ - During training, this object basically runs the object detector on
+ each image, over and over. To speed this up, it is possible to cache
+ the results of these detector invocations. This function returns the
+ number of cache elements per training sample kept in the cache. Note
+ that a value of 0 means caching is not used at all. Note also that
+ each cache element takes up about sizeof(double)*scanner.get_num_dimensions()
+ memory (where scanner is the scanner given to this object's constructor).
+ !*/
+
+ void be_verbose (
+ );
+ /*!
+ ensures
+ - This object will print status messages to standard out so that a
+ user can observe the progress of the algorithm.
+ !*/
+
+ void be_quiet (
+ );
+ /*!
+ ensures
+ - this object will not print anything to standard out
+ !*/
+
+ void set_oca (
+ const oca& item
+ );
+ /*!
+ ensures
+ - #get_oca() == item
+ !*/
+
+ const oca get_oca (
+ ) const;
+ /*!
+ ensures
+ - returns a copy of the optimizer used to solve the structural SVM problem.
+ !*/
+
+ void set_c (
+ scalar_type C
+ );
+ /*!
+ requires
+ - C > 0
+ ensures
+ - #get_c() = C
+ !*/
+
+ const scalar_type get_c (
+ ) const;
+ /*!
+ ensures
+ - returns the SVM regularization parameter. It is the parameter
+ that determines the trade-off between trying to fit the training
+ data (i.e. minimize the loss) or allowing more errors but hopefully
+ improving the generalization of the resulting detector. Larger
+ values encourage exact fitting while smaller values of C may encourage
+ better generalization.
+ !*/
+
+ void set_match_eps (
+ double eps
+ );
+ /*!
+ requires
+ - 0 < eps < 1
+ ensures
+ - #get_match_eps() == eps
+ !*/
+
+ double get_match_eps (
+ ) const;
+ /*!
+ ensures
+ - returns the amount of alignment necessary for a detection to be considered
+ as matching with a ground truth rectangle. If it doesn't match then
+ it is considered to be a false alarm. To define this precisely, let
+ A and B be two rectangles, then A and B match if and only if:
+ A.intersect(B).area()/(A+B).area() > get_match_eps()
+ !*/
+
+ double get_loss_per_missed_target (
+ ) const;
+ /*!
+ ensures
+ - returns the amount of loss experienced for failing to detect one of the
+ targets. If you care more about finding targets than having a low false
+ alarm rate then you can increase this value.
+ !*/
+
+ void set_loss_per_missed_target (
+ double loss
+ );
+ /*!
+ requires
+ - loss > 0
+ ensures
+ - #get_loss_per_missed_target() == loss
+ !*/
+
+ double get_loss_per_false_alarm (
+ ) const;
+ /*!
+ ensures
+ - returns the amount of loss experienced for emitting a false alarm detection.
+ Or in other words, the loss for generating a detection that doesn't correspond
+ to one of the truth rectangles. If you care more about having a low false
+ alarm rate than finding all the targets then you can increase this value.
+ !*/
+
+ void set_loss_per_false_alarm (
+ double loss
+ );
+ /*!
+ requires
+ - loss > 0
+ ensures
+ - #get_loss_per_false_alarm() == loss
+ !*/
+
+ template <
+ typename image_array_type
+ >
+ const trained_function_type train (
+ const image_array_type& images,
+ const std::vector<std::vector<full_object_detection> >& truth_object_detections
+ ) const;
+ /*!
+ requires
+ - is_learning_problem(images, truth_object_detections) == true
+ - it must be valid to pass images[0] into the image_scanner_type::load() method.
+ (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
+ - for all valid i, j:
+ - truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template()
+ - all_parts_in_rect(truth_object_detections[i][j]) == true
+ ensures
+ - Uses the structural_svm_object_detection_problem to train an object_detector
+ on the given images and truth_object_detections.
+ - returns a function F with the following properties:
+ - F(new_image) == A prediction of what objects are present in new_image. This
+ is a set of rectangles indicating their positions.
+ !*/
+
+ template <
+ typename image_array_type
+ >
+ const trained_function_type train (
+ const image_array_type& images,
+ const std::vector<std::vector<rectangle> >& truth_object_detections
+ ) const;
+ /*!
+ requires
+ - is_learning_problem(images, truth_object_detections) == true
+ - it must be valid to pass images[0] into the image_scanner_type::load() method.
+ (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
+ - get_scanner().get_num_movable_components_per_detection_template() == 0
+ ensures
+ - This function is identical to the above train(), except that it converts
+ each element of truth_object_detections into a full_object_detection by
+ passing it to full_object_detection's constructor taking only a rectangle.
+ Therefore, this version of train() is a convenience function for for the
+ case where you don't have any movable components of the detection templates.
+ !*/
+
+ template <
+ typename image_array_type
+ >
+ const trained_function_type train (
+ const image_array_type& images,
+ const std::vector<std::vector<full_object_detection> >& truth_object_detections,
+ const std::vector<std::vector<rectangle> >& ignore,
+ const test_box_overlap& ignore_overlap_tester = test_box_overlap()
+ ) const;
+ /*!
+ requires
+ - is_learning_problem(images, truth_object_detections) == true
+ - it must be valid to pass images[0] into the image_scanner_type::load() method.
+ (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
+ - ignore.size() == images.size()
+ - for all valid i, j:
+ - truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template()
+ - all_parts_in_rect(truth_object_detections[i][j]) == true
+ ensures
+ - Uses the structural_svm_object_detection_problem to train an object_detector
+ on the given images and truth_object_detections.
+ - for all valid i:
+ - Within images[i] any detections that match against a rectangle in
+ ignore[i], according to ignore_overlap_tester, are ignored. That is,
+ the optimizer doesn't care if the detector outputs a detection that
+ matches any of the ignore rectangles or if it fails to output a
+ detection for an ignore rectangle. Therefore, if there are objects
+ in your dataset that you are unsure if you want to detect or otherwise
+ don't care if the detector gets or doesn't then you can mark them
+ with ignore rectangles and the optimizer will simply ignore them.
+ - returns a function F with the following properties:
+ - F(new_image) == A prediction of what objects are present in new_image. This
+ is a set of rectangles indicating their positions.
+ !*/
+
+ template <
+ typename image_array_type
+ >
+ const trained_function_type train (
+ const image_array_type& images,
+ const std::vector<std::vector<rectangle> >& truth_object_detections,
+ const std::vector<std::vector<rectangle> >& ignore,
+ const test_box_overlap& ignore_overlap_tester = test_box_overlap()
+ ) const;
+ /*!
+ requires
+ - is_learning_problem(images, truth_object_detections) == true
+ - ignore.size() == images.size()
+ - it must be valid to pass images[0] into the image_scanner_type::load() method.
+ (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
+ - get_scanner().get_num_movable_components_per_detection_template() == 0
+ ensures
+ - This function is identical to the above train(), except that it converts
+ each element of truth_object_detections into a full_object_detection by
+ passing it to full_object_detection's constructor taking only a rectangle.
+ Therefore, this version of train() is a convenience function for for the
+ case where you don't have any movable components of the detection templates.
+ !*/
+ };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_
+
+