diff options
Diffstat (limited to 'ml/dlib/dlib/image_processing/object_detector_abstract.h')
-rw-r--r-- | ml/dlib/dlib/image_processing/object_detector_abstract.h | 404 |
1 files changed, 404 insertions, 0 deletions
// Copyright (C) 2011  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.

// NOTE: the macro below is #undef'd immediately before the #ifdef that tests it, so
// the preprocessor skips everything down to the matching #endif.  The declarations
// in this file are therefore never compiled; they document the interface only.
#undef DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
#ifdef DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_

#include "../geometry.h"
#include <vector>
#include "box_overlap_testing_abstract.h"
#include "full_object_detection_abstract.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    struct rect_detection
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This is one detection as output by the object_detector::operator()
                overload that fills a std::vector<rect_detection>.  In particular:
                    - detection_confidence == the strength of the detection.  Larger
                      values indicate that the detector is more confident that this is
                      a correct detection rather than a false alarm.
                    - weight_index == the index of the weight vector that generated
                      this detection.
                    - rect == the bounding box of the detection.
        !*/

        double detection_confidence;
        unsigned long weight_index;
        rectangle rect;
    };

    struct full_detection
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This is one detection as output by the object_detector::operator()
                overload that fills a std::vector<full_detection>.  It carries the
                same information as rect_detection, except that rect is a
                full_object_detection and so also includes part locations.
        !*/

        double detection_confidence;
        unsigned long weight_index;
        full_object_detection rect;
    };

// ----------------------------------------------------------------------------------------

    template <
        typename image_scanner_type_
        >
    class object_detector
    {
        /*!
            REQUIREMENTS ON image_scanner_type_
                image_scanner_type_ must be an implementation of
                dlib/image_processing/scan_image_pyramid_abstract.h or
                dlib/image_processing/scan_fhog_pyramid.h or
                dlib/image_processing/scan_image_custom.h or
                dlib/image_processing/scan_image_boxes_abstract.h

            WHAT THIS OBJECT REPRESENTS
                This object is a tool for detecting the positions of objects in an image.
                In particular, it is a simple container to aggregate an instance of an image
                scanner (i.e. scan_image_pyramid, scan_fhog_pyramid, scan_image_custom, or
                scan_image_boxes), the weight vector needed by one of these image scanners,
                and finally an instance of test_box_overlap.  The test_box_overlap object
                is used to perform non-max suppression on the output of the image scanner
                object.

                Note further that this object can contain multiple weight vectors.  In this
                case, it will run the image scanner multiple times, once with each of the
                weight vectors.  Then it will aggregate the results from all runs, perform
                non-max suppression and then return the results.  Therefore, the object_detector
                can also be used as a container for a set of object detectors that all use
                the same image scanner but different weight vectors.  This is useful since
                the object detection procedure has two parts.  A loading step where the
                image is loaded into the scanner, then a detect step which uses the weight
                vector to locate objects in the image.  Since the loading step is independent
                of the weight vector it is most efficient to run multiple detectors by
                performing one load into a scanner followed by multiple detect steps.  This
                avoids unnecessarily loading the same image into the scanner multiple times.
        !*/

    public:
        typedef image_scanner_type_ image_scanner_type;
        typedef typename image_scanner_type::feature_vector_type feature_vector_type;

        object_detector (
        );
        /*!
            ensures
                - This detector won't generate any detections when
                  presented with an image.
                - #num_detectors() == 0
        !*/

        object_detector (
            const object_detector& item
        );
        /*!
            ensures
                - #*this is a copy of item
                - #get_scanner() == item.get_scanner()
                  (note that only the "configuration" of item.get_scanner() is copied.
                  I.e. the copy is done using copy_configuration())
        !*/

        object_detector (
            const image_scanner_type& scanner,
            const test_box_overlap& overlap_tester,
            const feature_vector_type& w
        );
        /*!
            requires
                - w.size() == scanner.get_num_dimensions() + 1
                - scanner.get_num_detection_templates() > 0
            ensures
                - When the operator() member function is called it will
                  invoke scanner.detect(w,dets,w(w.size()-1)), suppress
                  overlapping detections, and then report the results.
                - when #*this is used to detect objects, the set of
                  output detections will never contain any overlaps
                  with respect to overlap_tester.  That is, for all
                  pairs of returned detections A and B, we will always
                  have: overlap_tester(A,B) == false
                - #get_w() == w
                - #get_overlap_tester() == overlap_tester
                - #get_scanner() == scanner
                  (note that only the "configuration" of scanner is copied.
                  I.e. the copy is done using copy_configuration())
                - #num_detectors() == 1
        !*/

        object_detector (
            const image_scanner_type& scanner,
            const test_box_overlap& overlap_tester,
            const std::vector<feature_vector_type>& w
        );
        /*!
            requires
                - for all valid i:
                    - w[i].size() == scanner.get_num_dimensions() + 1
                - scanner.get_num_detection_templates() > 0
                - w.size() > 0
            ensures
                - When the operator() member function is called it will invoke
                  get_scanner().detect(w[i],dets,w[i](w[i].size()-1)) for all valid i.  Then it
                  will take all the detections output by the calls to detect() and suppress
                  overlapping detections, and finally report the results.
                - when #*this is used to detect objects, the set of output detections will
                  never contain any overlaps with respect to overlap_tester.  That is, for
                  all pairs of returned detections A and B, we will always have:
                    overlap_tester(A,B) == false
                - for all valid i:
                    - #get_w(i) == w[i]
                - #num_detectors() == w.size()
                - #get_overlap_tester() == overlap_tester
                - #get_scanner() == scanner
                  (note that only the "configuration" of scanner is copied.
                  I.e. the copy is done using copy_configuration())
        !*/

        explicit object_detector (
            const std::vector<object_detector>& detectors
        );
        /*!
            requires
                - detectors.size() != 0
                - All the detectors must use compatibly configured scanners.  That is, it
                  must make sense for the weight vector from one detector to be used with
                  the scanner from any other.
                - for all valid i:
                    - detectors[i].get_scanner().get_num_dimensions() == detectors[0].get_scanner().get_num_dimensions()
                      (i.e. all the detectors use scanners that use the same kind of feature vectors.)
            ensures
                - Very much like the above constructor, this constructor takes all the
                  given detectors and packs them into #*this.  That is, invoking operator()
                  on #*this will run all the detectors, perform non-max suppression, and
                  then report the results.
                - When #*this is used to detect objects, the set of output detections will
                  never contain any overlaps with respect to #get_overlap_tester() (i.e.
                  detectors[0].get_overlap_tester()).  That is, for all pairs of returned
                  detections A and B, we will always have:
                    #get_overlap_tester()(A,B) == false
                - #num_detectors() == The sum of detectors[i].num_detectors() for all valid i.
                - #get_overlap_tester() == detectors[0].get_overlap_tester()
                - #get_scanner() == detectors[0].get_scanner()
                  (note that only the "configuration" of scanner is copied.  I.e. the copy
                  is done using copy_configuration())
        !*/

        unsigned long num_detectors (
        ) const;
        /*!
            ensures
                - returns the number of weight vectors in this object.  Since each weight
                  vector logically represents an object detector, this returns the number
                  of object detectors contained in this object.
        !*/

        const feature_vector_type& get_w (
            unsigned long idx = 0
        ) const;
        /*!
            requires
                - idx < num_detectors()
            ensures
                - returns the idx-th weight vector loaded into this object.  All the weight vectors
                  have the same dimension and logically each represents a different detector.
        !*/

        const test_box_overlap& get_overlap_tester (
        ) const;
        /*!
            ensures
                - returns the overlap tester used by this object
        !*/

        const image_scanner_type& get_scanner (
        ) const;
        /*!
            ensures
                - returns the image scanner used by this object.
        !*/

        object_detector& operator= (
            const object_detector& item
        );
        /*!
            ensures
                - #*this is a copy of item
                - #get_scanner() == item.get_scanner()
                  (note that only the "configuration" of item.get_scanner() is
                  copied.  I.e. the copy is done using copy_configuration())
                - returns #*this
        !*/

        template <
            typename image_type
            >
        void operator() (
            const image_type& img,
            std::vector<rect_detection>& dets,
            double adjust_threshold = 0
        );
        /*!
            requires
                - img == an object which can be accepted by image_scanner_type::load()
            ensures
                - Performs object detection on the given image and stores the detected
                  objects into #dets.  In particular, we will have that:
                    - #dets is sorted such that the highest confidence detections come
                      first.  E.g. element 0 is the best detection, element 1 the next
                      best, and so on.
                    - #dets.size() == the number of detected objects.
                    - #dets[i].detection_confidence == The strength of the i-th detection.
                      Larger values indicate that the detector is more confident that
                      #dets[i] is a correct detection rather than being a false alarm.
                      Moreover, the detection_confidence is equal to the detection value
                      output by the scanner minus the threshold value stored at the end of
                      the weight vector in get_w(#dets[i].weight_index).
                    - #dets[i].weight_index == the index for the weight vector that
                      generated this detection.
                    - #dets[i].rect == the bounding box for the i-th detection.
                - #get_scanner() will have been loaded with img.  Therefore, you can call
                  #get_scanner().get_feature_vector() to obtain the feature vectors or
                  #get_scanner().get_full_object_detection() to get the
                  full_object_detections for the resulting object detection boxes.
                - The detection threshold is adjusted by having adjust_threshold added to
                  it.  Therefore, an adjust_threshold value > 0 makes detecting objects
                  harder while a negative value makes it easier.  Moreover, the following
                  will be true for all valid i:
                    - #dets[i].detection_confidence >= adjust_threshold
                  This means that, for example, you can obtain the maximum possible number
                  of detections by setting adjust_threshold equal to negative infinity.
        !*/

        template <
            typename image_type
            >
        void operator() (
            const image_type& img,
            std::vector<full_detection>& dets,
            double adjust_threshold = 0
        );
        /*!
            requires
                - img == an object which can be accepted by image_scanner_type::load()
            ensures
                - This function is identical to the above operator() routine, except that
                  it outputs full_object_detections instead of rectangles.  This means that
                  the output includes part locations.  In particular, calling this function
                  is the same as calling the above operator() routine and then using
                  get_scanner().get_full_object_detection() to resolve all the rectangles
                  into full_object_detections.  Therefore, this version of operator() is
                  simply a convenience function for performing this set of operations.
        !*/

        template <
            typename image_type
            >
        std::vector<rectangle> operator() (
            const image_type& img,
            double adjust_threshold = 0
        );
        /*!
            requires
                - img == an object which can be accepted by image_scanner_type::load()
            ensures
                - This function is identical to the above operator() routine, except that
                  it returns a std::vector<rectangle> which contains just the bounding
                  boxes of all the detections.
        !*/

        template <
            typename image_type
            >
        void operator() (
            const image_type& img,
            std::vector<std::pair<double, rectangle> >& dets,
            double adjust_threshold = 0
        );
        /*!
            requires
                - img == an object which can be accepted by image_scanner_type::load()
            ensures
                - performs object detection on the given image and stores the
                  detected objects into #dets.  In particular, we will have that:
                    - #dets is sorted such that the highest confidence detections
                      come first.  E.g. element 0 is the best detection, element 1
                      the next best, and so on.
                    - #dets.size() == the number of detected objects.
                    - #dets[i].first gives the "detection confidence", of the i-th
                      detection.  This is the detection value output by the scanner minus
                      the threshold value stored at the end of the weight vector in get_w().
                    - #dets[i].second == the bounding box for the i-th detection.
                - #get_scanner() will have been loaded with img.  Therefore, you can call
                  #get_scanner().get_feature_vector() to obtain the feature vectors or
                  #get_scanner().get_full_object_detection() to get the
                  full_object_detections for the resulting object detection boxes.
                - The detection threshold is adjusted by having adjust_threshold added to
                  it.  Therefore, an adjust_threshold value > 0 makes detecting objects
                  harder while a negative value makes it easier.  Moreover, the following
                  will be true for all valid i:
                    - #dets[i].first >= adjust_threshold
                  This means that, for example, you can obtain the maximum possible number
                  of detections by setting adjust_threshold equal to negative infinity.
        !*/

        template <
            typename image_type
            >
        void operator() (
            const image_type& img,
            std::vector<std::pair<double, full_object_detection> >& dets,
            double adjust_threshold = 0
        );
        /*!
            requires
                - img == an object which can be accepted by image_scanner_type::load()
            ensures
                - This function is identical to the above operator() routine, except that
                  it outputs full_object_detections instead of rectangles.  This means that
                  the output includes part locations.  In particular, calling this function
                  is the same as calling the above operator() routine and then using
                  get_scanner().get_full_object_detection() to resolve all the rectangles
                  into full_object_detections.  Therefore, this version of operator() is
                  simply a convenience function for performing this set of operations.
        !*/

        template <
            typename image_type
            >
        void operator() (
            const image_type& img,
            std::vector<full_object_detection>& dets,
            double adjust_threshold = 0
        );
        /*!
            requires
                - img == an object which can be accepted by image_scanner_type::load()
            ensures
                - This function is identical to the above operator() routine, except that
                  it doesn't include a double valued score.  That is, it just outputs the
                  full_object_detections.
        !*/
    };

// ----------------------------------------------------------------------------------------

    template <typename T>
    void serialize (
        const object_detector<T>& item,
        std::ostream& out
    );
    /*!
        provides serialization support.  Note that this function only saves the
        configuration part of item.get_scanner().  That is, we use the scanner's
        copy_configuration() function to get a copy of the scanner that doesn't contain any
        loaded image data and we then save just the configuration part of the scanner.
        This means that any serialized object_detectors won't remember any images they have
        processed but will otherwise contain all their state and be able to detect objects
        in new images.
    !*/

// ----------------------------------------------------------------------------------------

    template <typename T>
    void deserialize (
        object_detector<T>& item,
        std::istream& in
    );
    /*!
        provides deserialization support
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_