summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/image_processing/object_detector_abstract.h
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/image_processing/object_detector_abstract.h')
-rw-r--r--ml/dlib/dlib/image_processing/object_detector_abstract.h404
1 files changed, 404 insertions, 0 deletions
diff --git a/ml/dlib/dlib/image_processing/object_detector_abstract.h b/ml/dlib/dlib/image_processing/object_detector_abstract.h
new file mode 100644
index 000000000..9578d8b03
--- /dev/null
+++ b/ml/dlib/dlib/image_processing/object_detector_abstract.h
@@ -0,0 +1,404 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
+#ifdef DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
+
+#include "../geometry.h"
+#include <vector>
+#include "box_overlap_testing_abstract.h"
+#include "full_object_detection_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ struct rect_detection // one detection as reported by object_detector::operator() in rectangle form
+ {
+ double detection_confidence; // strength of the detection; larger values mean more confidence that it is correct
+ unsigned long weight_index; // index of the weight vector that generated this detection
+ rectangle rect; // bounding box of the detection
+ };
+
+ struct full_detection // one detection as reported by object_detector::operator() in full_object_detection form
+ {
+ double detection_confidence; // strength of the detection; larger values mean more confidence that it is correct
+ unsigned long weight_index; // index of the weight vector that generated this detection
+ full_object_detection rect; // the detection itself; unlike a plain rectangle this includes part locations
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_scanner_type_
+ >
+ class object_detector
+ {
+ /*!
+ REQUIREMENTS ON image_scanner_type_
+ image_scanner_type_ must be an implementation of
+ dlib/image_processing/scan_image_pyramid_abstract.h or
+ dlib/image_processing/scan_fhog_pyramid.h or
+ dlib/image_processing/scan_image_custom.h or
+ dlib/image_processing/scan_image_boxes_abstract.h
+
+ WHAT THIS OBJECT REPRESENTS
+ This object is a tool for detecting the positions of objects in an image.
+ In particular, it is a simple container to aggregate an instance of an image
+ scanner (i.e. scan_image_pyramid, scan_fhog_pyramid, scan_image_custom, or
+ scan_image_boxes), the weight vector needed by one of these image scanners,
+ and finally an instance of test_box_overlap. The test_box_overlap object
+ is used to perform non-max suppression on the output of the image scanner
+ object.
+
+ Note further that this object can contain multiple weight vectors. In this
+ case, it will run the image scanner multiple times, once with each of the
+ weight vectors. Then it will aggregate the results from all runs, perform
+ non-max suppression and then return the results. Therefore, the object_detector
+ can also be used as a container for a set of object detectors that all use
+ the same image scanner but different weight vectors. This is useful since
+ the object detection procedure has two parts. A loading step where the
+ image is loaded into the scanner, then a detect step which uses the weight
+ vector to locate objects in the image. Since the loading step is independent
+ of the weight vector it is most efficient to run multiple detectors by
+ performing one load into a scanner followed by multiple detect steps. This
+ avoids unnecessarily loading the same image into the scanner multiple times.
+ !*/
+ public:
+ typedef image_scanner_type_ image_scanner_type;
+ typedef typename image_scanner_type::feature_vector_type feature_vector_type;
+
+ object_detector (
+ );
+ /*!
+ ensures
+ - This detector won't generate any detections when
+ presented with an image.
+ - #num_detectors() == 0
+ !*/
+
+ object_detector (
+ const object_detector& item
+ );
+ /*!
+ ensures
+ - #*this is a copy of item
+ - #get_scanner() == item.get_scanner()
+ (note that only the "configuration" of item.get_scanner() is copied.
+ I.e. the copy is done using copy_configuration())
+ !*/
+
+ object_detector (
+ const image_scanner_type& scanner,
+ const test_box_overlap& overlap_tester,
+ const feature_vector_type& w
+ );
+ /*!
+ requires
+ - w.size() == scanner.get_num_dimensions() + 1
+ - scanner.get_num_detection_templates() > 0
+ ensures
+ - When the operator() member function is called it will
+ invoke get_scanner().detect(w,dets,w(w.size()-1)), suppress
+ overlapping detections, and then report the results.
+ - when #*this is used to detect objects, the set of
+ output detections will never contain any overlaps
+ with respect to overlap_tester. That is, for all
+ pairs of returned detections A and B, we will always
+ have: overlap_tester(A,B) == false
+ - #get_w() == w
+ - #get_overlap_tester() == overlap_tester
+ - #get_scanner() == scanner
+ (note that only the "configuration" of scanner is copied.
+ I.e. the copy is done using copy_configuration())
+ - #num_detectors() == 1
+ !*/
+
+ object_detector (
+ const image_scanner_type& scanner,
+ const test_box_overlap& overlap_tester,
+ const std::vector<feature_vector_type>& w
+ );
+ /*!
+ requires
+ - for all valid i:
+ - w[i].size() == scanner.get_num_dimensions() + 1
+ - scanner.get_num_detection_templates() > 0
+ - w.size() > 0
+ ensures
+ - When the operator() member function is called it will invoke
+ get_scanner().detect(w[i],dets,w[i](w[i].size()-1)) for all valid i. Then it
+ will take all the detections output by the calls to detect() and suppress
+ overlapping detections, and finally report the results.
+ - when #*this is used to detect objects, the set of output detections will
+ never contain any overlaps with respect to overlap_tester. That is, for
+ all pairs of returned detections A and B, we will always have:
+ overlap_tester(A,B) == false
+ - for all valid i:
+ - #get_w(i) == w[i]
+ - #num_detectors() == w.size()
+ - #get_overlap_tester() == overlap_tester
+ - #get_scanner() == scanner
+ (note that only the "configuration" of scanner is copied.
+ I.e. the copy is done using copy_configuration())
+ !*/
+
+ explicit object_detector (
+ const std::vector<object_detector>& detectors
+ );
+ /*!
+ requires
+ - detectors.size() != 0
+ - All the detectors must use compatibly configured scanners. That is, it
+ must make sense for the weight vector from one detector to be used with
+ the scanner from any other.
+ - for all valid i:
+ - detectors[i].get_scanner().get_num_dimensions() == detectors[0].get_scanner().get_num_dimensions()
+ (i.e. all the detectors use scanners that use the same kind of feature vectors.)
+ ensures
+ - Very much like the above constructor, this constructor takes all the
+ given detectors and packs them into #*this. That is, invoking operator()
+ on #*this will run all the detectors, perform non-max suppression, and
+ then report the results.
+ - When #*this is used to detect objects, the set of output detections will
+ never contain any overlaps with respect to #get_overlap_tester(). That is, for
+ all pairs of returned detections A and B, we will always have:
+ #get_overlap_tester()(A,B) == false
+ - #num_detectors() == The sum of detectors[i].num_detectors() for all valid i.
+ - #get_overlap_tester() == detectors[0].get_overlap_tester()
+ - #get_scanner() == detectors[0].get_scanner()
+ (note that only the "configuration" of scanner is copied. I.e. the copy
+ is done using copy_configuration())
+ !*/
+
+ unsigned long num_detectors (
+ ) const;
+ /*!
+ ensures
+ - returns the number of weight vectors in this object. Since each weight
+ vector logically represents an object detector, this returns the number
+ of object detectors contained in this object.
+ !*/
+
+ const feature_vector_type& get_w (
+ unsigned long idx = 0
+ ) const;
+ /*!
+ requires
+ - idx < num_detectors()
+ ensures
+ - returns the idx-th weight vector loaded into this object. All the weight vectors
+ have the same dimension and logically each represents a different detector.
+ !*/
+
+ const test_box_overlap& get_overlap_tester (
+ ) const;
+ /*!
+ ensures
+ - returns the overlap tester used by this object
+ !*/
+
+ const image_scanner_type& get_scanner (
+ ) const;
+ /*!
+ ensures
+ - returns the image scanner used by this object.
+ !*/
+
+ object_detector& operator= (
+ const object_detector& item
+ );
+ /*!
+ ensures
+ - #*this is a copy of item
+ - #get_scanner() == item.get_scanner()
+ (note that only the "configuration" of item.get_scanner() is
+ copied. I.e. the copy is done using copy_configuration())
+ - returns #*this
+ !*/
+
+ template <
+ typename image_type
+ >
+ void operator() (
+ const image_type& img,
+ std::vector<rect_detection>& dets,
+ double adjust_threshold = 0
+ );
+ /*!
+ requires
+ - img == an object which can be accepted by image_scanner_type::load()
+ ensures
+ - Performs object detection on the given image and stores the detected
+ objects into #dets. In particular, we will have that:
+ - #dets is sorted such that the highest confidence detections come
+ first. E.g. element 0 is the best detection, element 1 the next
+ best, and so on.
+ - #dets.size() == the number of detected objects.
+ - #dets[i].detection_confidence == The strength of the i-th detection.
+ Larger values indicate that the detector is more confident that
+ #dets[i] is a correct detection rather than being a false alarm.
+ Moreover, the detection_confidence is equal to the detection value
+ output by the scanner minus the threshold value stored at the end of
+ the weight vector in get_w(#dets[i].weight_index).
+ - #dets[i].weight_index == the index for the weight vector that
+ generated this detection.
+ - #dets[i].rect == the bounding box for the i-th detection.
+ - #get_scanner() will have been loaded with img. Therefore, you can call
+ #get_scanner().get_feature_vector() to obtain the feature vectors or
+ #get_scanner().get_full_object_detection() to get the
+ full_object_detections for the resulting object detection boxes.
+ - The detection threshold is adjusted by having adjust_threshold added to
+ it. Therefore, an adjust_threshold value > 0 makes detecting objects
+ harder while a negative value makes it easier. Moreover, the following
+ will be true for all valid i:
+ - #dets[i].detection_confidence >= adjust_threshold
+ This means that, for example, you can obtain the maximum possible number
+ of detections by setting adjust_threshold equal to negative infinity.
+ !*/
+
+ template <
+ typename image_type
+ >
+ void operator() (
+ const image_type& img,
+ std::vector<full_detection>& dets,
+ double adjust_threshold = 0
+ );
+ /*!
+ requires
+ - img == an object which can be accepted by image_scanner_type::load()
+ ensures
+ - This function is identical to the above operator() routine, except that
+ it outputs full_object_detections instead of rectangles. This means that
+ the output includes part locations. In particular, calling this function
+ is the same as calling the above operator() routine and then using
+ get_scanner().get_full_object_detection() to resolve all the rectangles
+ into full_object_detections. Therefore, this version of operator() is
+ simply a convenience function for performing this set of operations.
+ !*/
+
+ template <
+ typename image_type
+ >
+ std::vector<rectangle> operator() (
+ const image_type& img,
+ double adjust_threshold = 0
+ );
+ /*!
+ requires
+ - img == an object which can be accepted by image_scanner_type::load()
+ ensures
+ - This function is identical to the std::vector<rect_detection> version of
+ operator() above, except that it returns a std::vector<rectangle> which
+ contains just the bounding boxes of all the detections.
+ !*/
+
+ template <
+ typename image_type
+ >
+ void operator() (
+ const image_type& img,
+ std::vector<std::pair<double, rectangle> >& dets,
+ double adjust_threshold = 0
+ );
+ /*!
+ requires
+ - img == an object which can be accepted by image_scanner_type::load()
+ ensures
+ - performs object detection on the given image and stores the
+ detected objects into #dets. In particular, we will have that:
+ - #dets is sorted such that the highest confidence detections
+ come first. E.g. element 0 is the best detection, element 1
+ the next best, and so on.
+ - #dets.size() == the number of detected objects.
+ - #dets[i].first gives the "detection confidence" of the i-th
+ detection. This is the detection value output by the scanner minus
+ the threshold value stored at the end of the weight vector in get_w().
+ - #dets[i].second == the bounding box for the i-th detection.
+ - #get_scanner() will have been loaded with img. Therefore, you can call
+ #get_scanner().get_feature_vector() to obtain the feature vectors or
+ #get_scanner().get_full_object_detection() to get the
+ full_object_detections for the resulting object detection boxes.
+ - The detection threshold is adjusted by having adjust_threshold added to
+ it. Therefore, an adjust_threshold value > 0 makes detecting objects
+ harder while a negative value makes it easier. Moreover, the following
+ will be true for all valid i:
+ - #dets[i].first >= adjust_threshold
+ This means that, for example, you can obtain the maximum possible number
+ of detections by setting adjust_threshold equal to negative infinity.
+ !*/
+
+ template <
+ typename image_type
+ >
+ void operator() (
+ const image_type& img,
+ std::vector<std::pair<double, full_object_detection> >& dets,
+ double adjust_threshold = 0
+ );
+ /*!
+ requires
+ - img == an object which can be accepted by image_scanner_type::load()
+ ensures
+ - This function is identical to the above operator() routine, except that
+ it outputs full_object_detections instead of rectangles. This means that
+ the output includes part locations. In particular, calling this function
+ is the same as calling the above operator() routine and then using
+ get_scanner().get_full_object_detection() to resolve all the rectangles
+ into full_object_detections. Therefore, this version of operator() is
+ simply a convenience function for performing this set of operations.
+ !*/
+
+ template <
+ typename image_type
+ >
+ void operator() (
+ const image_type& img,
+ std::vector<full_object_detection>& dets,
+ double adjust_threshold = 0
+ );
+ /*!
+ requires
+ - img == an object which can be accepted by image_scanner_type::load()
+ ensures
+ - This function is identical to the above operator() routine, except that
+ it doesn't include a double valued score. That is, it just outputs the
+ full_object_detections.
+ !*/
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename T>
+ void serialize (const object_detector<T>& item, std::ostream& out);
+ /*!
+ Provides serialization support for object_detector.
+
+ Only the configuration part of item.get_scanner() is saved. That is, the
+ scanner's copy_configuration() function is used to obtain a copy of the
+ scanner that doesn't contain any loaded image data, and it is just this
+ configuration that gets written out.
+
+ Consequently, a serialized object_detector won't remember any images it
+ has processed, but it will otherwise contain all its state and be able to
+ detect objects in new images.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename T>
+ void deserialize (object_detector<T>& item, std::istream& in);
+ /*!
+ Provides deserialization support for object_detector.
+
+ item is taken by non-const reference and in is a std::istream, i.e. item
+ is the object being filled in and in is the stream being read.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
+