1 files changed, 404 insertions, 0 deletions
diff --git a/ml/dlib/dlib/image_processing/object_detector_abstract.h b/ml/dlib/dlib/image_processing/object_detector_abstract.h
new file mode 100644
index 000000000..9578d8b03
--- /dev/null
+++ b/ml/dlib/dlib/image_processing/object_detector_abstract.h
@@ -0,0 +1,404 @@
+// Copyright (C) 2011  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
+#ifdef DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
+
+#include "../geometry.h"
+#include <vector>
+#include "box_overlap_testing_abstract.h"
+#include "full_object_detection_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    struct rect_detection
+    {
+        double detection_confidence;  // strength of the detection; larger means more confident
+        unsigned long weight_index;   // index of the weight vector that generated this detection
+        rectangle rect;               // bounding box of the detection
+    };
+
+    struct full_detection
+    {
+        double detection_confidence;  // strength of the detection; larger means more confident
+        unsigned long weight_index;   // index of the weight vector that generated this detection
+        full_object_detection rect;   // the detection as a full_object_detection (includes part locations)
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_scanner_type_
+        >
+    class object_detector
+    {
+        /*!
+            REQUIREMENTS ON image_scanner_type_
+                image_scanner_type_ must be an implementation of 
+                dlib/image_processing/scan_image_pyramid_abstract.h or 
+                dlib/image_processing/scan_fhog_pyramid_abstract.h or 
+                dlib/image_processing/scan_image_custom_abstract.h or 
+                dlib/image_processing/scan_image_boxes_abstract.h 
+
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool for detecting the positions of objects in an image.
+                In particular, it is a simple container to aggregate an instance of an image 
+                scanner (i.e. scan_image_pyramid, scan_fhog_pyramid, scan_image_custom, or
+                scan_image_boxes), the weight vector needed by one of these image scanners,
+                and finally an instance of test_box_overlap.  The test_box_overlap object
+                is used to perform non-max suppression on the output of the image scanner
+                object.  
+
+                Note further that this object can contain multiple weight vectors.  In this
+                case, it will run the image scanner multiple times, once with each of the
+                weight vectors.  Then it will aggregate the results from all runs, perform
+                non-max suppression and then return the results.  Therefore, the object_detector 
+                can also be used as a container for a set of object detectors that all use
+                the same image scanner but different weight vectors.  This is useful since
+                the object detection procedure has two parts.  A loading step where the
+                image is loaded into the scanner, then a detect step which uses the weight
+                vector to locate objects in the image.  Since the loading step is independent 
+                of the weight vector it is most efficient to run multiple detectors by
+                performing one load into a scanner followed by multiple detect steps.  This
+                avoids unnecessarily loading the same image into the scanner multiple times.  
+        !*/
+    public:
+        typedef image_scanner_type_ image_scanner_type;
+        typedef typename image_scanner_type::feature_vector_type feature_vector_type;
+
+        object_detector (
+        );
+        /*!
+            ensures
+                - This detector won't generate any detections when
+                  presented with an image.
+                - #num_detectors() == 0
+        !*/
+
+        object_detector (
+            const object_detector& item 
+        );
+        /*!
+            ensures
+                - #*this is a copy of item
+                - #get_scanner() == item.get_scanner()
+                  (note that only the "configuration" of item.get_scanner() is copied.
+                  I.e. the copy is done using copy_configuration())
+        !*/
+
+        object_detector (
+            const image_scanner_type& scanner, 
+            const test_box_overlap& overlap_tester,
+            const feature_vector_type& w 
+        );
+        /*!
+            requires
+                - w.size() == scanner.get_num_dimensions() + 1
+                - scanner.get_num_detection_templates() > 0
+            ensures
+                - When the operator() member function is called it will
+                  invoke scanner.detect(w,dets,w(w.size()-1)), suppress
+                  overlapping detections, and then report the results.
+                - when #*this is used to detect objects, the set of
+                  output detections will never contain any overlaps
+                  with respect to overlap_tester.  That is, for all 
+                  pairs of returned detections A and B, we will always
+                  have: overlap_tester(A,B) == false
+                - #get_w() == w
+                - #get_overlap_tester() == overlap_tester
+                - #get_scanner() == scanner
+                  (note that only the "configuration" of scanner is copied.
+                  I.e. the copy is done using copy_configuration())
+                - #num_detectors() == 1
+        !*/
+
+        object_detector (
+            const image_scanner_type& scanner, 
+            const test_box_overlap& overlap_tester,
+            const std::vector<feature_vector_type>& w 
+        );
+        /*!
+            requires
+                - for all valid i:
+                    - w[i].size() == scanner.get_num_dimensions() + 1
+                - scanner.get_num_detection_templates() > 0
+                - w.size() > 0
+            ensures
+                - When the operator() member function is called it will invoke
+                  get_scanner().detect(w[i],dets,w[i](w[i].size()-1)) for all valid i.  Then it
+                  will take all the detections output by the calls to detect() and suppress
+                  overlapping detections, and finally report the results.
+                - when #*this is used to detect objects, the set of output detections will
+                  never contain any overlaps with respect to overlap_tester.  That is, for
+                  all pairs of returned detections A and B, we will always have:
+                    overlap_tester(A,B) == false
+                - for all valid i:
+                    - #get_w(i) == w[i]
+                - #num_detectors() == w.size()
+                - #get_overlap_tester() == overlap_tester
+                - #get_scanner() == scanner
+                  (note that only the "configuration" of scanner is copied.
+                  I.e. the copy is done using copy_configuration())
+        !*/
+
+        explicit object_detector (
+            const std::vector<object_detector>& detectors
+        );
+        /*!
+            requires
+                - detectors.size() != 0
+                - All the detectors must use compatibly configured scanners.  That is, it
+                  must make sense for the weight vector from one detector to be used with
+                  the scanner from any other.
+                - for all valid i:
+                    - detectors[i].get_scanner().get_num_dimensions() == detectors[0].get_scanner().get_num_dimensions()
+                      (i.e. all the detectors use scanners that use the same kind of feature vectors.)
+            ensures
+                - Very much like the above constructor, this constructor takes all the
+                  given detectors and packs them into #*this.  That is, invoking operator()
+                  on #*this will run all the detectors, perform non-max suppression, and
+                  then report the results.
+                - When #*this is used to detect objects, the set of output detections will
+                  never contain any overlaps with respect to #get_overlap_tester().  That is,
+                  for all pairs of returned detections A and B, we will always have:
+                    #get_overlap_tester()(A,B) == false
+                - #num_detectors() == The sum of detectors[i].num_detectors() for all valid i. 
+                - #get_overlap_tester() == detectors[0].get_overlap_tester()
+                - #get_scanner() == detectors[0].get_scanner()
+                  (note that only the "configuration" of scanner is copied.  I.e. the copy
+                  is done using copy_configuration())
+        !*/
+
+        unsigned long num_detectors (
+        ) const; 
+        /*!
+            ensures
+                - returns the number of weight vectors in this object.  Since each weight
+                  vector logically represents an object detector, this returns the number
+                  of object detectors contained in this object.
+        !*/
+
+        const feature_vector_type& get_w (
+            unsigned long idx = 0
+        ) const;
+        /*!
+            requires
+                - idx < num_detectors()
+            ensures
+                - returns the idx-th weight vector loaded into this object.  All the weight vectors
+                  have the same dimension and logically each represents a different detector.
+        !*/
+
+        const test_box_overlap& get_overlap_tester (
+        ) const;
+        /*!
+            ensures
+                - returns the overlap tester used by this object
+        !*/
+
+        const image_scanner_type& get_scanner (
+        ) const;
+        /*!
+            ensures
+                - returns the image scanner used by this object.  
+        !*/
+
+        object_detector& operator= (
+            const object_detector& item 
+        );
+        /*!
+            ensures
+                - #*this is a copy of item
+                - #get_scanner() == item.get_scanner()
+                  (note that only the "configuration" of item.get_scanner() is 
+                  copied.  I.e. the copy is done using copy_configuration())
+                - returns #*this
+        !*/
+
+        template <
+            typename image_type
+            >
+        void operator() (
+            const image_type& img,
+            std::vector<rect_detection>& dets,
+            double adjust_threshold = 0
+        );
+        /*!
+            requires
+                - img == an object which can be accepted by image_scanner_type::load()
+            ensures
+                - Performs object detection on the given image and stores the detected
+                  objects into #dets.  In particular, we will have that:
+                    - #dets is sorted such that the highest confidence detections come
+                      first.  E.g. element 0 is the best detection, element 1 the next
+                      best, and so on.
+                    - #dets.size() == the number of detected objects.
+                    - #dets[i].detection_confidence == The strength of the i-th detection.
+                      Larger values indicate that the detector is more confident that
+                      #dets[i] is a correct detection rather than being a false alarm.
+                      Moreover, the detection_confidence is equal to the detection value
+                      output by the scanner minus the threshold value stored at the end of
+                      the weight vector in get_w(#dets[i].weight_index). 
+                    - #dets[i].weight_index == the index for the weight vector that
+                      generated this detection. 
+                    - #dets[i].rect == the bounding box for the i-th detection.
+                - #get_scanner() will have been loaded with img. Therefore, you can call
+                  #get_scanner().get_feature_vector() to obtain the feature vectors or
+                  #get_scanner().get_full_object_detection() to get the
+                  full_object_detections for the resulting object detection boxes.
+                - The detection threshold is adjusted by having adjust_threshold added to
+                  it.  Therefore, an adjust_threshold value > 0 makes detecting objects
+                  harder while a negative value makes it easier.  Moreover, the following
+                  will be true for all valid i:
+                    - #dets[i].detection_confidence >= adjust_threshold
+                  This means that, for example, you can obtain the maximum possible number
+                  of detections by setting adjust_threshold equal to negative infinity.
+        !*/
+
+        template <
+            typename image_type
+            >
+        void operator() (
+            const image_type& img,
+            std::vector<full_detection>& dets,
+            double adjust_threshold = 0
+        );
+        /*!
+            requires
+                - img == an object which can be accepted by image_scanner_type::load()
+            ensures
+                - This function is identical to the above operator() routine, except that
+                  it outputs full_object_detections instead of rectangles.  This means that
+                  the output includes part locations.  In particular, calling this function
+                  is the same as calling the above operator() routine and then using
+                  get_scanner().get_full_object_detection() to resolve all the rectangles
+                  into full_object_detections.  Therefore, this version of operator() is
+                  simply a convenience function for performing this set of operations.
+        !*/
+
+        template <
+            typename image_type
+            >
+        std::vector<rectangle> operator() (
+            const image_type& img,
+            double adjust_threshold = 0
+        );
+        /*!
+            requires
+                - img == an object which can be accepted by image_scanner_type::load()
+            ensures
+                - This function is identical to the above operator() routine, except that
+                  it returns a std::vector<rectangle> which contains just the bounding
+                  boxes of all the detections. 
+        !*/
+
+        template <
+            typename image_type
+            >
+        void operator() (
+            const image_type& img,
+            std::vector<std::pair<double, rectangle> >& dets,
+            double adjust_threshold = 0
+        );
+        /*!
+            requires
+                - img == an object which can be accepted by image_scanner_type::load()
+            ensures
+                - performs object detection on the given image and stores the
+                  detected objects into #dets.  In particular, we will have that:
+                    - #dets is sorted such that the highest confidence detections 
+                      come first.  E.g. element 0 is the best detection, element 1 
+                      the next best, and so on.
+                    - #dets.size() == the number of detected objects.
+                    - #dets[i].first gives the "detection confidence", of the i-th
+                      detection.  This is the detection value output by the scanner minus
+                      the threshold value stored at the end of the weight vector in get_w(). 
+                    - #dets[i].second == the bounding box for the i-th detection.
+                - #get_scanner() will have been loaded with img. Therefore, you can call
+                  #get_scanner().get_feature_vector() to obtain the feature vectors or
+                  #get_scanner().get_full_object_detection() to get the
+                  full_object_detections for the resulting object detection boxes.
+                - The detection threshold is adjusted by having adjust_threshold added to
+                  it.  Therefore, an adjust_threshold value > 0 makes detecting objects
+                  harder while a negative value makes it easier.  Moreover, the following
+                  will be true for all valid i:
+                    - #dets[i].first >= adjust_threshold
+                  This means that, for example, you can obtain the maximum possible number
+                  of detections by setting adjust_threshold equal to negative infinity.
+        !*/
+
+        template <
+            typename image_type
+            >
+        void operator() (
+            const image_type& img,
+            std::vector<std::pair<double, full_object_detection> >& dets,
+            double adjust_threshold = 0
+        );
+        /*!
+            requires
+                - img == an object which can be accepted by image_scanner_type::load()
+            ensures
+                - This function is identical to the above operator() routine, except that
+                  it outputs full_object_detections instead of rectangles.  This means that
+                  the output includes part locations.  In particular, calling this function
+                  is the same as calling the above operator() routine and then using
+                  get_scanner().get_full_object_detection() to resolve all the rectangles
+                  into full_object_detections.  Therefore, this version of operator() is
+                  simply a convenience function for performing this set of operations.
+        !*/
+
+        template <
+            typename image_type
+            >
+        void operator() (
+            const image_type& img,
+            std::vector<full_object_detection>& dets,
+            double adjust_threshold = 0
+        );
+        /*!
+            requires
+                - img == an object which can be accepted by image_scanner_type::load()
+            ensures
+                - This function is identical to the above operator() routine, except that
+                  it doesn't include a double valued score.  That is, it just outputs the
+                  full_object_detections.
+        !*/
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename T>
+    void serialize (
+        const object_detector<T>& item,
+        std::ostream& out
+    );
+    /*!
+        Provides serialization support by writing item to the out stream.  Only the
+        configuration part of item.get_scanner() is saved.  That is, we use the scanner's
+        copy_configuration() function to get a copy of the scanner that doesn't contain any
+        loaded image data and we then save just the configuration part of the scanner.
+        This means that any serialized object_detectors won't remember any images they have
+        processed but will otherwise contain all their state and be able to detect objects
+        in new images.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename T>
+    void deserialize (
+        object_detector<T>& item,
+        std::istream& in 
+    );
+    /*!
+        Provides deserialization support: loads into item the state serialize() wrote, read from in.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
+