summary | refs | log | tree | commit | diff | stats
path: root/ml/dlib/dlib/image_processing/shape_predictor_abstract.h
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/image_processing/shape_predictor_abstract.h')
-rw-r--r-- ml/dlib/dlib/image_processing/shape_predictor_abstract.h 195
1 files changed, 195 insertions, 0 deletions
diff --git a/ml/dlib/dlib/image_processing/shape_predictor_abstract.h b/ml/dlib/dlib/image_processing/shape_predictor_abstract.h
new file mode 100644
index 000000000..718b4952e
--- /dev/null
+++ b/ml/dlib/dlib/image_processing/shape_predictor_abstract.h
@@ -0,0 +1,195 @@
+// Copyright (C) 2014 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_SHAPE_PREDICToR_ABSTRACT_H_
+#ifdef DLIB_SHAPE_PREDICToR_ABSTRACT_H_
+
+#include "full_object_detection_abstract.h"
+#include "../matrix.h"
+#include "../geometry.h"
+#include "../pixel.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ class shape_predictor
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object is a tool that takes in an image region containing some object
+ and outputs a set of point locations that define the pose of the object.
+ The classic example of this is human face pose prediction, where you take
+ an image of a human face as input and are expected to identify the
+ locations of important facial landmarks such as the corners of the mouth
+ and eyes, tip of the nose, and so forth.
+
+ To create useful instantiations of this object you need to use the
+ shape_predictor_trainer object defined in the
+ shape_predictor_trainer_abstract.h file to train a shape_predictor using a
+ set of training images, each annotated with shapes you want to predict.
+
+ THREAD SAFETY
+ No synchronization is required when using this object. In particular, a
+ single instance of this object can be used from multiple threads at the
+ same time.
+ !*/
+
+ public:
+
+ shape_predictor (
+ );
+ /*!
+ ensures
+ - #num_parts() == 0
+ - #num_features() == 0
+ !*/
+
+ unsigned long num_parts (
+ ) const;
+ /*!
+ ensures
+ - Returns the number of parts in the shapes predicted by this object.
+ !*/
+
+ unsigned long num_features (
+ ) const;
+ /*!
+ ensures
+ - Returns the dimensionality of the feature vector output by operator().
+ This number is the total number of trees in this object times the number
+ of leaves on each tree.
+ !*/
+
+ template <typename image_type, typename T, typename U>
+ full_object_detection operator()(
+ const image_type& img,
+ const rectangle& rect,
+ std::vector<std::pair<T,U> >& feats
+ ) const;
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - T is some unsigned integral type (e.g. unsigned int).
+ - U is any scalar type capable of storing the value 1 (e.g. float).
+ ensures
+ - Runs the shape prediction algorithm on the part of the image contained in
+ the given bounding rectangle. So it will try to fit the shape model to
+ the contents of the given rectangle in the image. For example, if there
+ is a human face inside the rectangle and you use a face landmarking shape
+ model then this function will return the locations of the face landmarks
+ as the parts. So the return value is a full_object_detection DET such
+ that:
+ - DET.get_rect() == rect
+ - DET.num_parts() == num_parts()
+ - for all valid i:
+ - DET.part(i) == the location in img for the i-th part of the shape
+ predicted by this object.
+ - #feats == a sparse vector that records which leaf each tree used to make
+ the shape prediction. Moreover, it is an indicator vector. Therefore,
+ for all valid i:
+ - #feats[i].second == 1
+ Further, #feats is a vector from the space of num_features() dimensional
+ vectors. The output shape positions can be represented as the dot
+ product between #feats and a weight vector. Therefore, #feats encodes
+ all the information from img that was used to predict the returned shape
+ object.
+ !*/
+
+ template <typename image_type>
+ full_object_detection operator()(
+ const image_type& img,
+ const rectangle& rect
+ ) const;
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ ensures
+ - Calling this function is equivalent to calling (*this)(img, rect, ignored)
+ where the 3rd argument (the feats output) is discarded.
+ !*/
+
+ };
+
+ void serialize (const shape_predictor& item, std::ostream& out);
+ void deserialize (shape_predictor& item, std::istream& in);
+ /*!
+ Provides serialization support for shape_predictor objects.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_array
+ >
+ double test_shape_predictor (
+ const shape_predictor& sp,
+ const image_array& images,
+ const std::vector<std::vector<full_object_detection> >& objects,
+ const std::vector<std::vector<double> >& scales
+ );
+ /*!
+ requires
+ - image_array is a dlib::array of image objects where each image object
+ implements the interface defined in dlib/image_processing/generic_image.h
+ - images.size() == objects.size()
+ - for all valid i and j:
+ - objects[i][j].num_parts() == sp.num_parts()
+ - if (scales.size() != 0) then
+ - There must be a scale value for each full_object_detection in objects.
+ That is, it must be the case that:
+ - scales.size() == objects.size()
+ - for all valid i:
+ - scales[i].size() == objects[i].size()
+ ensures
+ - Tests the given shape_predictor by running it on each of the given objects and
+ checking how well it recovers the part positions. In particular, for all
+ valid i and j we perform:
+ sp(images[i], objects[i][j].get_rect())
+ and compare the result with the ground truth part positions in objects[i][j]. We
+ then return the average distance (measured in pixels) between a predicted
+ part location and its true position.
+ - Note that any parts in objects that are set to OBJECT_PART_NOT_PRESENT are
+ simply ignored.
+ - if (scales.size() != 0) then
+ - Each time we compute the distance between a predicted part location and
+ its true location in objects[i][j] we divide the distance by
+ scales[i][j]. Therefore, if you want the reported error to be the
+ average pixel distance then give an empty scales vector, but if you want
+ the returned value to be something else like the average distance
+ normalized by some feature of each object (e.g. the interocular distance)
+ then you can supply those normalizing values via scales.
+ !*/
+
+ template <
+ typename image_array
+ >
+ double test_shape_predictor (
+ const shape_predictor& sp,
+ const image_array& images,
+ const std::vector<std::vector<full_object_detection> >& objects
+ );
+ /*!
+ requires
+ - image_array is a dlib::array of image objects where each image object
+ implements the interface defined in dlib/image_processing/generic_image.h
+ - images.size() == objects.size()
+ - for all valid i and j:
+ - objects[i][j].num_parts() == sp.num_parts()
+ ensures
+ - Returns test_shape_predictor(sp, images, objects, no_scales) where no_scales
+ is an empty vector. So this is just a convenience function for calling the
+ above test_shape_predictor() routine without a scales argument.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SHAPE_PREDICToR_ABSTRACT_H_
+