diff options
Diffstat (limited to 'ml/dlib/dlib/image_processing/shape_predictor_abstract.h')
-rw-r--r-- | ml/dlib/dlib/image_processing/shape_predictor_abstract.h | 195 |
1 files changed, 195 insertions, 0 deletions
diff --git a/ml/dlib/dlib/image_processing/shape_predictor_abstract.h b/ml/dlib/dlib/image_processing/shape_predictor_abstract.h new file mode 100644 index 000000000..718b4952e --- /dev/null +++ b/ml/dlib/dlib/image_processing/shape_predictor_abstract.h @@ -0,0 +1,195 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SHAPE_PREDICToR_ABSTRACT_H_ +#ifdef DLIB_SHAPE_PREDICToR_ABSTRACT_H_ + +#include "full_object_detection_abstract.h" +#include "../matrix.h" +#include "../geometry.h" +#include "../pixel.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class shape_predictor + { + /*! + WHAT THIS OBJECT REPRESENTS + This object is a tool that takes in an image region containing some object + and outputs a set of point locations that define the pose of the object. + The classic example of this is human face pose prediction, where you take + an image of a human face as input and are expected to identify the + locations of important facial landmarks such as the corners of the mouth + and eyes, tip of the nose, and so forth. + + To create useful instantiations of this object you need to use the + shape_predictor_trainer object defined in the + shape_predictor_trainer_abstract.h file to train a shape_predictor using a + set of training images, each annotated with shapes you want to predict. + + THREAD SAFETY + No synchronization is required when using this object. In particular, a + single instance of this object can be used from multiple threads at the + same time. + !*/ + + public: + + shape_predictor ( + ); + /*! + ensures + - #num_parts() == 0 + - #num_features() == 0 + !*/ + + unsigned long num_parts ( + ) const; + /*! + ensures + - returns the number of parts in the shapes predicted by this object. + !*/ + + unsigned long num_features ( + ) const; + /*! 
+ ensures + - Returns the dimensionality of the feature vector output by operator(). + This number is the total number of trees in this object times the number + of leaves on each tree. + !*/ + + template <typename image_type, typename T, typename U> + full_object_detection operator()( + const image_type& img, + const rectangle& rect, + std::vector<std::pair<T,U> >& feats + ) const; + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - T is some unsigned integral type (e.g. unsigned int). + - U is any scalar type capable of storing the value 1 (e.g. float). + ensures + - Runs the shape prediction algorithm on the part of the image contained in + the given bounding rectangle. So it will try to fit the shape model to + the contents of the given rectangle in the image. For example, if there + is a human face inside the rectangle and you use a face landmarking shape + model then this function will return the locations of the face landmarks + as the parts. So the return value is a full_object_detection DET such + that: + - DET.get_rect() == rect + - DET.num_parts() == num_parts() + - for all valid i: + - DET.part(i) == the location in img for the i-th part of the shape + predicted by this object. + - #feats == a sparse vector that records which leaf each tree used to make + the shape prediction. Moreover, it is an indicator vector. Therefore, + for all valid i: + - #feats[i].second == 1 + Further, #feats is a vector from the space of num_features() dimensional + vectors. The output shape positions can be represented as the dot + product between #feats and a weight vector. Therefore, #feats encodes + all the information from img that was used to predict the returned shape + object. + !*/ + + template <typename image_type> + full_object_detection operator()( + const image_type& img, + const rectangle& rect + ) const; + /*! 
+ requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + ensures + - Calling this function is equivalent to calling (*this)(img, rect, ignored) + where the 3rd argument is discarded. + !*/ + + }; + + void serialize (const shape_predictor& item, std::ostream& out); + void deserialize (shape_predictor& item, std::istream& in); + /*! + provides serialization support + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename image_array + > + double test_shape_predictor ( + const shape_predictor& sp, + const image_array& images, + const std::vector<std::vector<full_object_detection> >& objects, + const std::vector<std::vector<double> >& scales + ); + /*! + requires + - image_array is a dlib::array of image objects where each image object + implements the interface defined in dlib/image_processing/generic_image.h + - images.size() == objects.size() + - for all valid i and j: + - objects[i][j].num_parts() == sp.num_parts() + - if (scales.size() != 0) then + - There must be a scale value for each full_object_detection in objects. + That is, it must be the case that: + - scales.size() == objects.size() + - for all valid i: + - scales[i].size() == objects[i].size() + ensures + - Tests the given shape_predictor by running it on each of the given objects and + checking how well it recovers the part positions. In particular, for all + valid i and j we perform: + sp(images[i], objects[i][j].get_rect()) + and compare the result with the truth part positions in objects[i][j]. 
We + then return the average distance (measured in pixels) between a predicted + part location and its true position. + - Note that any parts in objects that are set to OBJECT_PART_NOT_PRESENT are + simply ignored. + - if (scales.size() != 0) then + - Each time we compute the distance between a predicted part location and + its true location in objects[i][j] we divide the distance by + scales[i][j]. Therefore, if you want the reported error to be the + average pixel distance then give an empty scales vector, but if you want + the returned value to be something else like the average distance + normalized by some feature of each object (e.g. the interocular distance) + then you can supply those normalizing values via scales. + !*/ + + template < + typename image_array + > + double test_shape_predictor ( + const shape_predictor& sp, + const image_array& images, + const std::vector<std::vector<full_object_detection> >& objects + ); + /*! + requires + - image_array is a dlib::array of image objects where each image object + implements the interface defined in dlib/image_processing/generic_image.h + - images.size() == objects.size() + - for all valid i and j: + - objects[i][j].num_parts() == sp.num_parts() + ensures + - returns test_shape_predictor(sp, images, objects, no_scales) where no_scales + is an empty vector. So this is just a convenience function for calling the + above test_shape_predictor() routine without a scales argument. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SHAPE_PREDICToR_ABSTRACT_H_ + |