diff options
Diffstat (limited to '')
-rw-r--r-- | ml/dlib/examples/train_shape_predictor_ex.cpp | 198 |
1 files changed, 198 insertions, 0 deletions
diff --git a/ml/dlib/examples/train_shape_predictor_ex.cpp b/ml/dlib/examples/train_shape_predictor_ex.cpp new file mode 100644 index 000000000..05eaf4b0e --- /dev/null +++ b/ml/dlib/examples/train_shape_predictor_ex.cpp @@ -0,0 +1,198 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This example program shows how to use dlib's implementation of the paper: + One Millisecond Face Alignment with an Ensemble of Regression Trees by + Vahid Kazemi and Josephine Sullivan, CVPR 2014 + + In particular, we will train a face landmarking model based on a small dataset + and then evaluate it. If you want to visualize the output of the trained + model on some images then you can run the face_landmark_detection_ex.cpp + example program with sp.dat as the input model. + + It should also be noted that this kind of model, while often used for face + landmarking, is quite general and can be used for a variety of shape + prediction tasks. But here we demonstrate it only on a simple face + landmarking task. +*/ + + +#include <dlib/image_processing.h> +#include <dlib/data_io.h> +#include <iostream> + +using namespace dlib; +using namespace std; + +// ---------------------------------------------------------------------------------------- + +std::vector<std::vector<double> > get_interocular_distances ( + const std::vector<std::vector<full_object_detection> >& objects +); +/*! + ensures + - returns an object D such that: + - D[i][j] == the distance, in pixels, between the eyes for the face represented + by objects[i][j]. +!*/ + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) +{ + try + { + // In this example we are going to train a shape_predictor based on the + // small faces dataset in the examples/faces directory. So the first + // thing we do is load that dataset. This means you need to supply the + // path to this faces folder as a command line argument so we will know + // where it is. + if (argc != 2) + { + cout << "Give the path to the examples/faces directory as the argument to this" << endl; + cout << "program. For example, if you are in the examples folder then execute " << endl; + cout << "this program by running: " << endl; + cout << " ./train_shape_predictor_ex faces" << endl; + cout << endl; + return 0; + } + const std::string faces_directory = argv[1]; + // The faces directory contains a training dataset and a separate + // testing dataset. The training data consists of 4 images, each + // annotated with rectangles that bound each human face along with 68 + // face landmarks on each face. The idea is to use this training data + // to learn to identify the position of landmarks on human faces in new + // images. + // + // Once you have trained a shape_predictor it is always important to + // test it on data it wasn't trained on. Therefore, we will also load + // a separate testing set of 5 images. Once we have a shape_predictor + // created from the training data we will see how well it works by + // running it on the testing images. + // + // So here we create the variables that will hold our dataset. + // images_train will hold the 4 training images and faces_train holds + // the locations and poses of each face in the training images. So for + // example, the image images_train[0] has the faces given by the + // full_object_detections in faces_train[0]. + dlib::array<array2d<unsigned char> > images_train, images_test; + std::vector<std::vector<full_object_detection> > faces_train, faces_test; + + // Now we load the data. These XML files list the images in each + // dataset and also contain the positions of the face boxes and + // landmarks (called parts in the XML file). Obviously you can use any + // kind of input format you like so long as you store the data into + // images_train and faces_train. But for convenience dlib comes with + // tools for creating and loading XML image dataset files. Here you see + // how to load the data. To create the XML files you can use the imglab + // tool which can be found in the tools/imglab folder. It is a simple + // graphical tool for labeling objects in images. To see how to use it + // read the tools/imglab/README.txt file. + load_image_dataset(images_train, faces_train, faces_directory+"/training_with_face_landmarks.xml"); + load_image_dataset(images_test, faces_test, faces_directory+"/testing_with_face_landmarks.xml"); + + // Now make the object responsible for training the model. + shape_predictor_trainer trainer; + // This algorithm has a bunch of parameters you can mess with. The + // documentation for the shape_predictor_trainer explains all of them. + // You should also read Kazemi's paper which explains all the parameters + // in great detail. However, here I'm just setting three of them + // differently than their default values. I'm doing this because we + // have a very small dataset. In particular, setting the oversampling + // to a high amount (300) effectively boosts the training set size, so + // that helps this example. + trainer.set_oversampling_amount(300); + // I'm also reducing the capacity of the model by explicitly increasing + // the regularization (making nu smaller) and by using trees with + // smaller depths. + trainer.set_nu(0.05); + trainer.set_tree_depth(2); + + // some parts of training process can be parallelized. + // Trainer will use this count of threads when possible + trainer.set_num_threads(2); + + // Tell the trainer to print status messages to the console so we can + // see how long the training will take. + trainer.be_verbose(); + + // Now finally generate the shape model + shape_predictor sp = trainer.train(images_train, faces_train); + + + // Now that we have a model we can test it. This function measures the + // average distance between a face landmark output by the + // shape_predictor and where it should be according to the truth data. + // Note that there is an optional 4th argument that lets us rescale the + // distances. Here we are causing the output to scale each face's + // distances by the interocular distance, as is customary when + // evaluating face landmarking systems. + cout << "mean training error: "<< + test_shape_predictor(sp, images_train, faces_train, get_interocular_distances(faces_train)) << endl; + + // The real test is to see how well it does on data it wasn't trained + // on. We trained it on a very small dataset so the accuracy is not + // extremely high, but it's still doing quite good. Moreover, if you + // train it on one of the large face landmarking datasets you will + // obtain state-of-the-art results, as shown in the Kazemi paper. + cout << "mean testing error: "<< + test_shape_predictor(sp, images_test, faces_test, get_interocular_distances(faces_test)) << endl; + + // Finally, we save the model to disk so we can use it later. + serialize("sp.dat") << sp; + } + catch (exception& e) + { + cout << "\nexception thrown!" << endl; + cout << e.what() << endl; + } +} + +// ---------------------------------------------------------------------------------------- + +double interocular_distance ( + const full_object_detection& det +) +{ + dlib::vector<double,2> l, r; + double cnt = 0; + // Find the center of the left eye by averaging the points around + // the eye. + for (unsigned long i = 36; i <= 41; ++i) + { + l += det.part(i); + ++cnt; + } + l /= cnt; + + // Find the center of the right eye by averaging the points around + // the eye. + cnt = 0; + for (unsigned long i = 42; i <= 47; ++i) + { + r += det.part(i); + ++cnt; + } + r /= cnt; + + // Now return the distance between the centers of the eyes + return length(l-r); +} + +std::vector<std::vector<double> > get_interocular_distances ( + const std::vector<std::vector<full_object_detection> >& objects +) +{ + std::vector<std::vector<double> > temp(objects.size()); + for (unsigned long i = 0; i < objects.size(); ++i) + { + for (unsigned long j = 0; j < objects[i].size(); ++j) + { + temp[i].push_back(interocular_distance(objects[i][j])); + } + } + return temp; +} + +// ---------------------------------------------------------------------------------------- + |