summaryrefslogtreecommitdiffstats
path: root/ml/dlib/examples/train_shape_predictor_ex.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/examples/train_shape_predictor_ex.cpp')
-rw-r--r--ml/dlib/examples/train_shape_predictor_ex.cpp198
1 files changed, 0 insertions, 198 deletions
diff --git a/ml/dlib/examples/train_shape_predictor_ex.cpp b/ml/dlib/examples/train_shape_predictor_ex.cpp
deleted file mode 100644
index 05eaf4b0e..000000000
--- a/ml/dlib/examples/train_shape_predictor_ex.cpp
+++ /dev/null
@@ -1,198 +0,0 @@
-// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
-/*
-
- This example program shows how to use dlib's implementation of the paper:
- One Millisecond Face Alignment with an Ensemble of Regression Trees by
- Vahid Kazemi and Josephine Sullivan, CVPR 2014
-
- In particular, we will train a face landmarking model based on a small dataset
- and then evaluate it. If you want to visualize the output of the trained
- model on some images then you can run the face_landmark_detection_ex.cpp
- example program with sp.dat as the input model.
-
- It should also be noted that this kind of model, while often used for face
- landmarking, is quite general and can be used for a variety of shape
- prediction tasks. But here we demonstrate it only on a simple face
- landmarking task.
-*/
-
-
-#include <dlib/image_processing.h>
-#include <dlib/data_io.h>
-#include <iostream>
-
-using namespace dlib;
-using namespace std;
-
-// ----------------------------------------------------------------------------------------
-
// Forward declaration; the definition appears after main() below.
std::vector<std::vector<double> > get_interocular_distances (
    const std::vector<std::vector<full_object_detection> >& objects
);
/*!
    ensures
        - returns an object D such that:
            - D[i][j] == the distance, in pixels, between the eyes for the face
              represented by objects[i][j].
!*/
-
-// ----------------------------------------------------------------------------------------
-
-int main(int argc, char** argv)
-{
- try
- {
- // In this example we are going to train a shape_predictor based on the
- // small faces dataset in the examples/faces directory. So the first
- // thing we do is load that dataset. This means you need to supply the
- // path to this faces folder as a command line argument so we will know
- // where it is.
- if (argc != 2)
- {
- cout << "Give the path to the examples/faces directory as the argument to this" << endl;
- cout << "program. For example, if you are in the examples folder then execute " << endl;
- cout << "this program by running: " << endl;
- cout << " ./train_shape_predictor_ex faces" << endl;
- cout << endl;
- return 0;
- }
- const std::string faces_directory = argv[1];
- // The faces directory contains a training dataset and a separate
- // testing dataset. The training data consists of 4 images, each
- // annotated with rectangles that bound each human face along with 68
- // face landmarks on each face. The idea is to use this training data
- // to learn to identify the position of landmarks on human faces in new
- // images.
- //
- // Once you have trained a shape_predictor it is always important to
- // test it on data it wasn't trained on. Therefore, we will also load
- // a separate testing set of 5 images. Once we have a shape_predictor
- // created from the training data we will see how well it works by
- // running it on the testing images.
- //
- // So here we create the variables that will hold our dataset.
- // images_train will hold the 4 training images and faces_train holds
- // the locations and poses of each face in the training images. So for
- // example, the image images_train[0] has the faces given by the
- // full_object_detections in faces_train[0].
- dlib::array<array2d<unsigned char> > images_train, images_test;
- std::vector<std::vector<full_object_detection> > faces_train, faces_test;
-
- // Now we load the data. These XML files list the images in each
- // dataset and also contain the positions of the face boxes and
- // landmarks (called parts in the XML file). Obviously you can use any
- // kind of input format you like so long as you store the data into
- // images_train and faces_train. But for convenience dlib comes with
- // tools for creating and loading XML image dataset files. Here you see
- // how to load the data. To create the XML files you can use the imglab
- // tool which can be found in the tools/imglab folder. It is a simple
- // graphical tool for labeling objects in images. To see how to use it
- // read the tools/imglab/README.txt file.
- load_image_dataset(images_train, faces_train, faces_directory+"/training_with_face_landmarks.xml");
- load_image_dataset(images_test, faces_test, faces_directory+"/testing_with_face_landmarks.xml");
-
- // Now make the object responsible for training the model.
- shape_predictor_trainer trainer;
- // This algorithm has a bunch of parameters you can mess with. The
- // documentation for the shape_predictor_trainer explains all of them.
- // You should also read Kazemi's paper which explains all the parameters
- // in great detail. However, here I'm just setting three of them
- // differently than their default values. I'm doing this because we
- // have a very small dataset. In particular, setting the oversampling
- // to a high amount (300) effectively boosts the training set size, so
- // that helps this example.
- trainer.set_oversampling_amount(300);
- // I'm also reducing the capacity of the model by explicitly increasing
- // the regularization (making nu smaller) and by using trees with
- // smaller depths.
- trainer.set_nu(0.05);
- trainer.set_tree_depth(2);
-
- // some parts of training process can be parallelized.
- // Trainer will use this count of threads when possible
- trainer.set_num_threads(2);
-
- // Tell the trainer to print status messages to the console so we can
- // see how long the training will take.
- trainer.be_verbose();
-
- // Now finally generate the shape model
- shape_predictor sp = trainer.train(images_train, faces_train);
-
-
- // Now that we have a model we can test it. This function measures the
- // average distance between a face landmark output by the
- // shape_predictor and where it should be according to the truth data.
- // Note that there is an optional 4th argument that lets us rescale the
- // distances. Here we are causing the output to scale each face's
- // distances by the interocular distance, as is customary when
- // evaluating face landmarking systems.
- cout << "mean training error: "<<
- test_shape_predictor(sp, images_train, faces_train, get_interocular_distances(faces_train)) << endl;
-
- // The real test is to see how well it does on data it wasn't trained
- // on. We trained it on a very small dataset so the accuracy is not
- // extremely high, but it's still doing quite good. Moreover, if you
- // train it on one of the large face landmarking datasets you will
- // obtain state-of-the-art results, as shown in the Kazemi paper.
- cout << "mean testing error: "<<
- test_shape_predictor(sp, images_test, faces_test, get_interocular_distances(faces_test)) << endl;
-
- // Finally, we save the model to disk so we can use it later.
- serialize("sp.dat") << sp;
- }
- catch (exception& e)
- {
- cout << "\nexception thrown!" << endl;
- cout << e.what() << endl;
- }
-}
-
-// ----------------------------------------------------------------------------------------
-
-double interocular_distance (
- const full_object_detection& det
-)
-{
- dlib::vector<double,2> l, r;
- double cnt = 0;
- // Find the center of the left eye by averaging the points around
- // the eye.
- for (unsigned long i = 36; i <= 41; ++i)
- {
- l += det.part(i);
- ++cnt;
- }
- l /= cnt;
-
- // Find the center of the right eye by averaging the points around
- // the eye.
- cnt = 0;
- for (unsigned long i = 42; i <= 47; ++i)
- {
- r += det.part(i);
- ++cnt;
- }
- r /= cnt;
-
- // Now return the distance between the centers of the eyes
- return length(l-r);
-}
-
-std::vector<std::vector<double> > get_interocular_distances (
- const std::vector<std::vector<full_object_detection> >& objects
-)
-{
- std::vector<std::vector<double> > temp(objects.size());
- for (unsigned long i = 0; i < objects.size(); ++i)
- {
- for (unsigned long j = 0; j < objects[i].size(); ++j)
- {
- temp[i].push_back(interocular_distance(objects[i][j]));
- }
- }
- return temp;
-}
-
-// ----------------------------------------------------------------------------------------
-