summaryrefslogtreecommitdiffstats
path: root/ml/dlib/examples/train_shape_predictor_ex.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-03-09 13:19:22 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-03-09 13:19:22 +0000
commitc21c3b0befeb46a51b6bf3758ffa30813bea0ff0 (patch)
tree9754ff1ca740f6346cf8483ec915d4054bc5da2d /ml/dlib/examples/train_shape_predictor_ex.cpp
parentAdding upstream version 1.43.2. (diff)
downloadnetdata-c21c3b0befeb46a51b6bf3758ffa30813bea0ff0.tar.xz
netdata-c21c3b0befeb46a51b6bf3758ffa30813bea0ff0.zip
Adding upstream version 1.44.3.upstream/1.44.3
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/dlib/examples/train_shape_predictor_ex.cpp')
-rw-r--r--ml/dlib/examples/train_shape_predictor_ex.cpp198
1 files changed, 198 insertions, 0 deletions
diff --git a/ml/dlib/examples/train_shape_predictor_ex.cpp b/ml/dlib/examples/train_shape_predictor_ex.cpp
new file mode 100644
index 000000000..05eaf4b0e
--- /dev/null
+++ b/ml/dlib/examples/train_shape_predictor_ex.cpp
@@ -0,0 +1,198 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+ This example program shows how to use dlib's implementation of the paper:
+ One Millisecond Face Alignment with an Ensemble of Regression Trees by
+ Vahid Kazemi and Josephine Sullivan, CVPR 2014
+
+ In particular, we will train a face landmarking model based on a small dataset
+ and then evaluate it. If you want to visualize the output of the trained
+ model on some images then you can run the face_landmark_detection_ex.cpp
+ example program with sp.dat as the input model.
+
+ It should also be noted that this kind of model, while often used for face
+ landmarking, is quite general and can be used for a variety of shape
+ prediction tasks. But here we demonstrate it only on a simple face
+ landmarking task.
+*/
+
+
+#include <dlib/image_processing.h>
+#include <dlib/data_io.h>
+#include <iostream>
+
+using namespace dlib;
+using namespace std;
+
+// ----------------------------------------------------------------------------------------
+
std::vector<std::vector<double> > get_interocular_distances (
    const std::vector<std::vector<full_object_detection> >& objects
);
/*!
    ensures
        - returns an object D such that:
            - D[i][j] == the distance, in pixels, between the centers of the eyes for
              the face represented by objects[i][j].  (Used below to normalize the
              landmarking error reported by test_shape_predictor().)
!*/
+
+// ----------------------------------------------------------------------------------------
+
+int main(int argc, char** argv)
+{
+ try
+ {
+ // In this example we are going to train a shape_predictor based on the
+ // small faces dataset in the examples/faces directory. So the first
+ // thing we do is load that dataset. This means you need to supply the
+ // path to this faces folder as a command line argument so we will know
+ // where it is.
+ if (argc != 2)
+ {
+ cout << "Give the path to the examples/faces directory as the argument to this" << endl;
+ cout << "program. For example, if you are in the examples folder then execute " << endl;
+ cout << "this program by running: " << endl;
+ cout << " ./train_shape_predictor_ex faces" << endl;
+ cout << endl;
+ return 0;
+ }
+ const std::string faces_directory = argv[1];
+ // The faces directory contains a training dataset and a separate
+ // testing dataset. The training data consists of 4 images, each
+ // annotated with rectangles that bound each human face along with 68
+ // face landmarks on each face. The idea is to use this training data
+ // to learn to identify the position of landmarks on human faces in new
+ // images.
+ //
+ // Once you have trained a shape_predictor it is always important to
+ // test it on data it wasn't trained on. Therefore, we will also load
+ // a separate testing set of 5 images. Once we have a shape_predictor
+ // created from the training data we will see how well it works by
+ // running it on the testing images.
+ //
+ // So here we create the variables that will hold our dataset.
+ // images_train will hold the 4 training images and faces_train holds
+ // the locations and poses of each face in the training images. So for
+ // example, the image images_train[0] has the faces given by the
+ // full_object_detections in faces_train[0].
+ dlib::array<array2d<unsigned char> > images_train, images_test;
+ std::vector<std::vector<full_object_detection> > faces_train, faces_test;
+
+ // Now we load the data. These XML files list the images in each
+ // dataset and also contain the positions of the face boxes and
+ // landmarks (called parts in the XML file). Obviously you can use any
+ // kind of input format you like so long as you store the data into
+ // images_train and faces_train. But for convenience dlib comes with
+ // tools for creating and loading XML image dataset files. Here you see
+ // how to load the data. To create the XML files you can use the imglab
+ // tool which can be found in the tools/imglab folder. It is a simple
+ // graphical tool for labeling objects in images. To see how to use it
+ // read the tools/imglab/README.txt file.
+ load_image_dataset(images_train, faces_train, faces_directory+"/training_with_face_landmarks.xml");
+ load_image_dataset(images_test, faces_test, faces_directory+"/testing_with_face_landmarks.xml");
+
+ // Now make the object responsible for training the model.
+ shape_predictor_trainer trainer;
+ // This algorithm has a bunch of parameters you can mess with. The
+ // documentation for the shape_predictor_trainer explains all of them.
+ // You should also read Kazemi's paper which explains all the parameters
+ // in great detail. However, here I'm just setting three of them
+ // differently than their default values. I'm doing this because we
+ // have a very small dataset. In particular, setting the oversampling
+ // to a high amount (300) effectively boosts the training set size, so
+ // that helps this example.
+ trainer.set_oversampling_amount(300);
+ // I'm also reducing the capacity of the model by explicitly increasing
+ // the regularization (making nu smaller) and by using trees with
+ // smaller depths.
+ trainer.set_nu(0.05);
+ trainer.set_tree_depth(2);
+
+ // some parts of training process can be parallelized.
+ // Trainer will use this count of threads when possible
+ trainer.set_num_threads(2);
+
+ // Tell the trainer to print status messages to the console so we can
+ // see how long the training will take.
+ trainer.be_verbose();
+
+ // Now finally generate the shape model
+ shape_predictor sp = trainer.train(images_train, faces_train);
+
+
+ // Now that we have a model we can test it. This function measures the
+ // average distance between a face landmark output by the
+ // shape_predictor and where it should be according to the truth data.
+ // Note that there is an optional 4th argument that lets us rescale the
+ // distances. Here we are causing the output to scale each face's
+ // distances by the interocular distance, as is customary when
+ // evaluating face landmarking systems.
+ cout << "mean training error: "<<
+ test_shape_predictor(sp, images_train, faces_train, get_interocular_distances(faces_train)) << endl;
+
+ // The real test is to see how well it does on data it wasn't trained
+ // on. We trained it on a very small dataset so the accuracy is not
+ // extremely high, but it's still doing quite good. Moreover, if you
+ // train it on one of the large face landmarking datasets you will
+ // obtain state-of-the-art results, as shown in the Kazemi paper.
+ cout << "mean testing error: "<<
+ test_shape_predictor(sp, images_test, faces_test, get_interocular_distances(faces_test)) << endl;
+
+ // Finally, we save the model to disk so we can use it later.
+ serialize("sp.dat") << sp;
+ }
+ catch (exception& e)
+ {
+ cout << "\nexception thrown!" << endl;
+ cout << e.what() << endl;
+ }
+}
+
+// ----------------------------------------------------------------------------------------
+
+double interocular_distance (
+ const full_object_detection& det
+)
+{
+ dlib::vector<double,2> l, r;
+ double cnt = 0;
+ // Find the center of the left eye by averaging the points around
+ // the eye.
+ for (unsigned long i = 36; i <= 41; ++i)
+ {
+ l += det.part(i);
+ ++cnt;
+ }
+ l /= cnt;
+
+ // Find the center of the right eye by averaging the points around
+ // the eye.
+ cnt = 0;
+ for (unsigned long i = 42; i <= 47; ++i)
+ {
+ r += det.part(i);
+ ++cnt;
+ }
+ r /= cnt;
+
+ // Now return the distance between the centers of the eyes
+ return length(l-r);
+}
+
+std::vector<std::vector<double> > get_interocular_distances (
+ const std::vector<std::vector<full_object_detection> >& objects
+)
+{
+ std::vector<std::vector<double> > temp(objects.size());
+ for (unsigned long i = 0; i < objects.size(); ++i)
+ {
+ for (unsigned long j = 0; j < objects[i].size(); ++j)
+ {
+ temp[i].push_back(interocular_distance(objects[i][j]));
+ }
+ }
+ return temp;
+}
+
+// ----------------------------------------------------------------------------------------
+