diff options
Diffstat (limited to 'ml/dlib/python_examples')
19 files changed, 2041 insertions, 0 deletions
diff --git a/ml/dlib/python_examples/LICENSE_FOR_EXAMPLE_PROGRAMS.txt b/ml/dlib/python_examples/LICENSE_FOR_EXAMPLE_PROGRAMS.txt new file mode 100644 index 00000000..2bdbd569 --- /dev/null +++ b/ml/dlib/python_examples/LICENSE_FOR_EXAMPLE_PROGRAMS.txt @@ -0,0 +1,20 @@ +The intent of the example programs supplied with the dlib C++ library is +to both instruct users and to also provide a simple body of code they +may copy and paste from. To make this as painless as possible all the +example programs have been placed into the public domain. + + +This work is hereby released into the Public Domain. +To view a copy of the public domain dedication, visit +http://creativecommons.org/licenses/publicdomain/ or send a +letter to + Creative Commons + 171 Second Street + Suite 300, + San Francisco, California, 94105, USA. + + +Public domain dedications are not recognized by some countries. So +if you live in an area where the above dedication isn't valid then +you can consider the example programs to be licensed under the Boost +Software License. diff --git a/ml/dlib/python_examples/cnn_face_detector.py b/ml/dlib/python_examples/cnn_face_detector.py new file mode 100755 index 00000000..75357a62 --- /dev/null +++ b/ml/dlib/python_examples/cnn_face_detector.py @@ -0,0 +1,85 @@ +#!/usr/bin/python +# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +# +# This example shows how to run a CNN based face detector using dlib. The +# example loads a pretrained model and uses it to find faces in images. The +# CNN model is much more accurate than the HOG based model shown in the +# face_detector.py example, but takes much more computational power to +# run, and is meant to be executed on a GPU to attain reasonable speed. +# +# You can download the pre-trained model from: +# http://dlib.net/files/mmod_human_face_detector.dat.bz2 +# +# The examples/faces folder contains some jpg images of people. 
You can run +# this program on them and see the detections by executing the +# following command: +# ./cnn_face_detector.py mmod_human_face_detector.dat ../examples/faces/*.jpg +# +# +# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE +# You can install dlib using the command: +# pip install dlib +# +# Alternatively, if you want to compile dlib yourself then go into the dlib +# root folder and run: +# python setup.py install +# or +# python setup.py install --yes USE_AVX_INSTRUCTIONS --yes DLIB_USE_CUDA +# if you have a CPU that supports AVX instructions, you have an Nvidia GPU +# and you have CUDA installed since this makes things run *much* faster. +# +# Compiling dlib should work on any operating system so long as you have +# CMake installed. On Ubuntu, this can be done easily by running the +# command: +# sudo apt-get install cmake +# +# Also note that this example requires scikit-image which can be installed +# via the command: +# pip install scikit-image +# Or downloaded from http://scikit-image.org/download.html. + +import sys +import dlib +from skimage import io + +if len(sys.argv) < 3: + print( + "Call this program like this:\n" + " ./cnn_face_detector.py mmod_human_face_detector.dat ../examples/faces/*.jpg\n" + "You can get the mmod_human_face_detector.dat file from:\n" + " http://dlib.net/files/mmod_human_face_detector.dat.bz2") + exit() + +cnn_face_detector = dlib.cnn_face_detection_model_v1(sys.argv[1]) +win = dlib.image_window() + +for f in sys.argv[2:]: + print("Processing file: {}".format(f)) + img = io.imread(f) + # The 1 in the second argument indicates that we should upsample the image + # 1 time. This will make everything bigger and allow us to detect more + # faces. + dets = cnn_face_detector(img, 1) + ''' + This detector returns a mmod_rectangles object. This object contains a list of mmod_rectangle objects. 
+ These objects can be accessed by simply iterating over the mmod_rectangles object + The mmod_rectangle object has two member variables, a dlib.rectangle object, and a confidence score. + + It is also possible to pass a list of images to the detector. + - like this: dets = cnn_face_detector([image list], upsample_num, batch_size = 128) + + In this case it will return a mmod_rectangless object. + This object behaves just like a list of lists and can be iterated over. + ''' + print("Number of faces detected: {}".format(len(dets))) + for i, d in enumerate(dets): + print("Detection {}: Left: {} Top: {} Right: {} Bottom: {} Confidence: {}".format( + i, d.rect.left(), d.rect.top(), d.rect.right(), d.rect.bottom(), d.confidence)) + + rects = dlib.rectangles() + rects.extend([d.rect for d in dets]) + + win.clear_overlay() + win.set_image(img) + win.add_overlay(rects) + dlib.hit_enter_to_continue() diff --git a/ml/dlib/python_examples/correlation_tracker.py b/ml/dlib/python_examples/correlation_tracker.py new file mode 100755 index 00000000..4493a55b --- /dev/null +++ b/ml/dlib/python_examples/correlation_tracker.py @@ -0,0 +1,72 @@ +#!/usr/bin/python +# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +# +# This example shows how to use the correlation_tracker from the dlib Python +# library. This object lets you track the position of an object as it moves +# from frame to frame in a video sequence. To use it, you give the +# correlation_tracker the bounding box of the object you want to track in the +# current video frame. Then it will identify the location of the object in +# subsequent frames. +# +# In this particular example, we are going to run on the +# video sequence that comes with dlib, which can be found in the +# examples/video_frames folder. This video shows a juice box sitting on a table +# and someone is waving the camera around. The task is to track the position of +# the juice box as the camera moves around. 
+# +# +# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE +# You can install dlib using the command: +# pip install dlib +# +# Alternatively, if you want to compile dlib yourself then go into the dlib +# root folder and run: +# python setup.py install +# or +# python setup.py install --yes USE_AVX_INSTRUCTIONS +# if you have a CPU that supports AVX instructions, since this makes some +# things run faster. +# +# Compiling dlib should work on any operating system so long as you have +# CMake installed. On Ubuntu, this can be done easily by running the +# command: +# sudo apt-get install cmake +# +# Also note that this example requires scikit-image which can be installed +# via the command: +# pip install scikit-image +# Or downloaded from http://scikit-image.org/download.html. + +import os +import glob + +import dlib +from skimage import io + +# Path to the video frames +video_folder = os.path.join("..", "examples", "video_frames") + +# Create the correlation tracker - the object needs to be initialized +# before it can be used +tracker = dlib.correlation_tracker() + +win = dlib.image_window() +# We will track the frames as we load them off of disk +for k, f in enumerate(sorted(glob.glob(os.path.join(video_folder, "*.jpg")))): + print("Processing Frame {}".format(k)) + img = io.imread(f) + + # We need to initialize the tracker on the first frame + if k == 0: + # Start a track on the juice box. If you look at the first frame you + # will see that the juice box is contained within the bounding + # box (74, 67, 112, 153). 
+ tracker.start_track(img, dlib.rectangle(74, 67, 112, 153)) + else: + # Else we just attempt to track from the previous frame + tracker.update(img) + + win.clear_overlay() + win.set_image(img) + win.add_overlay(tracker.get_position()) + dlib.hit_enter_to_continue() diff --git a/ml/dlib/python_examples/face_alignment.py b/ml/dlib/python_examples/face_alignment.py new file mode 100755 index 00000000..53df7a3e --- /dev/null +++ b/ml/dlib/python_examples/face_alignment.py @@ -0,0 +1,91 @@ +#!/usr/bin/python +# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +# +# This example shows how to use dlib's face recognition tool for image alignment. +# +# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE +# You can install dlib using the command: +# pip install dlib +# +# Alternatively, if you want to compile dlib yourself then go into the dlib +# root folder and run: +# python setup.py install +# or +# python setup.py install --yes USE_AVX_INSTRUCTIONS +# if you have a CPU that supports AVX instructions, since this makes some +# things run faster. This code will also use CUDA if you have CUDA and cuDNN +# installed. +# +# Compiling dlib should work on any operating system so long as you have +# CMake installed. 
On Ubuntu, this can be done easily by running the +# command: +# sudo apt-get install cmake +# +# Also note that this example requires OpenCV and Numpy which can be installed +# via the command: +# pip install opencv-python numpy +# Or downloaded from http://opencv.org/releases.html + +import sys + +import dlib +import cv2 +import numpy as np + +if len(sys.argv) != 3: + print( + "Call this program like this:\n" + " ./face_alignment.py shape_predictor_5_face_landmarks.dat ../examples/faces/bald_guys.jpg\n" + "You can download a trained facial shape predictor from:\n" + " http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2\n") + exit() + +predictor_path = sys.argv[1] +face_file_path = sys.argv[2] + +# Load all the models we need: a detector to find the faces, a shape predictor +# to find face landmarks so we can precisely localize the face +detector = dlib.get_frontal_face_detector() +sp = dlib.shape_predictor(predictor_path) + +# Load the image using OpenCV +bgr_img = cv2.imread(face_file_path) +if bgr_img is None: + print("Sorry, we could not load '{}' as an image".format(face_file_path)) + exit() + +# Convert to RGB since dlib uses RGB images +img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB) + +# Ask the detector to find the bounding boxes of each face. The 1 in the +# second argument indicates that we should upsample the image 1 time. This +# will make everything bigger and allow us to detect more faces. +dets = detector(img, 1) + +num_faces = len(dets) +if num_faces == 0: + print("Sorry, there were no faces found in '{}'".format(face_file_path)) + exit() + +# Find the 5 face landmarks we need to do the alignment. 
+faces = dlib.full_object_detections() +for detection in dets: + faces.append(sp(img, detection)) + +# Get the aligned face images +# Optionally: +# images = dlib.get_face_chips(img, faces, size=160, padding=0.25) +images = dlib.get_face_chips(img, faces, size=320) +for image in images: + cv_bgr_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + cv2.imshow('image',cv_bgr_img) + cv2.waitKey(0) + +# It is also possible to get a single chip +image = dlib.get_face_chip(img, faces[0]) +cv_bgr_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) +cv2.imshow('image',cv_bgr_img) +cv2.waitKey(0) + +cv2.destroyAllWindows() + diff --git a/ml/dlib/python_examples/face_clustering.py b/ml/dlib/python_examples/face_clustering.py new file mode 100755 index 00000000..36261387 --- /dev/null +++ b/ml/dlib/python_examples/face_clustering.py @@ -0,0 +1,127 @@ +#!/usr/bin/python +# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +# +# This example shows how to use dlib's face recognition tool for clustering using chinese_whispers. +# This is useful when you have a collection of photographs which you know are linked to +# a particular person, but the person may be photographed with multiple other people. +# In this example, we assume the largest cluster will contain photos of the common person in the +# collection of photographs. Then, we save extracted images of the face in the largest cluster in +# a 150x150 px format which is suitable for jittering and loading to perform metric learning (as shown +# in the dnn_metric_learning_on_images_ex.cpp example).
+# https://github.com/davisking/dlib/blob/master/examples/dnn_metric_learning_on_images_ex.cpp +# +# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE +# You can install dlib using the command: +# pip install dlib +# +# Alternatively, if you want to compile dlib yourself then go into the dlib +# root folder and run: +# python setup.py install +# or +# python setup.py install --yes USE_AVX_INSTRUCTIONS +# if you have a CPU that supports AVX instructions, since this makes some +# things run faster. This code will also use CUDA if you have CUDA and cuDNN +# installed. +# +# Compiling dlib should work on any operating system so long as you have +# CMake installed. On Ubuntu, this can be done easily by running the +# command: +# sudo apt-get install cmake +# +# Also note that this example requires scikit-image which can be installed +# via the command: +# pip install scikit-image +# Or downloaded from http://scikit-image.org/download.html. + +import sys +import os +import dlib +import glob +from skimage import io + +if len(sys.argv) != 5: + print( + "Call this program like this:\n" + " ./face_clustering.py shape_predictor_5_face_landmarks.dat dlib_face_recognition_resnet_model_v1.dat ../examples/faces output_folder\n" + "You can download a trained facial shape predictor and recognition model from:\n" + " http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2\n" + " http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2") + exit() + +predictor_path = sys.argv[1] +face_rec_model_path = sys.argv[2] +faces_folder_path = sys.argv[3] +output_folder_path = sys.argv[4] + +# Load all the models we need: a detector to find the faces, a shape predictor +# to find face landmarks so we can precisely localize the face, and finally the +# face recognition model. 
+detector = dlib.get_frontal_face_detector() +sp = dlib.shape_predictor(predictor_path) +facerec = dlib.face_recognition_model_v1(face_rec_model_path) + +descriptors = [] +images = [] + +# Now find all the faces and compute 128D face descriptors for each face. +for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")): + print("Processing file: {}".format(f)) + img = io.imread(f) + + # Ask the detector to find the bounding boxes of each face. The 1 in the + # second argument indicates that we should upsample the image 1 time. This + # will make everything bigger and allow us to detect more faces. + dets = detector(img, 1) + print("Number of faces detected: {}".format(len(dets))) + + # Now process each face we found. + for k, d in enumerate(dets): + # Get the landmarks/parts for the face in box d. + shape = sp(img, d) + + # Compute the 128D vector that describes the face in img identified by + # shape. + face_descriptor = facerec.compute_face_descriptor(img, shape) + descriptors.append(face_descriptor) + images.append((img, shape)) + +# Now let's cluster the faces. 
+labels = dlib.chinese_whispers_clustering(descriptors, 0.5) +num_classes = len(set(labels)) +print("Number of clusters: {}".format(num_classes)) + +# Find biggest class +biggest_class = None +biggest_class_length = 0 +for i in range(0, num_classes): + class_length = len([label for label in labels if label == i]) + if class_length > biggest_class_length: + biggest_class_length = class_length + biggest_class = i + +print("Biggest cluster id number: {}".format(biggest_class)) +print("Number of faces in biggest cluster: {}".format(biggest_class_length)) + +# Find the indices for the biggest class +indices = [] +for i, label in enumerate(labels): + if label == biggest_class: + indices.append(i) + +print("Indices of images in the biggest cluster: {}".format(str(indices))) + +# Ensure output directory exists +if not os.path.isdir(output_folder_path): + os.makedirs(output_folder_path) + +# Save the extracted faces +print("Saving faces in largest cluster to output folder...") +for i, index in enumerate(indices): + img, shape = images[index] + file_path = os.path.join(output_folder_path, "face_" + str(i)) + # The size and padding arguments are optional with default size=150x150 and padding=0.25 + dlib.save_face_chip(img, shape, file_path, size=150, padding=0.25) + + + + diff --git a/ml/dlib/python_examples/face_detector.py b/ml/dlib/python_examples/face_detector.py new file mode 100755 index 00000000..eed3732b --- /dev/null +++ b/ml/dlib/python_examples/face_detector.py @@ -0,0 +1,84 @@ +#!/usr/bin/python +# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +# +# This example program shows how to find frontal human faces in an image. In +# particular, it shows how you can take a list of images from the command +# line and display each on the screen with red boxes overlaid on each human +# face. +# +# The examples/faces folder contains some jpg images of people. 
You can run +# this program on them and see the detections by executing the +# following command: +# ./face_detector.py ../examples/faces/*.jpg +# +# This face detector is made using the now classic Histogram of Oriented +# Gradients (HOG) feature combined with a linear classifier, an image +# pyramid, and sliding window detection scheme. This type of object detector +# is fairly general and capable of detecting many types of semi-rigid objects +# in addition to human faces. Therefore, if you are interested in making +# your own object detectors then read the train_object_detector.py example +# program. +# +# +# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE +# You can install dlib using the command: +# pip install dlib +# +# Alternatively, if you want to compile dlib yourself then go into the dlib +# root folder and run: +# python setup.py install +# or +# python setup.py install --yes USE_AVX_INSTRUCTIONS +# if you have a CPU that supports AVX instructions, since this makes some +# things run faster. +# +# Compiling dlib should work on any operating system so long as you have +# CMake installed. On Ubuntu, this can be done easily by running the +# command: +# sudo apt-get install cmake +# +# Also note that this example requires scikit-image which can be installed +# via the command: +# pip install scikit-image +# Or downloaded from http://scikit-image.org/download.html. + +import sys + +import dlib +from skimage import io + + +detector = dlib.get_frontal_face_detector() +win = dlib.image_window() + +for f in sys.argv[1:]: + print("Processing file: {}".format(f)) + img = io.imread(f) + # The 1 in the second argument indicates that we should upsample the image + # 1 time. This will make everything bigger and allow us to detect more + # faces. 
+ dets = detector(img, 1) + print("Number of faces detected: {}".format(len(dets))) + for i, d in enumerate(dets): + print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format( + i, d.left(), d.top(), d.right(), d.bottom())) + + win.clear_overlay() + win.set_image(img) + win.add_overlay(dets) + dlib.hit_enter_to_continue() + + +# Finally, if you really want to you can ask the detector to tell you the score +# for each detection. The score is bigger for more confident detections. +# The third argument to run is an optional adjustment to the detection threshold, +# where a negative value will return more detections and a positive value fewer. +# Also, the idx tells you which of the face sub-detectors matched. This can be +# used to broadly identify faces in different orientations. +if (len(sys.argv[1:]) > 0): + img = io.imread(sys.argv[1]) + dets, scores, idx = detector.run(img, 1, -1) + for i, d in enumerate(dets): + print("Detection {}, score: {}, face_type:{}".format( + d, scores[i], idx[i])) + diff --git a/ml/dlib/python_examples/face_jitter.py b/ml/dlib/python_examples/face_jitter.py new file mode 100755 index 00000000..ee959846 --- /dev/null +++ b/ml/dlib/python_examples/face_jitter.py @@ -0,0 +1,97 @@ +#!/usr/bin/python +# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +# +# This example shows how faces were jittered and augmented to create training +# data for dlib's face recognition model. It takes an input image and +# disturbs the colors as well as applies random translations, rotations, and +# scaling. + +# +# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE +# You can install dlib using the command: +# pip install dlib +# +# Alternatively, if you want to compile dlib yourself then go into the dlib +# root folder and run: +# python setup.py install +# or +# python setup.py install --yes USE_AVX_INSTRUCTIONS +# if you have a CPU that supports AVX instructions, since this makes some +# things run faster. 
This code will also use CUDA if you have CUDA and cuDNN
+# installed.
+#
+# Compiling dlib should work on any operating system so long as you have
+# CMake installed. On Ubuntu, this can be done easily by running the
+# command:
+# sudo apt-get install cmake
+#
+# Also note that this example requires OpenCV and Numpy which can be installed
+# via the command:
+# pip install opencv-python numpy
+#
+# The image file used in this example is in the public domain:
+# https://commons.wikimedia.org/wiki/File:Tom_Cruise_avp_2014_4.jpg
+import sys
+
+import dlib
+import cv2
+import numpy as np
+
+def show_jittered_images(jittered_images):
+    '''
+    Displays every image in jittered_images in turn. Each one is converted
+    from RGB to BGR for OpenCV and stays up until a key is pressed.
+    '''
+    for rgb_img in jittered_images:
+        cv2.imshow('image', cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR))
+        cv2.waitKey(0)
+
+if len(sys.argv) != 2:
+    print(
+        "Call this program like this:\n"
+        " ./face_jitter.py shape_predictor_5_face_landmarks.dat\n"
+        "You can download a trained facial shape predictor from:\n"
+        " http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2\n")
+    exit()
+
+predictor_path = sys.argv[1]
+face_file_path = "../examples/faces/Tom_Cruise_avp_2014_4.jpg"
+
+# Load all the models we need: a detector to find the faces, a shape predictor
+# to find face landmarks so we can precisely localize the face
+detector = dlib.get_frontal_face_detector()
+sp = dlib.shape_predictor(predictor_path)
+
+# Load the image using OpenCV
+bgr_img = cv2.imread(face_file_path)
+if bgr_img is None:
+    print("Sorry, we could not load '{}' as an image".format(face_file_path))
+    exit()
+
+# Convert to RGB since dlib uses RGB images
+img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
+
+# Ask the detector to find the bounding boxes of each face.
+dets = detector(img)
+
+num_faces = len(dets)
+
+# Find the 5 face landmarks we need to do the alignment.
+faces = dlib.full_object_detections()
+for detection in dets:
+    faces.append(sp(img, detection))
+
+# Everything below works on faces[0], so stop with a clear message (as
+# face_alignment.py does) instead of failing on that lookup when the detector
+# found no face in the image.
+if num_faces == 0:
+    print("Sorry, there were no faces found in '{}'".format(face_file_path))
+    exit()
+
+# Get the aligned face image and show it
+image = dlib.get_face_chip(img, faces[0], size=320)
+cv_bgr_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+cv2.imshow('image',cv_bgr_img)
+cv2.waitKey(0)
+
+# Show 5 jittered images without data augmentation
+jittered_images = dlib.jitter_image(image, num_jitters=5)
+show_jittered_images(jittered_images)
+
+# Show 5 jittered images with data augmentation
+jittered_images = dlib.jitter_image(image, num_jitters=5, disturb_colors=True)
+show_jittered_images(jittered_images)
+cv2.destroyAllWindows()
diff --git a/ml/dlib/python_examples/face_landmark_detection.py b/ml/dlib/python_examples/face_landmark_detection.py
new file mode 100755
index 00000000..35194131
--- /dev/null
+++ b/ml/dlib/python_examples/face_landmark_detection.py
@@ -0,0 +1,100 @@
+#!/usr/bin/python
+# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+#
+# This example program shows how to find frontal human faces in an image and
+# estimate their pose. The pose takes the form of 68 landmarks. These are
+# points on the face such as the corners of the mouth, along the eyebrows, on
+# the eyes, and so forth.
+#
+# The face detector we use is made using the classic Histogram of Oriented
+# Gradients (HOG) feature combined with a linear classifier, an image pyramid,
+# and sliding window detection scheme. The pose estimator was created by
+# using dlib's implementation of the paper:
+# One Millisecond Face Alignment with an Ensemble of Regression Trees by
+# Vahid Kazemi and Josephine Sullivan, CVPR 2014
+# and was trained on the iBUG 300-W face landmark dataset (see
+# https://ibug.doc.ic.ac.uk/resources/facial-point-annotations/):
+# C. Sagonas, E. Antonakos, G. Tzimiropoulos, S. Zafeiriou, M. Pantic.
+# 300 faces In-the-wild challenge: Database and results.
+# Image and Vision Computing (IMAVIS), Special Issue on Facial Landmark Localisation "In-The-Wild". 2016. +# You can get the trained model file from: +# http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2. +# Note that the license for the iBUG 300-W dataset excludes commercial use. +# So you should contact Imperial College London to find out if it's OK for +# you to use this model file in a commercial product. +# +# +# Also, note that you can train your own models using dlib's machine learning +# tools. See train_shape_predictor.py to see an example. +# +# +# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE +# You can install dlib using the command: +# pip install dlib +# +# Alternatively, if you want to compile dlib yourself then go into the dlib +# root folder and run: +# python setup.py install +# or +# python setup.py install --yes USE_AVX_INSTRUCTIONS +# if you have a CPU that supports AVX instructions, since this makes some +# things run faster. +# +# Compiling dlib should work on any operating system so long as you have +# CMake installed. On Ubuntu, this can be done easily by running the +# command: +# sudo apt-get install cmake +# +# Also note that this example requires scikit-image which can be installed +# via the command: +# pip install scikit-image +# Or downloaded from http://scikit-image.org/download.html. 
+ +import sys +import os +import dlib +import glob +from skimage import io + +if len(sys.argv) != 3: + print( + "Give the path to the trained shape predictor model as the first " + "argument and then the directory containing the facial images.\n" + "For example, if you are in the python_examples folder then " + "execute this program by running:\n" + " ./face_landmark_detection.py shape_predictor_68_face_landmarks.dat ../examples/faces\n" + "You can download a trained facial shape predictor from:\n" + " http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2") + exit() + +predictor_path = sys.argv[1] +faces_folder_path = sys.argv[2] + +detector = dlib.get_frontal_face_detector() +predictor = dlib.shape_predictor(predictor_path) +win = dlib.image_window() + +for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")): + print("Processing file: {}".format(f)) + img = io.imread(f) + + win.clear_overlay() + win.set_image(img) + + # Ask the detector to find the bounding boxes of each face. The 1 in the + # second argument indicates that we should upsample the image 1 time. This + # will make everything bigger and allow us to detect more faces. + dets = detector(img, 1) + print("Number of faces detected: {}".format(len(dets))) + for k, d in enumerate(dets): + print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format( + k, d.left(), d.top(), d.right(), d.bottom())) + # Get the landmarks/parts for the face in box d. + shape = predictor(img, d) + print("Part 0: {}, Part 1: {} ...".format(shape.part(0), + shape.part(1))) + # Draw the face landmarks on the screen. + win.add_overlay(shape) + + win.add_overlay(dets) + dlib.hit_enter_to_continue() diff --git a/ml/dlib/python_examples/face_recognition.py b/ml/dlib/python_examples/face_recognition.py new file mode 100755 index 00000000..da2bdbc5 --- /dev/null +++ b/ml/dlib/python_examples/face_recognition.py @@ -0,0 +1,123 @@ +#!/usr/bin/python +# The contents of this file are in the public domain. 
See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +# +# This example shows how to use dlib's face recognition tool. This tool maps +# an image of a human face to a 128 dimensional vector space where images of +# the same person are near to each other and images from different people are +# far apart. Therefore, you can perform face recognition by mapping faces to +# the 128D space and then checking if their Euclidean distance is small +# enough. +# +# When using a distance threshold of 0.6, the dlib model obtains an accuracy +# of 99.38% on the standard LFW face recognition benchmark, which is +# comparable to other state-of-the-art methods for face recognition as of +# February 2017. This accuracy means that, when presented with a pair of face +# images, the tool will correctly identify if the pair belongs to the same +# person or is from different people 99.38% of the time. +# +# Finally, for an in-depth discussion of how dlib's tool works you should +# refer to the C++ example program dnn_face_recognition_ex.cpp and the +# attendant documentation referenced therein. +# +# +# +# +# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE +# You can install dlib using the command: +# pip install dlib +# +# Alternatively, if you want to compile dlib yourself then go into the dlib +# root folder and run: +# python setup.py install +# or +# python setup.py install --yes USE_AVX_INSTRUCTIONS +# if you have a CPU that supports AVX instructions, since this makes some +# things run faster. This code will also use CUDA if you have CUDA and cuDNN +# installed. +# +# Compiling dlib should work on any operating system so long as you have +# CMake installed. On Ubuntu, this can be done easily by running the +# command: +# sudo apt-get install cmake +# +# Also note that this example requires scikit-image which can be installed +# via the command: +# pip install scikit-image +# Or downloaded from http://scikit-image.org/download.html. 
+ +import sys +import os +import dlib +import glob +from skimage import io + +if len(sys.argv) != 4: + print( + "Call this program like this:\n" + " ./face_recognition.py shape_predictor_5_face_landmarks.dat dlib_face_recognition_resnet_model_v1.dat ../examples/faces\n" + "You can download a trained facial shape predictor and recognition model from:\n" + " http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2\n" + " http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2") + exit() + +predictor_path = sys.argv[1] +face_rec_model_path = sys.argv[2] +faces_folder_path = sys.argv[3] + +# Load all the models we need: a detector to find the faces, a shape predictor +# to find face landmarks so we can precisely localize the face, and finally the +# face recognition model. +detector = dlib.get_frontal_face_detector() +sp = dlib.shape_predictor(predictor_path) +facerec = dlib.face_recognition_model_v1(face_rec_model_path) + +win = dlib.image_window() + +# Now process all the images +for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")): + print("Processing file: {}".format(f)) + img = io.imread(f) + + win.clear_overlay() + win.set_image(img) + + # Ask the detector to find the bounding boxes of each face. The 1 in the + # second argument indicates that we should upsample the image 1 time. This + # will make everything bigger and allow us to detect more faces. + dets = detector(img, 1) + print("Number of faces detected: {}".format(len(dets))) + + # Now process each face we found. + for k, d in enumerate(dets): + print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format( + k, d.left(), d.top(), d.right(), d.bottom())) + # Get the landmarks/parts for the face in box d. + shape = sp(img, d) + # Draw the face landmarks on the screen so we can see what face is currently being processed. + win.clear_overlay() + win.add_overlay(d) + win.add_overlay(shape) + + # Compute the 128D vector that describes the face in img identified by + # shape. 
In general, if two face descriptor vectors have a Euclidean + # distance between them less than 0.6 then they are from the same + # person, otherwise they are from different people. Here we just print + # the vector to the screen. + face_descriptor = facerec.compute_face_descriptor(img, shape) + print(face_descriptor) + # It should also be noted that you can also call this function like this: + # face_descriptor = facerec.compute_face_descriptor(img, shape, 100) + # The version of the call without the 100 gets 99.13% accuracy on LFW + # while the version with 100 gets 99.38%. However, the 100 makes the + # call 100x slower to execute, so choose whatever version you like. To + # explain a little, the 3rd argument tells the code how many times to + # jitter/resample the image. When you set it to 100 it executes the + # face descriptor extraction 100 times on slightly modified versions of + # the face and returns the average result. You could also pick a more + # middle value, such as 10, which is only 10x slower but still gets an + # LFW accuracy of 99.3%. + + + dlib.hit_enter_to_continue() + + diff --git a/ml/dlib/python_examples/find_candidate_object_locations.py b/ml/dlib/python_examples/find_candidate_object_locations.py new file mode 100755 index 00000000..a5c38642 --- /dev/null +++ b/ml/dlib/python_examples/find_candidate_object_locations.py @@ -0,0 +1,54 @@ +#!/usr/bin/python +# +# This example shows how to use find_candidate_object_locations(). The +# function takes an input image and generates a set of candidate rectangles +# which are expected to bound any objects in the image. +# It is based on the paper: +# Segmentation as Selective Search for Object Recognition by Koen E. A. van de Sande, et al. +# +# Typically, you would use this as part of an object detection pipeline. +# find_candidate_object_locations() nominates boxes that might contain an +# object and you then run some expensive classifier on each one and throw away +# the false alarms. 
Since find_candidate_object_locations() will only generate +# a few thousand rectangles it is much faster than scanning all possible +# rectangles inside an image. +# +# +# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE +# You can install dlib using the command: +# pip install dlib +# +# Alternatively, if you want to compile dlib yourself then go into the dlib +# root folder and run: +# python setup.py install +# or +# python setup.py install --yes USE_AVX_INSTRUCTIONS +# if you have a CPU that supports AVX instructions, since this makes some +# things run faster. +# +# Compiling dlib should work on any operating system so long as you have +# CMake installed. On Ubuntu, this can be done easily by running the +# command: +# sudo apt-get install cmake +# +# Also note that this example requires scikit-image which can be installed +# via the command: +# pip install scikit-image +# Or downloaded from http://scikit-image.org/download.html. + + + +import dlib +from skimage import io + +image_file = '../examples/faces/2009_004587.jpg' +img = io.imread(image_file) + +# Locations of candidate objects will be saved into rects +rects = [] +dlib.find_candidate_object_locations(img, rects, min_size=500) + +print("number of rectangles found {}".format(len(rects))) +for k, d in enumerate(rects): + print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format( + k, d.left(), d.top(), d.right(), d.bottom())) diff --git a/ml/dlib/python_examples/global_optimization.py b/ml/dlib/python_examples/global_optimization.py new file mode 100755 index 00000000..e3fb3f8c --- /dev/null +++ b/ml/dlib/python_examples/global_optimization.py @@ -0,0 +1,47 @@ +#!/usr/bin/python +# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +# +# +# This is an example illustrating the use of the global optimization routine, +# find_min_global(), from the dlib C++ Library. 
# This is a tool for finding the
# inputs to a function that result in the function giving its minimal output.
# This is a very useful tool for hyper parameter search when applying machine
# learning methods.  There are also many other applications for this kind of
# general derivative free optimization.  However, in this example program, we
# simply show how to call the method.  For that, we use a common global
# optimization test function, as you can see below.
#
#
# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE
#   You can install dlib using the command:
#       pip install dlib
#
#   Alternatively, if you want to compile dlib yourself then go into the dlib
#   root folder and run:
#       python setup.py install
#
#   Compiling dlib should work on any operating system so long as you have
#   CMake installed.  On Ubuntu, this can be done easily by running the
#   command:
#       sudo apt-get install cmake
#

import dlib
from math import sin, cos, pi, exp, sqrt


# This is a standard test function for these kinds of optimization problems.
# It has a bunch of local minima, with the global minimum resulting in
# holder_table()==-19.2085025679.
def holder_table(x0, x1):
    """Evaluate the Holder table test function at the point (x0, x1).

    Returns a value that is never positive, because the result is the
    negated absolute value of a product.
    """
    # Distance of the point from the origin.
    radius = sqrt(x0*x0 + x1*x1)
    # Exponential factor of the function.
    envelope = exp(abs(1 - radius/pi))
    return -abs(sin(x0)*cos(x1)*envelope)


# Find the optimal inputs to holder_table().  The print statements that follow
# show that find_min_global() finds the optimal settings to high precision.
+x,y = dlib.find_min_global(holder_table, + [-10,-10], # Lower bound constraints on x0 and x1 respectively + [10,10], # Upper bound constraints on x0 and x1 respectively + 80) # The number of times find_min_global() will call holder_table() + +print("optimal inputs: {}".format(x)); +print("optimal output: {}".format(y)); + diff --git a/ml/dlib/python_examples/max_cost_assignment.py b/ml/dlib/python_examples/max_cost_assignment.py new file mode 100755 index 00000000..8e284e6c --- /dev/null +++ b/ml/dlib/python_examples/max_cost_assignment.py @@ -0,0 +1,57 @@ +#!/usr/bin/python +# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +# +# This simple example shows how to call dlib's optimal linear assignment +# problem solver. It is an implementation of the famous Hungarian algorithm +# and is quite fast, operating in O(N^3) time. +# +# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE +# You can install dlib using the command: +# pip install dlib +# +# Alternatively, if you want to compile dlib yourself then go into the dlib +# root folder and run: +# python setup.py install +# or +# python setup.py install --yes USE_AVX_INSTRUCTIONS +# if you have a CPU that supports AVX instructions, since this makes some +# things run faster. +# +# Compiling dlib should work on any operating system so long as you have +# CMake installed. On Ubuntu, this can be done easily by running the +# command: +# sudo apt-get install cmake +# + +import dlib + +# Let's imagine you need to assign N people to N jobs. Additionally, each +# person will make your company a certain amount of money at each job, but each +# person has different skills so they are better at some jobs and worse at +# others. You would like to find the best way to assign people to these jobs. +# In particular, you would like to maximize the amount of money the group makes +# as a whole. 
This is an example of an assignment problem and is what is solved +# by the dlib.max_cost_assignment() routine. + +# So in this example, let's imagine we have 3 people and 3 jobs. We represent +# the amount of money each person will produce at each job with a cost matrix. +# Each row corresponds to a person and each column corresponds to a job. So for +# example, below we are saying that person 0 will make $1 at job 0, $2 at job 1, +# and $6 at job 2. +cost = dlib.matrix([[1, 2, 6], + [5, 3, 6], + [4, 5, 0]]) + +# To find out the best assignment of people to jobs we just need to call this +# function. +assignment = dlib.max_cost_assignment(cost) + +# This prints optimal assignments: [2, 0, 1] +# which indicates that we should assign the person from the first row of the +# cost matrix to job 2, the middle row person to job 0, and the bottom row +# person to job 1. +print("Optimal assignments: {}".format(assignment)) + +# This prints optimal cost: 16.0 +# which is correct since our optimal assignment is 6+5+5. +print("Optimal cost: {}".format(dlib.assignment_cost(cost, assignment))) diff --git a/ml/dlib/python_examples/requirements.txt b/ml/dlib/python_examples/requirements.txt new file mode 100644 index 00000000..8fa92c8a --- /dev/null +++ b/ml/dlib/python_examples/requirements.txt @@ -0,0 +1,3 @@ +scikit-image>=0.9.3 +opencv-python +numpy diff --git a/ml/dlib/python_examples/sequence_segmenter.py b/ml/dlib/python_examples/sequence_segmenter.py new file mode 100755 index 00000000..335e475f --- /dev/null +++ b/ml/dlib/python_examples/sequence_segmenter.py @@ -0,0 +1,197 @@ +#!/usr/bin/python +# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +# +# This example shows how to use dlib to learn to do sequence segmentation. In +# a sequence segmentation task we are given a sequence of objects (e.g. words in +# a sentence) and we are supposed to detect certain subsequences (e.g. the names +# of people). 
Therefore, in the code below we create some very simple training +# sequences and use them to learn a sequence segmentation model. In particular, +# our sequences will be sentences represented as arrays of words and our task +# will be to learn to identify person names. Once we have our segmentation +# model we can use it to find names in new sentences, as we will show. +# +# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE +# You can install dlib using the command: +# pip install dlib +# +# Alternatively, if you want to compile dlib yourself then go into the dlib +# root folder and run: +# python setup.py install +# or +# python setup.py install --yes USE_AVX_INSTRUCTIONS +# if you have a CPU that supports AVX instructions, since this makes some +# things run faster. +# +# Compiling dlib should work on any operating system so long as you have +# CMake installed. On Ubuntu, this can be done easily by running the +# command: +# sudo apt-get install cmake +# +import sys +import dlib + + +# The sequence segmentation models we work with in this example are chain +# structured conditional random field style models. Therefore, central to a +# sequence segmentation model is some method for converting the elements of a +# sequence into feature vectors. That is, while you might start out representing +# your sequence as an array of strings, the dlib interface works in terms of +# arrays of feature vectors. Each feature vector should capture important +# information about its corresponding element in the original raw sequence. So +# in this example, since we work with sequences of words and want to identify +# names, we will create feature vectors that tell us if the word is capitalized +# or not. In our simple data, this will be enough to identify names. +# Therefore, we define sentence_to_vectors() which takes a sentence represented +# as a string and converts it into an array of words and then associates a +# feature vector with each word. 
def sentence_to_vectors(sentence):
    """Convert a sentence string into a dlib.vectors array, one vector per word.

    The sentence is split on whitespace.  Each word becomes a 1-dimensional
    dlib.vector whose single feature is 1 if the first letter of the word is
    capitalized and 0 otherwise.
    """
    feature_vectors = dlib.vectors()
    for token in sentence.split():
        # The only feature we use is "does this word start with a capital?"
        capital_flag = 1 if token[0].isupper() else 0
        feature_vectors.append(dlib.vector([capital_flag]))
    return feature_vectors


# Dlib also supports the use of a sparse vector representation.  This is more
# efficient than the above form when you have very high dimensional vectors that
# are mostly full of zeros.  In dlib, each sparse vector is represented as an
# array of pair objects.  Each pair contains an index and value.  Any index not
# listed in the vector is implicitly associated with a value of zero.
# Additionally, when using sparse vectors with dlib.train_sequence_segmenter()
# you can use "unsorted" sparse vectors.  This means you can add the index/value
# pairs into your sparse vectors in any order you want and don't need to worry
# about them being in sorted order.
def sentence_to_sparse_vectors(sentence):
    """Sparse-vector counterpart of sentence_to_vectors().

    Returns a dlib.sparse_vectors array holding one dlib.sparse_vector per
    whitespace-separated word of the sentence.
    """
    # Equivalent to dlib.vector([1]): a single pair with index 0 and value 1.
    capitalized = dlib.sparse_vector()
    capitalized.append(dlib.pair(0, 1))

    # Nothing is added to this one, so it is equivalent to dlib.vector([0]).
    uncapitalized = dlib.sparse_vector()

    sparse_features = dlib.sparse_vectors()
    for token in sentence.split():
        sparse_features.append(
            capitalized if token[0].isupper() else uncapitalized)
    return sparse_features


def print_segment(sentence, names):
    """Write the words covered by each segment in names to standard output.

    sentence is split on whitespace and each element of names is iterated to
    obtain word indices (in this example the elements are dlib.range objects).
    Every selected word is written followed by a single space, and each
    segment is terminated by a newline.
    """
    tokens = sentence.split()
    for segment in names:
        sys.stdout.writelines(tokens[idx] + " " for idx in segment)
        sys.stdout.write("\n")


# Now let's make some training data.  Each example is a sentence as well as a
# set of ranges which indicate the locations of any names.
names = dlib.ranges()      # make an array of dlib.range objects.
segments = dlib.rangess()  # make an array of arrays of dlib.range objects.
+sentences = [] + +sentences.append("The other day I saw a man named Jim Smith") +# We want to detect person names. So we note that the name is located within +# the range [8, 10). Note that we use half open ranges to identify segments. +# So in this case, the segment identifies the string "Jim Smith". +names.append(dlib.range(8, 10)) +segments.append(names) +names.clear() # make names empty for use again below + +sentences.append("Davis King is the main author of the dlib Library") +names.append(dlib.range(0, 2)) +segments.append(names) +names.clear() + +sentences.append("Bob Jones is a name and so is George Clinton") +names.append(dlib.range(0, 2)) +names.append(dlib.range(8, 10)) +segments.append(names) +names.clear() + +sentences.append("My dog is named Bob Barker") +names.append(dlib.range(4, 6)) +segments.append(names) +names.clear() + +sentences.append("ABC is an acronym but John James Smith is a name") +names.append(dlib.range(5, 8)) +segments.append(names) +names.clear() + +sentences.append("No names in this sentence at all") +segments.append(names) +names.clear() + + +# Now before we can pass these training sentences to the dlib tools we need to +# convert them into arrays of vectors as discussed above. We can use either a +# sparse or dense representation depending on our needs. In this example, we +# show how to do it both ways. +use_sparse_vects = False +if use_sparse_vects: + # Make an array of arrays of dlib.sparse_vector objects. + training_sequences = dlib.sparse_vectorss() + for s in sentences: + training_sequences.append(sentence_to_sparse_vectors(s)) +else: + # Make an array of arrays of dlib.vector objects. + training_sequences = dlib.vectorss() + for s in sentences: + training_sequences.append(sentence_to_vectors(s)) + +# Now that we have a simple training set we can train a sequence segmenter. +# However, the sequence segmentation trainer has some optional parameters we can +# set. 
These parameters determine properties of the segmentation model we will +# learn. See the dlib documentation for the sequence_segmenter object for a +# full discussion of their meanings. +params = dlib.segmenter_params() +params.window_size = 3 +params.use_high_order_features = True +params.use_BIO_model = True +# This is the common SVM C parameter. Larger values encourage the trainer to +# attempt to fit the data exactly but might overfit. In general, you determine +# this parameter by cross-validation. +params.C = 10 + +# Train a model. The model object is responsible for predicting the locations +# of names in new sentences. +model = dlib.train_sequence_segmenter(training_sequences, segments, params) + +# Let's print out the things the model thinks are names. The output is a set +# of ranges which are predicted to contain names. If you run this example +# program you will see that it gets them all correct. +for i, s in enumerate(sentences): + print_segment(s, model(training_sequences[i])) + +# Let's also try segmenting a new sentence. This will print out "Bob Bucket". +# Note that we need to remember to use the same vector representation as we used +# during training. +test_sentence = "There once was a man from Nantucket " \ + "whose name rhymed with Bob Bucket" +if use_sparse_vects: + print_segment(test_sentence, + model(sentence_to_sparse_vectors(test_sentence))) +else: + print_segment(test_sentence, model(sentence_to_vectors(test_sentence))) + +# We can also measure the accuracy of a model relative to some labeled data. +# This statement prints the precision, recall, and F1-score of the model +# relative to the data in training_sequences/segments. +print("Test on training data: {}".format( + dlib.test_sequence_segmenter(model, training_sequences, segments))) + +# We can also do 5-fold cross-validation and print the resulting precision, +# recall, and F1-score. 
+print("Cross validation: {}".format( + dlib.cross_validate_sequence_segmenter(training_sequences, segments, 5, + params))) diff --git a/ml/dlib/python_examples/svm_binary_classifier.py b/ml/dlib/python_examples/svm_binary_classifier.py new file mode 100755 index 00000000..d114c815 --- /dev/null +++ b/ml/dlib/python_examples/svm_binary_classifier.py @@ -0,0 +1,68 @@ +#!/usr/bin/python +# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +# +# +# This is an example illustrating the use of a binary SVM classifier tool from +# the dlib C++ Library. In this example, we will create a simple test dataset +# and show how to learn a classifier from it. +# +# +# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE +# You can install dlib using the command: +# pip install dlib +# +# Alternatively, if you want to compile dlib yourself then go into the dlib +# root folder and run: +# python setup.py install +# or +# python setup.py install --yes USE_AVX_INSTRUCTIONS +# if you have a CPU that supports AVX instructions, since this makes some +# things run faster. +# +# Compiling dlib should work on any operating system so long as you have +# CMake installed. On Ubuntu, this can be done easily by running the +# command: +# sudo apt-get install cmake +# + +import dlib +try: + import cPickle as pickle +except ImportError: + import pickle + +x = dlib.vectors() +y = dlib.array() + +# Make a training dataset. Here we have just two training examples. Normally +# you would use a much larger training dataset, but for the purpose of example +# this is plenty. For binary classification, the y labels should all be either +1 or -1. +x.append(dlib.vector([1, 2, 3, -1, -2, -3])) +y.append(+1) + +x.append(dlib.vector([-1, -2, -3, 1, 2, 3])) +y.append(-1) + + +# Now make a training object. This object is responsible for turning a +# training dataset into a prediction model. This one here is a SVM trainer +# that uses a linear kernel. 
If you wanted to use an RBF kernel or histogram +# intersection kernel you could change it to one of these lines: +# svm = dlib.svm_c_trainer_histogram_intersection() +# svm = dlib.svm_c_trainer_radial_basis() +svm = dlib.svm_c_trainer_linear() +svm.be_verbose() +svm.set_c(10) + +# Now train the model. The return value is the trained model capable of making predictions. +classifier = svm.train(x, y) + +# Now run the model on our data and look at the results. +print("prediction for first sample: {}".format(classifier(x[0]))) +print("prediction for second sample: {}".format(classifier(x[1]))) + + +# Classifier models can also be pickled in the same way as any other Python object. +with open('saved_model.pickle', 'wb') as handle: + pickle.dump(classifier, handle, 2) + diff --git a/ml/dlib/python_examples/svm_rank.py b/ml/dlib/python_examples/svm_rank.py new file mode 100755 index 00000000..dad64227 --- /dev/null +++ b/ml/dlib/python_examples/svm_rank.py @@ -0,0 +1,155 @@ +#!/usr/bin/python +# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +# +# +# This is an example illustrating the use of the SVM-Rank tool from the dlib C++ +# Library. This is a tool useful for learning to rank objects. For example, +# you might use it to learn to rank web pages in response to a user's query. +# The idea is to rank the most relevant pages higher than non-relevant pages. +# +# In this example, we will create a simple test dataset and show how to learn a +# ranking function from it. The purpose of the function will be to give +# "relevant" objects higher scores than "non-relevant" objects. The idea is +# that you use this score to order the objects so that the most relevant objects +# come to the top of the ranked list.
+# +# +# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE +# You can install dlib using the command: +# pip install dlib +# +# Alternatively, if you want to compile dlib yourself then go into the dlib +# root folder and run: +# python setup.py install +# or +# python setup.py install --yes USE_AVX_INSTRUCTIONS +# if you have a CPU that supports AVX instructions, since this makes some +# things run faster. +# +# Compiling dlib should work on any operating system so long as you have +# CMake installed. On Ubuntu, this can be done easily by running the +# command: +# sudo apt-get install cmake +# + +import dlib + + +# Now let's make some testing data. To make it really simple, let's suppose +# that we are ranking 2D vectors and that vectors with positive values in the +# first dimension should rank higher than other vectors. So what we do is make +# examples of relevant (i.e. high ranking) and non-relevant (i.e. low ranking) +# vectors and store them into a ranking_pair object like so: +data = dlib.ranking_pair() +# Here we add two examples. In real applications, you would want lots of +# examples of relevant and non-relevant vectors. +data.relevant.append(dlib.vector([1, 0])) +data.nonrelevant.append(dlib.vector([0, 1])) + +# Now that we have some data, we can use a machine learning method to learn a +# function that will give high scores to the relevant vectors and low scores to +# the non-relevant vectors. +trainer = dlib.svm_rank_trainer() +# Note that the trainer object has some parameters that control how it behaves. +# For example, since this is the SVM-Rank algorithm it has a C parameter that +# controls the trade-off between trying to fit the training data exactly or +# selecting a "simpler" solution which might generalize better. +trainer.c = 10 + +# So let's do the training. +rank = trainer.train(data) + +# Now if you call rank on a vector it will output a ranking score. 
In +# particular, the ranking score for relevant vectors should be larger than the +# score for non-relevant vectors. +print("Ranking score for a relevant vector: {}".format( + rank(data.relevant[0]))) +print("Ranking score for a non-relevant vector: {}".format( + rank(data.nonrelevant[0]))) +# The output is the following: +# Ranking score for a relevant vector: 0.5 +# Ranking score for a non-relevant vector: -0.5 + + +# If we want an overall measure of ranking accuracy we can compute the ordering +# accuracy and mean average precision values by calling test_ranking_function(). +# The ordering accuracy tells us how often a relevant vector was ranked ahead +# of a non-relevant vector. In this case, it returns 1 for both +# metrics, indicating that the rank function outputs a perfect ranking. +print(dlib.test_ranking_function(rank, data)) + +# The ranking scores are computed by taking the dot product between a learned +# weight vector and a data vector. If you want to see the learned weight vector +# you can display it like so: +print("Weights: {}".format(rank.weights)) +# In this case the weights are: +# 0.5 +# -0.5 + +# In the above example, our data contains just two sets of objects: the +# relevant set and the non-relevant set. The trainer is attempting to find a +# ranking function that gives every relevant vector a higher score than every +# non-relevant vector. Sometimes what you want to do is a little more complex +# than this. +# +# For example, in the web page ranking example we have to rank pages based on a +# user's query. In this case, each query will have its own set of relevant and +# non-relevant documents. What might be relevant to one query may well be +# non-relevant to another. So in this case we don't have a single global set of +# relevant web pages and another set of non-relevant web pages. +# +# To handle cases like this, we can simply give multiple ranking_pair instances +# to the trainer.
Therefore, each ranking_pair would represent the +# relevant/non-relevant sets for a particular query. An example is shown below +# (for simplicity, we reuse our data from above to make 4 identical "queries"). +queries = dlib.ranking_pairs() +queries.append(data) +queries.append(data) +queries.append(data) +queries.append(data) + +# We can train just as before. +rank = trainer.train(queries) + +# Now that we have multiple ranking_pair instances, we can also use +# cross_validate_ranking_trainer(). This performs cross-validation by splitting +# the queries up into folds. That is, it lets the trainer train on a subset of +# ranking_pair instances and tests on the rest. It does this over 4 different +# splits and returns the overall ranking accuracy based on the held out data. +# Just like test_ranking_function(), it reports both the ordering accuracy and +# mean average precision. +print("Cross validation results: {}".format( + dlib.cross_validate_ranking_trainer(trainer, queries, 4))) + +# Finally, note that the ranking tools also support the use of sparse vectors in +# addition to dense vectors (which we used above). So if we wanted to do +# exactly what we did in the first part of the example program above but using +# sparse vectors we would do it like so: + +data = dlib.sparse_ranking_pair() +samp = dlib.sparse_vector() + +# Make samp represent the same vector as dlib.vector([1, 0]). In dlib, a sparse +# vector is just an array of pair objects. Each pair stores an index and a +# value. Moreover, the svm-ranking tools require sparse vectors to be sorted +# and to have unique indices. This means that the indices are listed in +# increasing order and no index value shows up more than once. If necessary, +# you can use the dlib.make_sparse_vector() routine to make a sparse vector +# object properly sorted and contain unique indices. 
+samp.append(dlib.pair(0, 1)) +data.relevant.append(samp) + +# Now make samp represent the same vector as dlib.vector([0, 1]) +samp.clear() +samp.append(dlib.pair(1, 1)) +data.nonrelevant.append(samp) + +trainer = dlib.svm_rank_trainer_sparse() +rank = trainer.train(data) +print("Ranking score for a relevant vector: {}".format( + rank(data.relevant[0]))) +print("Ranking score for a non-relevant vector: {}".format( + rank(data.nonrelevant[0]))) +# Just as before, the output is the following: +# ranking score for a relevant vector: 0.5 +# ranking score for a non-relevant vector: -0.5 diff --git a/ml/dlib/python_examples/svm_struct.py b/ml/dlib/python_examples/svm_struct.py new file mode 100755 index 00000000..7f0004cc --- /dev/null +++ b/ml/dlib/python_examples/svm_struct.py @@ -0,0 +1,343 @@ +#!/usr/bin/python +# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +# +# This is an example illustrating the use of the structural SVM solver from +# the dlib C++ Library. Therefore, this example teaches you the central ideas +# needed to setup a structural SVM model for your machine learning problems. To +# illustrate the process, we use dlib's structural SVM solver to learn the +# parameters of a simple multi-class classifier. We first discuss the +# multi-class classifier model and then walk through using the structural SVM +# tools to find the parameters of this classification model. As an aside, +# dlib's C++ interface to the structural SVM solver is threaded. So on a +# multi-core computer it is significantly faster than using the python +# interface. So consider using the C++ interface instead if you find that +# running it in python is slow. 
+# +# +# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE +# You can install dlib using the command: +# pip install dlib +# +# Alternatively, if you want to compile dlib yourself then go into the dlib +# root folder and run: +# python setup.py install +# or +# python setup.py install --yes USE_AVX_INSTRUCTIONS +# if you have a CPU that supports AVX instructions, since this makes some +# things run faster. +# +# Compiling dlib should work on any operating system so long as you have +# CMake installed. On Ubuntu, this can be done easily by running the +# command: +# sudo apt-get install cmake +# + +import dlib + + +def main(): + # In this example, we have three types of samples: class 0, 1, or 2. That + # is, each of our sample vectors falls into one of three classes. To keep + # this example very simple, each sample vector is zero everywhere except at + # one place. The non-zero dimension of each vector determines the class of + # the vector. So for example, the first element of samples has a class of 1 + # because samples[0][1] is the only non-zero element of samples[0]. + samples = [[0, 2, 0], [1, 0, 0], [0, 4, 0], [0, 0, 3]] + # Since we want to use a machine learning method to learn a 3-class + # classifier we need to record the labels of our samples. Here samples[i] + # has a class label of labels[i]. + labels = [1, 0, 1, 2] + + # Now that we have some training data we can tell the structural SVM to + # learn the parameters of our 3-class classifier model. The details of this + # will be explained later. For now, just note that it finds the weights + # (i.e. a vector of real valued parameters) such that predict_label(weights, + # sample) always returns the correct label for a sample vector. + problem = ThreeClassClassifierProblem(samples, labels) + weights = dlib.solve_structural_svm_problem(problem) + + # Print the weights and then evaluate predict_label() on each of our + # training samples. Note that the correct label is predicted for each + # sample. 
+ print(weights) + for k, s in enumerate(samples): + print("Predicted label for sample[{0}]: {1}".format( + k, predict_label(weights, s))) + + +def predict_label(weights, sample): + """Given the 9-dimensional weight vector which defines a 3 class classifier, + predict the class of the given 3-dimensional sample vector. Therefore, the + output of this function is either 0, 1, or 2 (i.e. one of the three possible + labels).""" + + # Our 3-class classifier model can be thought of as containing 3 separate + # linear classifiers. So to predict the class of a sample vector we + # evaluate each of these three classifiers and then whatever classifier has + # the largest output "wins" and predicts the label of the sample. This is + # the popular one-vs-all multi-class classifier model. + # Keeping this in mind, the code below simply pulls the three separate + # weight vectors out of weights and then evaluates each against sample. The + # individual classifier scores are stored in scores and the highest scoring + # index is returned as the label. + w0 = weights[0:3] + w1 = weights[3:6] + w2 = weights[6:9] + scores = [dot(w0, sample), dot(w1, sample), dot(w2, sample)] + max_scoring_label = scores.index(max(scores)) + return max_scoring_label + + +def dot(a, b): + """Compute the dot product between the two vectors a and b.""" + return sum(i * j for i, j in zip(a, b)) + + +################################################################################ + + +class ThreeClassClassifierProblem: + # Now we arrive at the meat of this example program. To use the + # dlib.solve_structural_svm_problem() routine you need to define an object + # which tells the structural SVM solver what to do for your problem. In + # this example, this is done by defining the ThreeClassClassifierProblem + # object. Before we get into the details, we first discuss some background + # information on structural SVMs. 
+ # + # A structural SVM is a supervised machine learning method for learning to + # predict complex outputs. This is contrasted with a binary classifier + # which makes only simple yes/no predictions. A structural SVM, on the + # other hand, can learn to predict complex outputs such as entire parse + # trees or DNA sequence alignments. To do this, it learns a function F(x,y) + # which measures how well a particular data sample x matches a label y, + # where a label is potentially a complex thing like a parse tree. However, + # to keep this example program simple we use only a 3 category label output. + # + # At test time, the best label for a new x is given by the y which + # maximizes F(x,y). To put this into the context of the current example, + # F(x,y) computes the score for a given sample and class label. The + # predicted class label is therefore whatever value of y which makes F(x,y) + # the biggest. This is exactly what predict_label() does. That is, it + # computes F(x,0), F(x,1), and F(x,2) and then reports which label has the + # biggest value. + # + # At a high level, a structural SVM can be thought of as searching the + # parameter space of F(x,y) for the set of parameters that make the + # following inequality true as often as possible: + # F(x_i,y_i) > max{over all incorrect labels of x_i} F(x_i, y_incorrect) + # That is, it seeks to find the parameter vector such that F(x,y) always + # gives the highest score to the correct output. To define the structural + # SVM optimization problem precisely, we first introduce some notation: + # - let PSI(x,y) == the joint feature vector for input x and a label y + # - let F(x,y|w) == dot(w,PSI(x,y)). + # (we use the | notation to emphasize that F() has the parameter vector + # of weights called w) + # - let LOSS(idx,y) == the loss incurred for predicting that the + # idx-th training sample has a label of y. 
Note that LOSS() + # should always be >= 0 and should become exactly 0 when y is the + # correct label for the idx-th sample. Moreover, it should notionally + # indicate how bad it is to predict y for the idx'th sample. + # - let x_i == the i-th training sample. + # - let y_i == the correct label for the i-th training sample. + # - The number of data samples is N. + # + # Then the optimization problem solved by a structural SVM using + # dlib.solve_structural_svm_problem() is the following: + # Minimize: h(w) == 0.5*dot(w,w) + C*R(w) + # + # Where R(w) == sum from i=1 to N: 1/N * sample_risk(i,w) and + # sample_risk(i,w) == max over all + # Y: LOSS(i,Y) + F(x_i,Y|w) - F(x_i,y_i|w) and C > 0 + # + # You can think of the sample_risk(i,w) as measuring the degree of error + # you would make when predicting the label of the i-th sample using + # parameters w. That is, it is zero only when the correct label would be + # predicted and grows larger the more "wrong" the predicted output becomes. + # Therefore, the objective function is minimizing a balance between making + # the weights small (typically this reduces overfitting) and fitting the + # training data. The degree to which you try to fit the data is controlled + # by the C parameter. + # + # For a more detailed introduction to structured support vector machines + # you should consult the following paper: + # Predicting Structured Objects with Support Vector Machines by + # Thorsten Joachims, Thomas Hofmann, Yisong Yue, and Chun-nam Yu + # + + # Finally, we come back to the code. To use + # dlib.solve_structural_svm_problem() you need to provide the things + # discussed above. This is the value of C, the number of training samples, + # the dimensionality of PSI(), as well as methods for calculating the loss + # values and PSI() vectors. You will also need to write code that can + # compute: + # max over all Y: LOSS(i,Y) + F(x_i,Y|w). 
To summarize, the + # ThreeClassClassifierProblem class is required to have the following + # fields: + # - C + # - num_samples + # - num_dimensions + # - get_truth_joint_feature_vector() + # - separation_oracle() + + C = 1 + + # There are also a number of optional arguments: + # epsilon is the stopping tolerance. The optimizer will run until R(w) is + # within epsilon of its optimal value. If you don't set this then it + # defaults to 0.001. + # epsilon = 1e-13 + + # Uncomment this and the optimizer will print its progress to standard + # out. You will be able to see things like the current risk gap. The + # optimizer continues until the + # risk gap is below epsilon. + # be_verbose = True + + # If you want to require that the learned weights are all non-negative + # then set this field to True. + # learns_nonnegative_weights = True + + # The optimizer uses an internal cache to avoid unnecessary calls to your + # separation_oracle() routine. This parameter controls the size of that + # cache. Bigger values use more RAM and might make the optimizer run + # faster. You can also disable it by setting it to 0 which is good to do + # when your separation_oracle is very fast. If If you don't call this + # function it defaults to a value of 5. + # max_cache_size = 20 + + def __init__(self, samples, labels): + # dlib.solve_structural_svm_problem() expects the class to have + # num_samples and num_dimensions fields. These fields should contain + # the number of training samples and the dimensionality of the PSI + # feature vector respectively. + self.num_samples = len(samples) + self.num_dimensions = len(samples[0])*3 + + self.samples = samples + self.labels = labels + + def make_psi(self, x, label): + """Compute PSI(x,label).""" + # All we are doing here is taking x, which is a 3 dimensional sample + # vector in this example program, and putting it into one of 3 places in + # a 9 dimensional PSI vector, which we then return. So this function + # returns PSI(x,label). 
To see why we setup PSI like this, recall how + # predict_label() works. It takes in a 9 dimensional weight vector and + # breaks the vector into 3 pieces. Each piece then defines a different + # classifier and we use them in a one-vs-all manner to predict the + # label. So now that we are in the structural SVM code we have to + # define the PSI vector to correspond to this usage. That is, we need + # to setup PSI so that argmax_y dot(weights,PSI(x,y)) == + # predict_label(weights,x). This is how we tell the structural SVM + # solver what kind of problem we are trying to solve. + # + # It's worth emphasizing that the single biggest step in using a + # structural SVM is deciding how you want to represent PSI(x,label). It + # is always a vector, but deciding what to put into it to solve your + # problem is often not a trivial task. Part of the difficulty is that + # you need an efficient method for finding the label that makes + # dot(w,PSI(x,label)) the biggest. Sometimes this is easy, but often + # finding the max scoring label turns into a difficult combinatorial + # optimization problem. So you need to pick a PSI that doesn't make the + # label maximization step intractable but also still well models your + # problem. + # + # Create a dense vector object (note that you can also use unsorted + # sparse vectors (i.e. dlib.sparse_vector objects) to represent your + # PSI vector. This is useful if you have very high dimensional PSI + # vectors that are mostly zeros. In the context of this example, you + # would simply return a dlib.sparse_vector at the end of make_psi() and + # the rest of the example would still work properly. ). + psi = dlib.vector() + # Set it to have 9 dimensions. Note that the elements of the vector + # are 0 initialized. 
+ psi.resize(self.num_dimensions) + dims = len(x) + if label == 0: + for i in range(0, dims): + psi[i] = x[i] + elif label == 1: + for i in range(dims, 2 * dims): + psi[i] = x[i - dims] + else: # the label must be 2 + for i in range(2 * dims, 3 * dims): + psi[i] = x[i - 2 * dims] + return psi + + # Now we get to the two member functions that are directly called by + # dlib.solve_structural_svm_problem(). + # + # In get_truth_joint_feature_vector(), all you have to do is return the + # PSI() vector for the idx-th training sample when it has its true label. + # So here it returns + # PSI(self.samples[idx], self.labels[idx]). + def get_truth_joint_feature_vector(self, idx): + return self.make_psi(self.samples[idx], self.labels[idx]) + + # separation_oracle() is more interesting. + # dlib.solve_structural_svm_problem() will call separation_oracle() many + # times during the optimization. Each time it will give it the current + # value of the parameter weights and the separation_oracle() is supposed to + # find the label that most violates the structural SVM objective function + # for the idx-th sample. Then the separation oracle reports the + # corresponding PSI vector and loss value. To state this more precisely, + # the separation_oracle() member function has the following contract: + # requires + # - 0 <= idx < self.num_samples + # - len(current_solution) == self.num_dimensions + # ensures + # - runs the separation oracle on the idx-th sample. + # We define this as follows: + # - let X == the idx-th training sample. + # - let PSI(X,y) == the joint feature vector for input X + # and an arbitrary label y. + # - let F(X,y) == dot(current_solution,PSI(X,y)). + # - let LOSS(idx,y) == the loss incurred for predicting that the + # idx-th sample has a label of y. Note that LOSS() + # should always be >= 0 and should become exactly 0 when y is the + # correct label for the idx-th sample. 
+ # + # Then the separation oracle finds a Y such that: + # Y = argmax over all y: LOSS(idx,y) + F(X,y) + # (i.e. It finds the label which maximizes the above expression.) + # + # Finally, separation_oracle() returns LOSS(idx,Y),PSI(X,Y) + def separation_oracle(self, idx, current_solution): + samp = self.samples[idx] + dims = len(samp) + scores = [0, 0, 0] + # compute scores for each of the three classifiers + scores[0] = dot(current_solution[0:dims], samp) + scores[1] = dot(current_solution[dims:2*dims], samp) + scores[2] = dot(current_solution[2*dims:3*dims], samp) + + # Add in the loss-augmentation. Recall that we maximize + # LOSS(idx,y) + F(X,y) in the separate oracle, not just F(X,y) as we + # normally would in predict_label(). Therefore, we must add in this + # extra amount to account for the loss-augmentation. For our simple + # multi-class classifier, we incur a loss of 1 if we don't predict the + # correct label and a loss of 0 if we get the right label. + if self.labels[idx] != 0: + scores[0] += 1 + if self.labels[idx] != 1: + scores[1] += 1 + if self.labels[idx] != 2: + scores[2] += 1 + + # Now figure out which classifier has the largest loss-augmented score. + max_scoring_label = scores.index(max(scores)) + # And finally record the loss that was associated with that predicted + # label. Again, the loss is 1 if the label is incorrect and 0 otherwise. + if max_scoring_label == self.labels[idx]: + loss = 0 + else: + loss = 1 + + # Finally, return the loss and PSI vector corresponding to the label + # we just found. + psi = self.make_psi(samp, max_scoring_label) + return loss, psi + + +if __name__ == "__main__": + main() diff --git a/ml/dlib/python_examples/train_object_detector.py b/ml/dlib/python_examples/train_object_detector.py new file mode 100755 index 00000000..aef3fe16 --- /dev/null +++ b/ml/dlib/python_examples/train_object_detector.py @@ -0,0 +1,183 @@ +#!/usr/bin/python +# The contents of this file are in the public domain. 
# See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
#
#   This example program shows how you can use dlib to make a HOG based object
#   detector for things like faces, pedestrians, and any other semi-rigid
#   object.  In particular, we go through the steps to train the kind of
#   sliding window object detector first published by Dalal and Triggs in 2005
#   in the paper Histograms of Oriented Gradients for Human Detection.
#
#
# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE
#   You can install dlib using the command:
#       pip install dlib
#
#   Alternatively, if you want to compile dlib yourself then go into the dlib
#   root folder and run:
#       python setup.py install
#   or
#       python setup.py install --yes USE_AVX_INSTRUCTIONS
#   if you have a CPU that supports AVX instructions, since this makes some
#   things run faster.
#
#   Compiling dlib should work on any operating system so long as you have
#   CMake installed.  On Ubuntu, this can be done easily by running the
#   command:
#       sudo apt-get install cmake
#
#   Also note that this example requires scikit-image which can be installed
#   via the command:
#       pip install scikit-image
#   Or downloaded from http://scikit-image.org/download.html.

import os
import sys
import glob

import dlib
from skimage import io


# In this example we are going to train a face detector based on the small
# faces dataset in the examples/faces directory.  This means you need to supply
# the path to this faces folder as a command line argument so we will know
# where it is.
if len(sys.argv) != 2:
    print(
        "Give the path to the examples/faces directory as the argument to this "
        "program. For example, if you are in the python_examples folder then "
        "execute this program by running:\n"
        " ./train_object_detector.py ../examples/faces")
    # sys.exit() rather than the bare exit() helper: exit() is injected by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit()
faces_folder = sys.argv[1]


# Now let's do the training.  The train_simple_object_detector() function has a
# bunch of options, all of which come with reasonable default values.  The next
# few lines go over some of these options.
options = dlib.simple_object_detector_training_options()
# Since faces are left/right symmetric we can tell the trainer to train a
# symmetric detector.  This helps it get the most value out of the training
# data.
options.add_left_right_image_flips = True
# The trainer is a kind of support vector machine and therefore has the usual
# SVM C parameter.  In general, a bigger C encourages it to fit the training
# data better but might lead to overfitting.  You must find the best C value
# empirically by checking how well the trained detector works on a test set of
# images you haven't trained on.  Don't just leave the value set at 5.  Try a
# few different C values and see what works best for your data.
options.C = 5
# Tell the code how many CPU cores your computer has for the fastest training.
options.num_threads = 4
options.be_verbose = True


training_xml_path = os.path.join(faces_folder, "training.xml")
testing_xml_path = os.path.join(faces_folder, "testing.xml")
# This function does the actual training.  It will save the final detector to
# detector.svm.  The input is an XML file that lists the images in the training
# dataset and also contains the positions of the face boxes.  To create your
# own XML files you can use the imglab tool which can be found in the
# tools/imglab folder.  It is a simple graphical tool for labeling objects in
# images with boxes.  To see how to use it read the tools/imglab/README.txt
# file.  But for this example, we just use the training.xml file included with
# dlib.
dlib.train_simple_object_detector(training_xml_path, "detector.svm", options)


# Now that we have a face detector we can test it.  The first statement tests
# it on the training data.  It will print the precision, recall, and then
# average precision.
print("")  # Print blank line to create gap from previous output
print("Training accuracy: {}".format(
    dlib.test_simple_object_detector(training_xml_path, "detector.svm")))
# However, to get an idea if it really worked without overfitting we need to
# run it on images it wasn't trained on.  The next line does this.  Happily, we
# see that the object detector works perfectly on the testing images.
print("Testing accuracy: {}".format(
    dlib.test_simple_object_detector(testing_xml_path, "detector.svm")))


# Now let's use the detector as you would in a normal application.  First we
# will load it from disk.
detector = dlib.simple_object_detector("detector.svm")

# We can look at the HOG filter we learned.  It should look like a face.  Neat!
win_det = dlib.image_window()
win_det.set_image(detector)

# Now let's run the detector over the images in the faces folder and display the
# results.
print("Showing detections on the images in the faces folder...")
win = dlib.image_window()
for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
    print("Processing file: {}".format(f))
    img = io.imread(f)
    dets = detector(img)
    print("Number of faces detected: {}".format(len(dets)))
    for k, d in enumerate(dets):
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            k, d.left(), d.top(), d.right(), d.bottom()))

    win.clear_overlay()
    win.set_image(img)
    win.add_overlay(dets)
    dlib.hit_enter_to_continue()


# Next, suppose you have trained multiple detectors and you want to run them
# efficiently as a group.  You can do this as follows:
detector1 = dlib.fhog_object_detector("detector.svm")
# In this example we load detector.svm again since it's the only one we have on
# hand.  But in general it would be a different detector.
detector2 = dlib.fhog_object_detector("detector.svm")
# Make a list of all the detectors you want to run.  Here we have 2, but you
# could have any number.
detectors = [detector1, detector2]
image = io.imread(os.path.join(faces_folder, '2008_002506.jpg'))
boxes, confidences, detector_idxs = dlib.fhog_object_detector.run_multiple(
    detectors, image, upsample_num_times=1, adjust_threshold=0.0)
# The three results are parallel sequences: one entry per detection.
for box, confidence, detector_idx in zip(boxes, confidences, detector_idxs):
    print("detector {} found box {} with confidence {}.".format(
        detector_idx, box, confidence))


# Finally, note that you don't have to use the XML based input to
# train_simple_object_detector().  If you have already loaded your training
# images and bounding boxes for the objects then you can call it as shown
# below.

# You just need to put your images into a list.
images = [io.imread(os.path.join(faces_folder, '2008_002506.jpg')),
          io.imread(os.path.join(faces_folder, '2009_004587.jpg'))]
# Then for each image you make a list of rectangles which give the pixel
# locations of the edges of the boxes.
boxes_img1 = ([dlib.rectangle(left=329, top=78, right=437, bottom=186),
               dlib.rectangle(left=224, top=95, right=314, bottom=185),
               dlib.rectangle(left=125, top=65, right=214, bottom=155)])
boxes_img2 = ([dlib.rectangle(left=154, top=46, right=228, bottom=121),
               dlib.rectangle(left=266, top=280, right=328, bottom=342)])
# And then you aggregate those lists of boxes into one big list and then call
# train_simple_object_detector().
boxes = [boxes_img1, boxes_img2]

detector2 = dlib.train_simple_object_detector(images, boxes, options)
# We could save this detector to disk by uncommenting the following.
#detector2.save('detector2.svm')

# Now let's look at its HOG filter!
win_det.set_image(detector2)
dlib.hit_enter_to_continue()

# Note that you don't have to use the XML based input to
# test_simple_object_detector().  If you have already loaded your training
# images and bounding boxes for the objects then you can call it as shown
# below.
+print("\nTraining accuracy: {}".format( + dlib.test_simple_object_detector(images, boxes, detector2))) diff --git a/ml/dlib/python_examples/train_shape_predictor.py b/ml/dlib/python_examples/train_shape_predictor.py new file mode 100755 index 00000000..23758b2c --- /dev/null +++ b/ml/dlib/python_examples/train_shape_predictor.py @@ -0,0 +1,135 @@ +#!/usr/bin/python +# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +# +# This example program shows how to use dlib's implementation of the paper: +# One Millisecond Face Alignment with an Ensemble of Regression Trees by +# Vahid Kazemi and Josephine Sullivan, CVPR 2014 +# +# In particular, we will train a face landmarking model based on a small +# dataset and then evaluate it. If you want to visualize the output of the +# trained model on some images then you can run the +# face_landmark_detection.py example program with predictor.dat as the input +# model. +# +# It should also be noted that this kind of model, while often used for face +# landmarking, is quite general and can be used for a variety of shape +# prediction tasks. But here we demonstrate it only on a simple face +# landmarking task. +# +# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE +# You can install dlib using the command: +# pip install dlib +# +# Alternatively, if you want to compile dlib yourself then go into the dlib +# root folder and run: +# python setup.py install +# or +# python setup.py install --yes USE_AVX_INSTRUCTIONS +# if you have a CPU that supports AVX instructions, since this makes some +# things run faster. +# +# Compiling dlib should work on any operating system so long as you have +# CMake installed. On Ubuntu, this can be done easily by running the +# command: +# sudo apt-get install cmake +# +# Also note that this example requires scikit-image which can be installed +# via the command: +# pip install scikit-image +# Or downloaded from http://scikit-image.org/download.html. 

import os
import sys
import glob

import dlib
from skimage import io


# In this example we are going to train a face landmarking model (a shape
# predictor) based on the small faces dataset in the examples/faces directory.
# This means you need to supply the path to this faces folder as a command
# line argument so we will know where it is.
if len(sys.argv) != 2:
    print(
        "Give the path to the examples/faces directory as the argument to this "
        "program. For example, if you are in the python_examples folder then "
        "execute this program by running:\n"
        " ./train_shape_predictor.py ../examples/faces")
    # sys.exit() rather than the bare exit() helper: exit() is injected by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit()
faces_folder = sys.argv[1]

options = dlib.shape_predictor_training_options()
# Now make the object responsible for training the model.
# This algorithm has a bunch of parameters you can mess with.  The
# documentation for the shape_predictor_trainer explains all of them.
# You should also read Kazemi's paper which explains all the parameters
# in great detail.  However, here I'm just setting three of them
# differently than their default values.  I'm doing this because we
# have a very small dataset.  In particular, setting the oversampling
# to a high amount (300) effectively boosts the training set size, so
# that helps this example.
options.oversampling_amount = 300
# I'm also reducing the capacity of the model by explicitly increasing
# the regularization (making nu smaller) and by using trees with
# smaller depths.
options.nu = 0.05
options.tree_depth = 2
options.be_verbose = True

# dlib.train_shape_predictor() does the actual training.  It will save the
# final predictor to predictor.dat.  The input is an XML file that lists the
# images in the training dataset and also contains the positions of the face
# parts.
training_xml_path = os.path.join(faces_folder, "training_with_face_landmarks.xml")
dlib.train_shape_predictor(training_xml_path, "predictor.dat", options)

# Now that we have a model we can test it.  dlib.test_shape_predictor()
# measures the average distance between a face landmark output by the
# shape_predictor and where it should be according to the truth data.
print("\nTraining accuracy: {}".format(
    dlib.test_shape_predictor(training_xml_path, "predictor.dat")))
# The real test is to see how well it does on data it wasn't trained on.  We
# trained it on a very small dataset so the accuracy is not extremely high, but
# it's still doing quite well.  Moreover, if you train it on one of the large
# face landmarking datasets you will obtain state-of-the-art results, as shown
# in the Kazemi paper.
testing_xml_path = os.path.join(faces_folder, "testing_with_face_landmarks.xml")
print("Testing accuracy: {}".format(
    dlib.test_shape_predictor(testing_xml_path, "predictor.dat")))

# Now let's use it as you would in a normal application.  First we will load it
# from disk.  We also need to load a face detector to provide the initial
# estimate of the facial location.
predictor = dlib.shape_predictor("predictor.dat")
detector = dlib.get_frontal_face_detector()

# Now let's run the detector and shape_predictor over the images in the faces
# folder and display the results.
print("Showing detections and predictions on the images in the faces folder...")
win = dlib.image_window()
for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
    print("Processing file: {}".format(f))
    img = io.imread(f)

    win.clear_overlay()
    win.set_image(img)

    # Ask the detector to find the bounding boxes of each face.  The 1 in the
    # second argument indicates that we should upsample the image 1 time.  This
    # will make everything bigger and allow us to detect more faces.
    dets = detector(img, 1)
    print("Number of faces detected: {}".format(len(dets)))
    for k, d in enumerate(dets):
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            k, d.left(), d.top(), d.right(), d.bottom()))
        # Get the landmarks/parts for the face in box d.
        shape = predictor(img, d)
        print("Part 0: {}, Part 1: {} ...".format(shape.part(0),
                                                  shape.part(1)))
        # Draw the face landmarks on the screen.
        win.add_overlay(shape)

    win.add_overlay(dets)
    dlib.hit_enter_to_continue()