Diffstat (limited to 'ml/dlib/examples')
331 files changed, 23069 insertions, 0 deletions
diff --git a/ml/dlib/examples/3d_point_cloud_ex.cpp b/ml/dlib/examples/3d_point_cloud_ex.cpp new file mode 100644 index 00000000..f64a6897 --- /dev/null +++ b/ml/dlib/examples/3d_point_cloud_ex.cpp @@ -0,0 +1,50 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the perspective_window tool + in the dlib C++ Library. It is a simple tool for displaying 3D point + clouds on the screen. + +*/ + +#include <dlib/gui_widgets.h> +#include <dlib/image_transforms.h> +#include <cmath> + +using namespace dlib; +using namespace std; + +// ---------------------------------------------------------------------------------------- + +int main() +{ + // Let's make a point cloud that looks like a 3D spiral. + std::vector<perspective_window::overlay_dot> points; + dlib::rand rnd; + for (double i = 0; i < 20; i+=0.001) + { + // Get a point on a spiral + dlib::vector<double> val(sin(i),cos(i),i/4); + + // Now add some random noise to it + dlib::vector<double> temp(rnd.get_random_gaussian(), + rnd.get_random_gaussian(), + rnd.get_random_gaussian()); + val += temp/20; + + // Pick a color based on how far we are along the spiral + rgb_pixel color = colormap_jet(i,0,20); + + // And add the point to the list of points we will display + points.push_back(perspective_window::overlay_dot(val, color)); + } + + // Now finally display the point cloud. + perspective_window win; + win.set_title("perspective_window 3D point cloud"); + win.add_overlay(points); + win.wait_until_closed(); +} + +// ---------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/CMakeLists.txt b/ml/dlib/examples/CMakeLists.txt new file mode 100644 index 00000000..5c408d74 --- /dev/null +++ b/ml/dlib/examples/CMakeLists.txt @@ -0,0 +1,250 @@ +# +# _______ _ _ _____ _____ _____ _____ +# |__ __| | | |_ _|/ ____| |_ _|/ ____| /\ +# | | | |__| | | | | (___ | | | (___ / \ +# | | | __ | | | \___ \ | | \___ \ / /\ \ +# | | | | | |_| |_ ____) | _| |_ ____) | / ____ \ +# |_|__|_|_ |_|_____|_____/__ |_____|_____/ /_/ _ \_\ +# |__ __| | | |__ __/ __ \| __ \|_ _| /\ | | +# | | | | | | | | | | | | |__) | | | / \ | | +# | | | | | | | | | | | | _ / | | / /\ \ | | +# | | | |__| | | | | |__| | | \ \ _| |_ / ____ \| |____ +# |_| \____/ |_| \____/|_| \_\_____/_/ \_\______| +# +# +# _____ ______ _____ _______ _ _ ______ +# | __ \| ____| /\ | __ \ |__ __| | | | ____| +# | |__) | |__ / \ | | | | | | | |__| | |__ +# | _ /| __| / /\ \ | | | | | | | __ | __| +# | | \ \| |____ / ____ \| |__| | | | | | | | |____ +# |_|__\_\______/_/_ __\_\_____/__ _ |_|__|_|_ |_|______|_ _ _ +# / ____/ __ \| \/ | \/ | ____| \ | |__ __/ ____| | | | | | +# | | | | | | \ / | \ / | |__ | \| | | | | (___ | | | | | +# | | | | | | |\/| | |\/| | __| | . ` | | | \___ \ | | | | | +# | |___| |__| | | | | | | | |____| |\ | | | ____) | |_|_|_|_| +# \_____\____/|_| |_|_| |_|______|_| \_| |_| |_____/ (_|_|_|_) +# +# +# +# This is a CMake makefile. CMake is a tool that helps you build C++ programs. +# You can download CMake from http://www.cmake.org. This CMakeLists.txt file +# you are reading builds dlib's example programs. +# + + +cmake_minimum_required(VERSION 2.8.12) +# Every project needs a name. We call this the "examples" project. +project(examples) + + +# Tell cmake we will need dlib. This command will pull in dlib and compile it +# into your project. Note that you don't need to compile or install dlib. 
All
+# cmake needs is the dlib source code folder and it will take care of everything.
+add_subdirectory(../dlib dlib_build)
+
+
+# The next thing we need to do is tell CMake about the code you want to
+# compile. We do this with the add_executable() statement which takes the name
+# of the output executable and then a list of .cpp files to compile. Here we
+# are going to compile one of the dlib example programs which has only one .cpp
+# file, assignment_learning_ex.cpp. If your program consisted of multiple .cpp
+# files you would simply list them here in the add_executable() statement.
+add_executable(assignment_learning_ex assignment_learning_ex.cpp)
+# Finally, you need to tell CMake that this program, assignment_learning_ex,
+# depends on dlib. You do that with this statement:
+target_link_libraries(assignment_learning_ex dlib::dlib)
+
+
+
+# To compile this program all you need to do is ask cmake. You would type
+# these commands from within the directory containing this CMakeLists.txt
+# file:
+#   mkdir build
+#   cd build
+#   cmake ..
+#   cmake --build . --config Release
+#
+# The cmake .. command looks in the parent folder for a file named
+# CMakeLists.txt, reads it, and sets up everything needed to build the program.
+# Also, note that CMake can generate Visual Studio or Xcode project files. So
+# if instead you had written:
+#   cd build
+#   cmake .. -G Xcode
+#
+# You would be able to open the resulting Xcode project and compile and edit
+# the example programs within the Xcode IDE. CMake can generate a lot of
+# different types of IDE projects. Run the cmake -h command to see a list of
+# arguments to -G and what kinds of projects cmake can generate for you. It
+# probably includes your favorite IDE in the list.
+
+
+
+
+#################################################################################
+#################################################################################
+# A CMakeLists.txt file can compile more than just one program. So below we
+# tell it to compile the other dlib example programs using pretty much the
+# same CMake commands we used above.
+#################################################################################
+#################################################################################
+
+
+# Since there are a lot of examples I'm going to use a macro to simplify this
+# CMakeLists.txt file. However, usually you will create only one executable in
+# your cmake projects and use the syntax shown above.
+macro(add_example name)
+   add_executable(${name} ${name}.cpp)
+   target_link_libraries(${name} dlib::dlib )
+endmacro()
+
+# If an example requires the GUI, use this macro instead; it checks
+# DLIB_NO_GUI_SUPPORT and includes or excludes the example accordingly.
+macro(add_gui_example name)
+   if (DLIB_NO_GUI_SUPPORT)
+      message("No GUI support, so we won't build the ${name} example.")
+   else()
+      add_example(${name})
+   endif()
+endmacro()
+
+# The deep learning toolkit requires a compiler with essentially complete C++11
+# support. However, versions of Visual Studio prior to October 2016 didn't
+# provide enough C++11 support to compile the DNN tooling, but were good enough
+# to compile the rest of dlib. So updated versions of Visual Studio 2015 will
+# work. However, Visual Studio 2017 had some C++11 support regressions, so it
+# wasn't until December 2017 that Visual Studio 2017 had good enough C++11
+# support to compile the DNN examples. So if you are using Visual Studio, make
+# sure you have an updated version if you want to compile the DNN code.
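+#
+# (Aside, not part of the original file: the USING_OLD_VISUAL_STUDIO_COMPILER
+# variable tested further below is not defined in this file; dlib's own CMake
+# scripts are expected to set it based on the detected compiler. If you ever
+# needed a rough, hypothetical stand-in, a minimal sketch based on
+# MSVC_VERSION could look like this, where 1900 corresponds to Visual Studio
+# 2015:
+#    if (MSVC AND MSVC_VERSION LESS 1900)
+#       set(USING_OLD_VISUAL_STUDIO_COMPILER 1)
+#    endif()
+# This is only an approximation since, as noted above, what actually matters
+# is the C++11 support level of the specific compiler update installed.)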
+# +# Also note that Visual Studio users should give the -T host=x64 option so that +# CMake will instruct Visual Studio to use its 64bit toolchain. If you don't +# do this then by default Visual Studio uses a 32bit toolchain, WHICH RESTRICTS +# THE COMPILER TO ONLY 2GB OF RAM, causing it to run out of RAM and crash when +# compiling some of the DNN examples. So generate your project with a statement +# like this: +# cmake .. -G "Visual Studio 14 2015 Win64" -T host=x64 +if (NOT USING_OLD_VISUAL_STUDIO_COMPILER) + add_example(dnn_metric_learning_ex) + add_gui_example(dnn_face_recognition_ex) + add_example(dnn_introduction_ex) + add_example(dnn_introduction2_ex) + add_example(dnn_inception_ex) + add_gui_example(dnn_mmod_ex) + add_gui_example(dnn_mmod_face_detection_ex) + add_gui_example(random_cropper_ex) + add_gui_example(dnn_mmod_dog_hipsterizer) + add_gui_example(dnn_imagenet_ex) + add_gui_example(dnn_mmod_find_cars_ex) + add_gui_example(dnn_mmod_find_cars2_ex) + add_example(dnn_mmod_train_find_cars_ex) + add_gui_example(dnn_semantic_segmentation_ex) + add_example(dnn_imagenet_train_ex) + add_example(dnn_semantic_segmentation_train_ex) + add_example(dnn_metric_learning_on_images_ex) +endif() + + +if (DLIB_NO_GUI_SUPPORT) + message("No GUI support, so we won't build the webcam_face_pose_ex example.") +else() + find_package(OpenCV QUIET) + if (OpenCV_FOUND) + include_directories(${OpenCV_INCLUDE_DIRS}) + + add_executable(webcam_face_pose_ex webcam_face_pose_ex.cpp) + target_link_libraries(webcam_face_pose_ex dlib::dlib ${OpenCV_LIBS} ) + else() + message("OpenCV not found, so we won't build the webcam_face_pose_ex example.") + endif() +endif() + + + +#here we apply our macros +add_gui_example(3d_point_cloud_ex) +add_example(bayes_net_ex) +add_example(bayes_net_from_disk_ex) +add_gui_example(bayes_net_gui_ex) +add_example(bridge_ex) +add_example(bsp_ex) +add_example(compress_stream_ex) +add_example(config_reader_ex) +add_example(custom_trainer_ex) +add_example(dir_nav_ex) +add_example(empirical_kernel_map_ex) +add_gui_example(face_detection_ex) +add_gui_example(face_landmark_detection_ex) +add_gui_example(fhog_ex) +add_gui_example(fhog_object_detector_ex) +add_example(file_to_code_ex) +add_example(graph_labeling_ex) +add_gui_example(gui_api_ex) +add_gui_example(hough_transform_ex) +add_gui_example(image_ex) +add_example(integrate_function_adapt_simp_ex) +add_example(iosockstream_ex) +add_example(kcentroid_ex) +add_example(kkmeans_ex) +add_example(krls_ex) +add_example(krls_filter_ex) +add_example(krr_classification_ex) +add_example(krr_regression_ex) +add_example(learning_to_track_ex) +add_example(least_squares_ex) +add_example(linear_manifold_regularizer_ex) +add_example(logger_custom_output_ex) +add_example(logger_ex) +add_example(logger_ex_2) +add_example(matrix_ex) +add_example(matrix_expressions_ex) +add_example(max_cost_assignment_ex) +add_example(member_function_pointer_ex) +add_example(mlp_ex) +add_example(model_selection_ex) +add_gui_example(mpc_ex) +add_example(multiclass_classification_ex) +add_example(multithreaded_object_ex) +add_gui_example(object_detector_advanced_ex) +add_gui_example(object_detector_ex) +add_gui_example(one_class_classifiers_ex) +add_example(optimization_ex) +add_example(parallel_for_ex) +add_example(pipe_ex) +add_example(pipe_ex_2) +add_example(quantum_computing_ex) +add_example(queue_ex) +add_example(rank_features_ex) +add_example(running_stats_ex) +add_example(rvm_ex) +add_example(rvm_regression_ex) +add_example(sequence_labeler_ex) 
+add_example(sequence_segmenter_ex) +add_example(server_http_ex) +add_example(server_iostream_ex) +add_example(sockets_ex) +add_example(sockstreambuf_ex) +add_example(std_allocator_ex) +add_gui_example(surf_ex) +add_example(svm_c_ex) +add_example(svm_ex) +add_example(svm_pegasos_ex) +add_example(svm_rank_ex) +add_example(svm_sparse_ex) +add_example(svm_struct_ex) +add_example(svr_ex) +add_example(thread_function_ex) +add_example(thread_pool_ex) +add_example(threaded_object_ex) +add_example(threads_ex) +add_example(timer_ex) +add_gui_example(train_object_detector) +add_example(train_shape_predictor_ex) +add_example(using_custom_kernels_ex) +add_gui_example(video_tracking_ex) +add_example(xml_parser_ex) + + +if (DLIB_LINK_WITH_SQLITE3) + add_example(sqlite_ex) +endif() + + diff --git a/ml/dlib/examples/LICENSE_FOR_EXAMPLE_PROGRAMS.txt b/ml/dlib/examples/LICENSE_FOR_EXAMPLE_PROGRAMS.txt new file mode 100644 index 00000000..c69b87af --- /dev/null +++ b/ml/dlib/examples/LICENSE_FOR_EXAMPLE_PROGRAMS.txt @@ -0,0 +1,22 @@ +The intent of the example programs supplied with the dlib C++ library is +to both instruct users and to also provide a simple body of code they +may copy and paste from. To make this as painless as possible all the +example programs have been placed into the public domain. + + +This work is hereby released into the Public Domain. +To view a copy of the public domain dedication, visit +http://creativecommons.org/licenses/publicdomain/ or send a +letter to + Creative Commons + 171 Second Street + Suite 300, + San Francisco, California, 94105, USA. + + + +Public domain dedications are not recognized by some countries. So +if you live in an area where the above dedication isn't valid then +you can consider the example programs to be licensed under the Boost +Software License. + diff --git a/ml/dlib/examples/assignment_learning_ex.cpp b/ml/dlib/examples/assignment_learning_ex.cpp new file mode 100644 index 00000000..7a3acd01 --- /dev/null +++ b/ml/dlib/examples/assignment_learning_ex.cpp @@ -0,0 +1,325 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the dlib machine learning tools for + learning to solve the assignment problem. + + Many tasks in computer vision or natural language processing can be thought of + as assignment problems. For example, in a computer vision application where + you are trying to track objects moving around in video, you likely need to solve + an association problem every time you get a new video frame. That is, each new + frame will contain objects (e.g. people, cars, etc.) and you will want to + determine which of these objects are actually things you have seen in previous + frames. + + The assignment problem can be optimally solved using the well known Hungarian + algorithm. However, this algorithm requires the user to supply some function + which measures the "goodness" of an individual association. In many cases the + best way to measure this goodness isn't obvious and therefore machine learning + methods are used. + + The remainder of this example will show you how to learn a goodness function + which is optimal, in a certain sense, for use with the Hungarian algorithm. To + do this, we will make a simple dataset of example associations and use them to + train a supervised machine learning method. + + Finally, note that there is a whole example program dedicated to assignment + learning problems where you are trying to make an object tracker. 
So if that is + what you are interested in then take a look at the learning_to_track_ex.cpp + example program. +*/ + + +#include <iostream> +#include <dlib/svm_threaded.h> + +using namespace std; +using namespace dlib; + + +// ---------------------------------------------------------------------------------------- + +/* + In an association problem, we will talk about the "Left Hand Set" (LHS) and the + "Right Hand Set" (RHS). The task will be to learn to map all elements of LHS to + unique elements of RHS. If an element of LHS can't be mapped to a unique element of + RHS for some reason (e.g. LHS is bigger than RHS) then it can also be mapped to the + special -1 output, indicating no mapping to RHS. + + So the first step is to define the type of elements in each of these sets. In the + code below we will use column vectors in both LHS and RHS. However, in general, + they can each contain any type you like. LHS can even contain a different type + than RHS. +*/ + +typedef dlib::matrix<double,0,1> column_vector; + +// This type represents a pair of LHS and RHS. That is, sample_type::first +// contains a left hand set and sample_type::second contains a right hand set. +typedef std::pair<std::vector<column_vector>, std::vector<column_vector> > sample_type; + +// This type will contain the association information between LHS and RHS. That is, +// it will determine which elements of LHS map to which elements of RHS. +typedef std::vector<long> label_type; + +// In this example, all our LHS and RHS elements will be 3-dimensional vectors. +const unsigned long num_dims = 3; + +void make_data ( + std::vector<sample_type>& samples, + std::vector<label_type>& labels +); +/*! + ensures + - This function creates a training dataset of 5 example associations. + - #samples.size() == 5 + - #labels.size() == 5 + - for all valid i: + - #samples[i].first == a left hand set + - #samples[i].second == a right hand set + - #labels[i] == a set of integers indicating how to map LHS to RHS. To be + precise: + - #samples[i].first.size() == #labels[i].size() + - for all valid j: + -1 <= #labels[i][j] < #samples[i].second.size() + (A value of -1 indicates that #samples[i].first[j] isn't associated with anything. + All other values indicate the associating element of #samples[i].second) + - All elements of #labels[i] which are not equal to -1 are unique. That is, + multiple elements of #samples[i].first can't associate to the same element + in #samples[i].second. +!*/ + +// ---------------------------------------------------------------------------------------- + +struct feature_extractor +{ + /*! + Recall that our task is to learn the "goodness of assignment" function for + use with the Hungarian algorithm. The dlib tools assume this function + can be written as: + match_score(l,r) == dot(w, PSI(l,r)) + bias + where l is an element of LHS, r is an element of RHS, w is a parameter vector, + bias is a scalar value, and PSI() is a user supplied feature extractor. + + This feature_extractor is where we implement PSI(). How you implement this + is highly problem dependent. + !*/ + + // The type of feature vector returned from get_features(). This must be either + // a dlib::matrix or a sparse vector. 
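+    // (Aside, not from the original example: a dlib-style sparse vector is a
+    // std::vector of (dimension, value) pairs sorted by dimension, so a sparse
+    // variant of this typedef could look like:
+    //    typedef std::vector<std::pair<unsigned long,double> > feature_vector_type;
+    // Here we keep the dense column_vector the original code uses.)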
+ typedef column_vector feature_vector_type; + + // The types of elements in the LHS and RHS sets + typedef column_vector lhs_element; + typedef column_vector rhs_element; + + + unsigned long num_features() const + { + // Return the dimensionality of feature vectors produced by get_features() + return num_dims; + } + + void get_features ( + const lhs_element& left, + const rhs_element& right, + feature_vector_type& feats + ) const + /*! + ensures + - #feats == PSI(left,right) + (i.e. This function computes a feature vector which, in some sense, + captures information useful for deciding if matching left to right + is "good"). + !*/ + { + // Let's just use the squared difference between each vector as our features. + // However, it should be emphasized that how to compute the features here is very + // problem dependent. + feats = squared(left - right); + } + +}; + +// We need to define serialize() and deserialize() for our feature extractor if we want +// to be able to serialize and deserialize our learned models. In this case the +// implementation is empty since our feature_extractor doesn't have any state. But you +// might define more complex feature extractors which have state that needs to be saved. +void serialize (const feature_extractor& , std::ostream& ) {} +void deserialize (feature_extractor& , std::istream& ) {} + +// ---------------------------------------------------------------------------------------- + +int main() +{ + try + { + // Get a small bit of training data. + std::vector<sample_type> samples; + std::vector<label_type> labels; + make_data(samples, labels); + + + structural_assignment_trainer<feature_extractor> trainer; + // This is the common SVM C parameter. Larger values encourage the + // trainer to attempt to fit the data exactly but might overfit. + // In general, you determine this parameter by cross-validation. + trainer.set_c(10); + // This trainer can use multiple CPU cores to speed up the training. + // So set this to the number of available CPU cores. + trainer.set_num_threads(4); + + // Do the training and save the results in assigner. + assignment_function<feature_extractor> assigner = trainer.train(samples, labels); + + + // Test the assigner on our data. The output will indicate that it makes the + // correct associations on all samples. + cout << "Test the learned assignment function: " << endl; + for (unsigned long i = 0; i < samples.size(); ++i) + { + // Predict the assignments for the LHS and RHS in samples[i]. + std::vector<long> predicted_assignments = assigner(samples[i]); + cout << "true labels: " << trans(mat(labels[i])); + cout << "predicted labels: " << trans(mat(predicted_assignments)) << endl; + } + + // We can also use this tool to compute the percentage of assignments predicted correctly. + cout << "training accuracy: " << test_assignment_function(assigner, samples, labels) << endl; + + + // Since testing on your training data is a really bad idea, we can also do 5-fold cross validation. + // Happily, this also indicates that all associations were made correctly. + randomize_samples(samples, labels); + cout << "cv accuracy: " << cross_validate_assignment_trainer(trainer, samples, labels, 5) << endl; + + + + // Finally, the assigner can be serialized to disk just like most dlib objects. 
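+        // (This works because we defined the serialize()/deserialize() overloads
+        // for our feature_extractor earlier in this file; without them the two
+        // statements below would not compile.)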
+ serialize("assigner.dat") << assigner; + + // recall from disk + deserialize("assigner.dat") >> assigner; + } + catch (std::exception& e) + { + cout << "EXCEPTION THROWN" << endl; + cout << e.what() << endl; + } +} + +// ---------------------------------------------------------------------------------------- + +void make_data ( + std::vector<sample_type>& samples, + std::vector<label_type>& labels +) +{ + // Make four different vectors. We will use them to make example assignments. + column_vector A(num_dims), B(num_dims), C(num_dims), D(num_dims); + A = 1,0,0; + B = 0,1,0; + C = 0,0,1; + D = 0,1,1; + + std::vector<column_vector> lhs; + std::vector<column_vector> rhs; + label_type mapping; + + // In all the assignments to follow, we will only say an element of the LHS + // matches an element of the RHS if the two are equal. So A matches with A, + // B with B, etc. But never A with C, for example. + // ------------------------ + + lhs.resize(3); + lhs[0] = A; + lhs[1] = B; + lhs[2] = C; + + rhs.resize(3); + rhs[0] = B; + rhs[1] = A; + rhs[2] = C; + + mapping.resize(3); + mapping[0] = 1; // lhs[0] matches rhs[1] + mapping[1] = 0; // lhs[1] matches rhs[0] + mapping[2] = 2; // lhs[2] matches rhs[2] + + samples.push_back(make_pair(lhs,rhs)); + labels.push_back(mapping); + + // ------------------------ + + lhs[0] = C; + lhs[1] = A; + lhs[2] = B; + + rhs[0] = A; + rhs[1] = B; + rhs[2] = D; + + mapping[0] = -1; // The -1 indicates that lhs[0] doesn't match anything in rhs. + mapping[1] = 0; // lhs[1] matches rhs[0] + mapping[2] = 1; // lhs[2] matches rhs[1] + + samples.push_back(make_pair(lhs,rhs)); + labels.push_back(mapping); + + // ------------------------ + + lhs[0] = A; + lhs[1] = B; + lhs[2] = C; + + rhs.resize(4); + rhs[0] = C; + rhs[1] = B; + rhs[2] = A; + rhs[3] = D; + + mapping[0] = 2; + mapping[1] = 1; + mapping[2] = 0; + + samples.push_back(make_pair(lhs,rhs)); + labels.push_back(mapping); + + // ------------------------ + + lhs.resize(2); + lhs[0] = B; + lhs[1] = C; + + rhs.resize(3); + rhs[0] = C; + rhs[1] = A; + rhs[2] = D; + + mapping.resize(2); + mapping[0] = -1; + mapping[1] = 0; + + samples.push_back(make_pair(lhs,rhs)); + labels.push_back(mapping); + + // ------------------------ + + lhs.resize(3); + lhs[0] = D; + lhs[1] = B; + lhs[2] = C; + + // rhs will be empty. So none of the items in lhs can match anything. + rhs.resize(0); + + mapping.resize(3); + mapping[0] = -1; + mapping[1] = -1; + mapping[2] = -1; + + samples.push_back(make_pair(lhs,rhs)); + labels.push_back(mapping); + +} + diff --git a/ml/dlib/examples/bayes_net_ex.cpp b/ml/dlib/examples/bayes_net_ex.cpp new file mode 100644 index 00000000..64f2ad95 --- /dev/null +++ b/ml/dlib/examples/bayes_net_ex.cpp @@ -0,0 +1,307 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is an example illustrating the use of the Bayesian Network + inference utilities found in the dlib C++ library. + + + In this example all the nodes in the Bayesian network are + boolean variables. That is, they take on either the value + 0 or the value 1. + + The network contains 4 nodes and looks as follows: + + B C + \\ // + \/ \/ + A + || + \/ + D + + + The probabilities of each node are summarized below. 
(The probability
+    of each node being 0 is not listed since it is just p(X=0) = 1 - p(X=1))
+
+    p(B=1) = 0.01
+
+    p(C=1) = 0.001
+
+    p(A=1 | B=0, C=0) = 0.01
+    p(A=1 | B=0, C=1) = 0.5
+    p(A=1 | B=1, C=0) = 0.9
+    p(A=1 | B=1, C=1) = 0.99
+
+    p(D=1 | A=0) = 0.2
+    p(D=1 | A=1) = 0.5
+
+*/
+
+
+#include <dlib/bayes_utils.h>
+#include <dlib/graph_utils.h>
+#include <dlib/graph.h>
+#include <dlib/directed_graph.h>
+#include <iostream>
+
+
+using namespace dlib;
+using namespace std;
+
+// ----------------------------------------------------------------------------------------
+
+int main()
+{
+    try
+    {
+        // There are many useful convenience functions in this namespace. They all
+        // perform simple access or modify operations on the nodes of a bayesian network.
+        // You don't have to use them but they are convenient and they also will check for
+        // various errors in your bayesian network when your application is built with
+        // the DEBUG or ENABLE_ASSERTS preprocessor definitions defined. So their use
+        // is recommended. In fact, most of the global functions used in this example
+        // program are from this namespace.
+        using namespace bayes_node_utils;
+
+        // This statement declares a bayesian network called bn. Note that a bayesian network
+        // in the dlib world is just a directed_graph object that contains a special kind
+        // of node called a bayes_node.
+        directed_graph<bayes_node>::kernel_1a_c bn;
+
+        // Use an enum to make some more readable names for our nodes.
+        enum nodes
+        {
+            A = 0,
+            B = 1,
+            C = 2,
+            D = 3
+        };
+
+        // The next few blocks of code set up our bayesian network.
+
+        // The first thing we do is tell the bn object how many nodes it has
+        // and also add the three edges. Again, we are using the network
+        // shown in ASCII art at the top of this file.
+        bn.set_number_of_nodes(4);
+        bn.add_edge(A, D);
+        bn.add_edge(B, A);
+        bn.add_edge(C, A);
+
+
+        // Now we inform all the nodes in the network that they are binary
+        // nodes. That is, they only have two possible values.
+        set_node_num_values(bn, A, 2);
+        set_node_num_values(bn, B, 2);
+        set_node_num_values(bn, C, 2);
+        set_node_num_values(bn, D, 2);
+
+        assignment parent_state;
+        // Now we will enter all the conditional probability information for each node.
+        // Each node's conditional probability is dependent on the state of its parents.
+        // To specify this state we need to use the assignment object. This assignment
+        // object allows us to specify the state of each node's parents.
+
+
+        // Here we specify that p(B=1) = 0.01
+        // parent_state is empty in this case since B is a root node.
+        set_node_probability(bn, B, 1, parent_state, 0.01);
+        // Here we specify that p(B=0) = 1-0.01
+        set_node_probability(bn, B, 0, parent_state, 1-0.01);
+
+
+        // Here we specify that p(C=1) = 0.001
+        // parent_state is empty in this case since C is a root node.
+        set_node_probability(bn, C, 1, parent_state, 0.001);
+        // Here we specify that p(C=0) = 1-0.001
+        set_node_probability(bn, C, 0, parent_state, 1-0.001);
+
+
+        // This is our first node that has parents. So we set the parent_state
+        // object to reflect that A has both B and C as parents.
+        parent_state.add(B, 1);
+        parent_state.add(C, 1);
+        // Here we specify that p(A=1 | B=1, C=1) = 0.99
+        set_node_probability(bn, A, 1, parent_state, 0.99);
+        // Here we specify that p(A=0 | B=1, C=1) = 1-0.99
+        set_node_probability(bn, A, 0, parent_state, 1-0.99);
+
+        // Here we use the [] notation because B and C have already
+        // been added into parent state.
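+        // (In other words: assignment::add() creates a new (node, value) entry,
+        // while operator[] only updates an entry that already exists, so add()
+        // must come first for each node index.)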
+        parent_state[B] = 1;
+        parent_state[C] = 0;
+        // Here we specify that p(A=1 | B=1, C=0) = 0.9
+        set_node_probability(bn, A, 1, parent_state, 0.9);
+        set_node_probability(bn, A, 0, parent_state, 1-0.9);
+
+        parent_state[B] = 0;
+        parent_state[C] = 1;
+        // Here we specify that p(A=1 | B=0, C=1) = 0.5
+        set_node_probability(bn, A, 1, parent_state, 0.5);
+        set_node_probability(bn, A, 0, parent_state, 1-0.5);
+
+        parent_state[B] = 0;
+        parent_state[C] = 0;
+        // Here we specify that p(A=1 | B=0, C=0) = 0.01
+        set_node_probability(bn, A, 1, parent_state, 0.01);
+        set_node_probability(bn, A, 0, parent_state, 1-0.01);
+
+
+        // Here we set probabilities for node D.
+        // First we clear out parent state so that it doesn't have any of
+        // the assignments for the B and C nodes used above.
+        parent_state.clear();
+        parent_state.add(A,1);
+        // Here we specify that p(D=1 | A=1) = 0.5
+        set_node_probability(bn, D, 1, parent_state, 0.5);
+        set_node_probability(bn, D, 0, parent_state, 1-0.5);
+
+        parent_state[A] = 0;
+        // Here we specify that p(D=1 | A=0) = 0.2
+        set_node_probability(bn, D, 1, parent_state, 0.2);
+        set_node_probability(bn, D, 0, parent_state, 1-0.2);
+
+
+
+        // We have now finished setting up our bayesian network. So let's compute some
+        // probability values. The first thing we will do is compute the prior probability
+        // of each node in the network. To do this we will use the join tree algorithm which
+        // is an algorithm for performing exact inference in a bayesian network.
+
+        // First we need to create an undirected graph which contains set objects at each node and
+        // edge. This long declaration does the trick.
+        typedef dlib::set<unsigned long>::compare_1b_c set_type;
+        typedef graph<set_type, set_type>::kernel_1a_c join_tree_type;
+        join_tree_type join_tree;
+
+        // Now we need to populate the join_tree with data from our bayesian network. The next
+        // two function calls do this. Explaining exactly what they do is outside the scope of
+        // this example. Just think of them as filling join_tree with information that is useful
+        // later on for dealing with our bayesian network.
+        create_moral_graph(bn, join_tree);
+        create_join_tree(join_tree, join_tree);
+
+        // Now that we have a proper join_tree we can use it to obtain a solution to our
+        // bayesian network. Doing this is as simple as declaring an instance of
+        // the bayesian_network_join_tree object as follows:
+        bayesian_network_join_tree solution(bn, join_tree);
+
+
+        // now print out the probabilities for each node
+        cout << "Using the join tree algorithm:\n";
+        cout << "p(A=1) = " << solution.probability(A)(1) << endl;
+        cout << "p(A=0) = " << solution.probability(A)(0) << endl;
+        cout << "p(B=1) = " << solution.probability(B)(1) << endl;
+        cout << "p(B=0) = " << solution.probability(B)(0) << endl;
+        cout << "p(C=1) = " << solution.probability(C)(1) << endl;
+        cout << "p(C=0) = " << solution.probability(C)(0) << endl;
+        cout << "p(D=1) = " << solution.probability(D)(1) << endl;
+        cout << "p(D=0) = " << solution.probability(D)(0) << endl;
+        cout << "\n\n\n";
+
+
+        // Now to make things more interesting let's say that we have discovered that the C
+        // node really has a value of 1. That is to say, we now have evidence that
+        // C is 1. We can represent this in the network using the following two function
+        // calls.
+        set_node_value(bn, C, 1);
+        set_node_as_evidence(bn, C);
+
+        // Now we want to compute the probabilities of all the nodes in the network again
+        // given that we now know that C is 1. We can do this as follows:
+        bayesian_network_join_tree solution_with_evidence(bn, join_tree);
+
+        // now print out the probabilities for each node
+        cout << "Using the join tree algorithm:\n";
+        cout << "p(A=1 | C=1) = " << solution_with_evidence.probability(A)(1) << endl;
+        cout << "p(A=0 | C=1) = " << solution_with_evidence.probability(A)(0) << endl;
+        cout << "p(B=1 | C=1) = " << solution_with_evidence.probability(B)(1) << endl;
+        cout << "p(B=0 | C=1) = " << solution_with_evidence.probability(B)(0) << endl;
+        cout << "p(C=1 | C=1) = " << solution_with_evidence.probability(C)(1) << endl;
+        cout << "p(C=0 | C=1) = " << solution_with_evidence.probability(C)(0) << endl;
+        cout << "p(D=1 | C=1) = " << solution_with_evidence.probability(D)(1) << endl;
+        cout << "p(D=0 | C=1) = " << solution_with_evidence.probability(D)(0) << endl;
+        cout << "\n\n\n";
+
+        // Note that when we made our solution_with_evidence object we reused our join_tree object.
+        // This saves us the time it takes to calculate the join_tree object from scratch. But
+        // it is important to note that we can only reuse the join_tree object if we haven't changed
+        // the structure of our bayesian network. That is, if we have added or removed nodes or
+        // edges from our bayesian network then we must recompute our join_tree. But in this example
+        // all we did was change the value of a bayes_node object (we made node C be evidence)
+        // so we are ok.
+
+
+
+
+
+        // Next this example will show you how to use the bayesian_network_gibbs_sampler object
+        // to perform approximate inference in a bayesian network. This is an algorithm
+        // that doesn't give you an exact solution but it may be necessary to use in some
+        // instances. For example, the join tree algorithm used above, while fast in many
+        // instances, has exponential runtime in some cases. Moreover, inference in bayesian
+        // networks is NP-Hard for general networks so sometimes the best you can do is
+        // find an approximation.
+        // However, it should be noted that the Gibbs sampler does not compute the correct
+        // probabilities if the network contains a deterministic node. That is, if any
+        // of the conditional probability tables in the bayesian network have a probability
+        // of 1.0 for something, the Gibbs sampler should not be used.
+
+
+        // This Gibbs sampler algorithm works by randomly sampling possible values of the
+        // network. So to use it we should set the network to some initial state.
+
+        set_node_value(bn, A, 0);
+        set_node_value(bn, B, 0);
+        set_node_value(bn, D, 0);
+
+        // We will leave the C node with a value of 1 and keep it as an evidence node.
+
+
+        // First create an instance of the gibbs sampler object
+        bayesian_network_gibbs_sampler sampler;
+
+
+        // To use this algorithm all we do is go into a loop for a certain number of times
+        // and each time through we sample the bayesian network. Then we count how
+        // many times a node has a certain state. Then the probability of that node
+        // having that state is just its count/total times through the loop.
+
+        // The following code illustrates the general procedure.
+        unsigned long A_count = 0;
+        unsigned long B_count = 0;
+        unsigned long C_count = 0;
+        unsigned long D_count = 0;
+
+        // The more times you let the loop run the more accurate the result will be. Here we loop
+        // 2000 times.
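+        // (Aside, not part of the original example: each estimate below is a mean
+        // of 2000 zero/one draws, so if the draws were independent its standard
+        // error would be sqrt(p*(1-p)/rounds); for p near 0.5 and rounds == 2000
+        // that is sqrt(0.25/2000), roughly 0.011. Gibbs samples are correlated,
+        // so treat +/- 0.01 as an optimistic lower bound on how far the printed
+        // values may wander from the exact join tree answers.)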
+        const long rounds = 2000;
+        for (long i = 0; i < rounds; ++i)
+        {
+            sampler.sample_graph(bn);
+
+            if (node_value(bn, A) == 1)
+                ++A_count;
+            if (node_value(bn, B) == 1)
+                ++B_count;
+            if (node_value(bn, C) == 1)
+                ++C_count;
+            if (node_value(bn, D) == 1)
+                ++D_count;
+        }
+
+        cout << "Using the approximate Gibbs Sampler algorithm:\n";
+        cout << "p(A=1 | C=1) = " << (double)A_count/(double)rounds << endl;
+        cout << "p(B=1 | C=1) = " << (double)B_count/(double)rounds << endl;
+        cout << "p(C=1 | C=1) = " << (double)C_count/(double)rounds << endl;
+        cout << "p(D=1 | C=1) = " << (double)D_count/(double)rounds << endl;
+    }
+    catch (std::exception& e)
+    {
+        cout << "exception thrown: " << endl;
+        cout << e.what() << endl;
+        cout << "hit enter to terminate" << endl;
+        cin.get();
+    }
+}
+
+
+
diff --git a/ml/dlib/examples/bayes_net_from_disk_ex.cpp b/ml/dlib/examples/bayes_net_from_disk_ex.cpp
new file mode 100644
index 00000000..eaab5881
--- /dev/null
+++ b/ml/dlib/examples/bayes_net_from_disk_ex.cpp
@@ -0,0 +1,83 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    This is an example illustrating the use of the Bayesian Network
+    inference utilities found in the dlib C++ library. In this example
+    we load a saved Bayesian Network from disk.
+*/
+
+
+#include <dlib/bayes_utils.h>
+#include <dlib/graph_utils.h>
+#include <dlib/graph.h>
+#include <dlib/directed_graph.h>
+#include <iostream>
+#include <fstream>
+
+
+using namespace dlib;
+using namespace std;
+
+// ----------------------------------------------------------------------------------------
+
+int main(int argc, char** argv)
+{
+    try
+    {
+        // This statement declares a bayesian network called bn. Note that a bayesian network
+        // in the dlib world is just a directed_graph object that contains a special kind
+        // of node called a bayes_node.
+        directed_graph<bayes_node>::kernel_1a_c bn;
+
+        if (argc != 2)
+        {
+            cout << "You must supply a file name on the command line. The file should "
+                 << "contain a serialized Bayesian Network" << endl;
+            return 1;
+        }
+
+        ifstream fin(argv[1],ios::binary);
+
+        // Note that the saved networks produced by the bayes_net_gui_ex.cpp example can be deserialized
+        // into a network. So you can make your networks using that GUI if you like.
+        cout << "Loading the network from disk..." << endl;
+        deserialize(bn, fin);
+
+        cout << "Number of nodes in the network: " << bn.number_of_nodes() << endl;
+
+        // Let's compute some probability values from the loaded network using the join
+        // tree (a.k.a. junction tree) algorithm.
+
+        // First we need to create an undirected graph which contains set objects at each node and
+        // edge. This long declaration does the trick.
+        typedef graph<dlib::set<unsigned long>::compare_1b_c, dlib::set<unsigned long>::compare_1b_c>::kernel_1a_c join_tree_type;
+        join_tree_type join_tree;
+
+        // Now we need to populate the join_tree with data from our bayesian network. The next two
+        // function calls do this. Explaining exactly what they do is outside the scope of this
+        // example. Just think of them as filling join_tree with information that is useful
+        // later on for dealing with our bayesian network.
+        create_moral_graph(bn, join_tree);
+        create_join_tree(join_tree, join_tree);
+
+        // Now that we have a proper join_tree we can use it to obtain a solution to our
+        // bayesian network. 
Doing this is as simple as declaring an instance of + // the bayesian_network_join_tree object as follows: + bayesian_network_join_tree solution(bn, join_tree); + + + // now print out the probabilities for each node + cout << "Using the join tree algorithm:\n"; + for (unsigned long i = 0; i < bn.number_of_nodes(); ++i) + { + // print out the probability distribution for node i. + cout << "p(node " << i <<") = " << solution.probability(i); + } + } + catch (exception& e) + { + cout << "exception thrown: " << e.what() << endl; + return 1; + } +} + + diff --git a/ml/dlib/examples/bayes_net_gui_ex.cpp b/ml/dlib/examples/bayes_net_gui_ex.cpp new file mode 100644 index 00000000..81101912 --- /dev/null +++ b/ml/dlib/examples/bayes_net_gui_ex.cpp @@ -0,0 +1,989 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is a rather involved example illustrating the use of the GUI api from + the dlib C++ Library. This program is a fully functional utility for + creating Bayesian Networks. It allows the user to graphically draw the network, + save/load the network to/from disk, and also to calculate the posterior + probability of any node in the network given a set of evidence. + + This is not the first dlib example program you should be looking at. If you + want to see a simpler GUI example please look at the gui_api_ex.cpp or + image_ex.cpp example. + + If you want to understand how to use the Bayesian Network utilities in the library + you should definitely look at the bayes_net_ex.cpp example program. It gives a + comprehensive introduction to creating and manipulating Bayesian Networks. If you + want to see how to load a saved network from disk and use it in a non-GUI application + then look at the bayes_net_from_disk_ex.cpp example. + + + Now all of that being said, if you have already looked at the other relevant + examples and want to see a more in-depth example then by all means, continue reading. :) +*/ + +#include <memory> +#include <sstream> +#include <string> + +#include <dlib/gui_widgets.h> +#include <dlib/directed_graph.h> +#include <dlib/string.h> +#include <dlib/bayes_utils.h> +#include <dlib/set.h> +#include <dlib/graph_utils.h> +#include <dlib/stl_checked.h> + + +using namespace std; +using namespace dlib; +using namespace dlib::bayes_node_utils; + +// ---------------------------------------------------------------------------- + +typedef directed_graph<bayes_node>::kernel_1a_c directed_graph_type; +typedef directed_graph<bayes_node>::kernel_1a_c::node_type node_type; +typedef graph<dlib::set<unsigned long>::compare_1b_c, dlib::set<unsigned long>::compare_1b_c>::kernel_1a_c join_tree_type; + +// ---------------------------------------------------------------------------- + +class main_window : public drawable_window +{ + /*! + INITIAL VALUE + This window starts out hidden and with an empty Bayesian Network + + WHAT THIS OBJECT REPRESENTS + This object is the main window of a utility for drawing Bayesian Networks. + It allows you to draw a directed graph and to set the conditional probability + tables up for each node in the network. It also allows you to compute the + posterior probability of each node. 
And finally, it lets you save and load + networks from file + !*/ +public: + main_window(); + ~main_window(); + +private: + + // Private helper methods + + void initialize_node_cpt_if_necessary ( unsigned long index ); + void load_selected_node_tables_into_cpt_grid (); + void load_selected_node_tables_into_ppt_grid (); + void no_node_selected (); + + + // Event handlers + + void on_cpt_grid_modified(unsigned long row, unsigned long col); + void on_evidence_toggled (); + void on_graph_modified (); + void on_menu_file_open (); + void on_menu_file_quit (); + void on_menu_file_save (); + void on_menu_file_save_as (); + void on_menu_help_about (); + void on_menu_help_help (); + void on_node_deleted (); + void on_node_deselected ( unsigned long n ); + void on_node_selected (unsigned long n); + void on_open_file_selected ( const std::string& file_name); + void on_save_file_selected ( const std::string& file_name); + void on_sel_node_evidence_modified (); + void on_sel_node_num_values_modified (); + void on_sel_node_text_modified (); + void on_window_resized (); + void recalculate_probabilities (); + + // Member data + + const rgb_pixel color_non_evidence; + const rgb_pixel color_default_bg; + const rgb_pixel color_evidence; + const rgb_pixel color_error; + const rgb_pixel color_gray; + bool graph_modified_since_last_recalc; + + button btn_calculate; + check_box sel_node_is_evidence; + directed_graph_drawer<directed_graph_type> graph_drawer; + label sel_node_index; + label sel_node_num_values_label; + label sel_node_text_label; + label sel_node_evidence_label; + menu_bar mbar; + named_rectangle selected_node_rect; + tabbed_display tables; + text_field sel_node_num_values; + text_field sel_node_text; + text_field sel_node_evidence; + text_grid cpt_grid; + text_grid ppt_grid; + unsigned long selected_node_index; + bool node_is_selected; + widget_group cpt_group; + widget_group ppt_group; + + std::unique_ptr<bayesian_network_join_tree> solution; + join_tree_type join_tree; + // The std_vector_c is an object identical to the std::vector except that it checks + // all its preconditions and throws a dlib::fatal_error if they are violated. + std_vector_c<assignment> cpt_grid_assignments; + std::string graph_file_name; +}; + +// ---------------------------------------------------------------------------------------- + +int main() +{ + // create our window + main_window my_window; + + // tell our window to put itself on the screen + my_window.show(); + + // wait until the user closes this window before we let the program + // terminate. + my_window.wait_until_closed(); +} + +// ---------------------------------------------------------------------------------------- + +#ifdef WIN32 +// If you use main() as your entry point when building a program on MS Windows then +// there will be a black console window associated with your application. If you +// want your application to not have this console window then you need to build +// using the WinMain() entry point as shown below and also set your compiler to +// produce a "Windows" project instead of a "Console" project. In visual studio +// this can be accomplished by going to project->properties->general configuration-> +// Linker->System->SubSystem and selecting Windows instead of Console. 
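+// (Aside, not part of the original example: if you build with CMake, an
+// alternative to flipping the project type by hand is to mark the target as a
+// GUI application, e.g.
+//     set_target_properties(bayes_net_gui_ex PROPERTIES WIN32_EXECUTABLE TRUE)
+// which makes the MSVC linker use /SUBSYSTEM:WINDOWS. You would then still
+// need a WinMain() entry point like the one below.)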
+// +int WINAPI WinMain (HINSTANCE, HINSTANCE, PSTR cmds, int) +{ + main(); + return 0; +} +#endif + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// Methods from the main_window object +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +main_window:: +main_window( +) : + color_non_evidence(0,0,0), + color_default_bg(255,255,255), + color_evidence(100,200,100), + color_error(255,0,0), + color_gray(210,210,210), + graph_modified_since_last_recalc(true), + btn_calculate(*this), + sel_node_is_evidence(*this), + graph_drawer(*this), + sel_node_index(*this), + sel_node_num_values_label (*this), + sel_node_text_label(*this), + sel_node_evidence_label(*this), + mbar(*this), + selected_node_rect(*this), + tables(*this), + sel_node_num_values(*this), + sel_node_text(*this), + sel_node_evidence(*this), + cpt_grid(*this), + ppt_grid(*this), + selected_node_index(0), + node_is_selected(false), + cpt_group(*this), + ppt_group(*this) +{ + // Note that all the GUI widgets take a reference to the window that contains them + // as their constructor argument. This is a universal feature of GUI widgets in the + // dlib library. + + set_title("Bayesian Network Utility"); + + // position the widget that is responsible for drawing the directed graph, the graph_drawer, + // just below the mbar (menu bar) widget. + graph_drawer.set_pos(5,mbar.bottom()+5); + set_size(750,400); + + // register the event handlers with their respective widgets + btn_calculate.set_click_handler (*this, &main_window::recalculate_probabilities); + cpt_grid.set_text_modified_handler (*this, &main_window::on_cpt_grid_modified); + graph_drawer.set_graph_modified_handler (*this, &main_window::on_graph_modified); + graph_drawer.set_node_deleted_handler (*this, &main_window::on_node_deleted); + graph_drawer.set_node_deselected_handler (*this, &main_window::on_node_deselected); + graph_drawer.set_node_selected_handler (*this, &main_window::on_node_selected); + sel_node_evidence.set_text_modified_handler (*this, &main_window::on_sel_node_evidence_modified); + sel_node_is_evidence.set_click_handler (*this, &main_window::on_evidence_toggled); + sel_node_num_values.set_text_modified_handler(*this, &main_window::on_sel_node_num_values_modified); + sel_node_text.set_text_modified_handler (*this, &main_window::on_sel_node_text_modified); + + // now set the text of some of our buttons and labels + btn_calculate.set_name("Recalculate posterior probability table"); + selected_node_rect.set_name("Selected node"); + sel_node_evidence_label.set_text("evidence value:"); + sel_node_is_evidence.set_name("is evidence"); + sel_node_num_values_label.set_text("Number of values: "); + sel_node_text_label.set_text("Node label:"); + + // Now setup the tabbed display. It will have two tabs, one for the conditional + // probability table and one for the posterior probability table. + tables.set_number_of_tabs(2); + tables.set_tab_name(0,"Conditional probability table"); + tables.set_tab_name(1,"Posterior probability table"); + cpt_group.add(cpt_grid,0,0); + ppt_group.add(ppt_grid,0,0); + tables.set_tab_group(0,cpt_group); + tables.set_tab_group(1,ppt_group); + + // Now setup the menu bar. We will have two menus. A File and Help menu. 
+    mbar.set_number_of_menus(2);
+    mbar.set_menu_name(0,"File",'F');
+    mbar.set_menu_name(1,"Help",'H');
+
+    // add the entries to the File menu.
+    mbar.menu(0).add_menu_item(menu_item_text("Open", *this, &main_window::on_menu_file_open, 'O'));
+    mbar.menu(0).add_menu_item(menu_item_separator());
+    mbar.menu(0).add_menu_item(menu_item_text("Save", *this, &main_window::on_menu_file_save, 'S'));
+    mbar.menu(0).add_menu_item(menu_item_text("Save As",*this, &main_window::on_menu_file_save_as, 'a'));
+    mbar.menu(0).add_menu_item(menu_item_separator());
+    mbar.menu(0).add_menu_item(menu_item_text("Quit", *this, &main_window::on_menu_file_quit, 'Q'));
+
+    // Add the entries to the Help menu.
+    mbar.menu(1).add_menu_item(menu_item_text("Help", *this, &main_window::on_menu_help_help, 'e'));
+    mbar.menu(1).add_menu_item(menu_item_text("About", *this, &main_window::on_menu_help_about, 'A'));
+
+
+    // call our helper functions and window resize event to get the widgets
+    // to all arrange themselves correctly in our window.
+    no_node_selected();
+    on_window_resized();
+}
+
+// ----------------------------------------------------------------------------------------
+
+main_window::
+~main_window(
+)
+{
+    // You should always call close_window() in the destructor of window
+    // objects to ensure that no events will be sent to this window while
+    // it is being destructed.
+    close_window();
+}
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// Private methods from the main_window object
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+void main_window::
+load_selected_node_tables_into_ppt_grid (
+)
+{
+    // This function just takes the currently selected graph node and loads
+    // its posterior probabilities into the ppt_grid widget.
+    node_type& node = graph_drawer.graph_node(selected_node_index);
+    ppt_grid.set_grid_size(2,node.data.table().num_values());
+
+    // load the top row of the table into the grid. This row is the "title bar" row
+    // that tells you what each column contains.
+    for (unsigned long col = 0; col < node.data.table().num_values(); ++col)
+    {
+        ppt_grid.set_text(0,col,"P(node=" + cast_to_string(col) + ")");
+        ppt_grid.set_background_color(0,col,rgb_pixel(150,150,250));
+        ppt_grid.set_editable(0,col,false);
+    }
+
+    // If we have a solution to the network on hand then load the probabilities
+    // from that into the table
+    if (solution)
+    {
+        // get the probability distribution for the currently selected node out
+        // of the solution.
+        const matrix<double,1> prob = solution->probability(selected_node_index);
+
+        // now load the probabilities into the ppt_grid so the user can see them.
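+        // (The probability() call above returns a 1 x num_values row vector,
+        // one entry per possible value of the node, which is why it can be
+        // indexed by col in the loop below.)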
+ for (unsigned long col = 0; col < node.data.table().num_values(); ++col) + { + ppt_grid.set_text(1,col,cast_to_string(prob(col))); + } + } + + // make the second row of the table non-editable have a color that indicates + // that to the user + for (unsigned long col = 0; col < node.data.table().num_values(); ++col) + { + ppt_grid.set_background_color(1,col,color_gray); + ppt_grid.set_editable(1,col,false); + } +} + +// ---------------------------------------------------------------------------------------- + +void main_window:: +load_selected_node_tables_into_cpt_grid ( +) +{ + // This function just takes the conditional probability table in the + // currently selected graph node and puts it into the cpt_grid widget. + + node_type& node = graph_drawer.graph_node(selected_node_index); + + initialize_node_cpt_if_necessary(selected_node_index); + cpt_grid_assignments.clear(); + + // figure out how many rows there should be in the cpt + unsigned long cpt_rows = 1; + for (unsigned long i = 0; i < node.number_of_parents(); ++i) + { + cpt_rows *= node.parent(i).data.table().num_values(); + } + + unsigned long cpt_cols = node.data.table().num_values(); + + cpt_grid.set_grid_size(cpt_rows+1, cpt_cols+ node.number_of_parents()); + const unsigned long num_cols = cpt_grid.number_of_columns(); + + // fill in the top row of the grid that shows which parent node the left hand columns go with + assignment a(node_first_parent_assignment(graph_drawer.graph(),selected_node_index)); + unsigned long col = 0; + a.reset(); + while (a.move_next()) + { + cpt_grid.set_text(0,col,cast_to_string(a.element().key()) + ": " + graph_drawer.node_label(a.element().key()) ); + cpt_grid.set_background_color(0,col,rgb_pixel(120,210,210)); + cpt_grid.set_editable(0,col,false); + ++col; + } + + // fill in the top row of the grid that shows which probability the right hand columns go with + for (col = node.number_of_parents(); col < num_cols; ++col) + { + cpt_grid.set_text(0,col,"P(node=" + cast_to_string(col-node.number_of_parents()) + ")"); + cpt_grid.set_background_color(0,col,rgb_pixel(150,150,250)); + cpt_grid.set_editable(0,col,false); + } + + // now loop over all the possible parent assignments for this node + const unsigned long num_values = node.data.table().num_values(); + unsigned long row = 1; + do + { + col = 0; + + // fill in the left side of the grid row that shows what the parent assignment is + a.reset(); + while (a.move_next()) + { + cpt_grid.set_text(row,col,cast_to_string(a.element().value())); + cpt_grid.set_background_color(row,col,rgb_pixel(180,255,255)); + cpt_grid.set_editable(row,col,false); + + ++col; + } + + // fill in the right side of the grid row that shows what the conditional probabilities are + for (unsigned long value = 0; value < num_values; ++value) + { + const double prob = node.data.table().probability(value,a); + cpt_grid.set_text(row,col,cast_to_string(prob)); + ++col; + } + + // save this assignment so we can use it later to modify the node's + // conditional probability table if the user modifies the cpt_grid + cpt_grid_assignments.push_back(a); + ++row; + } while (node_next_parent_assignment(graph_drawer.graph(),selected_node_index,a)); + +} + +// ---------------------------------------------------------------------------------------- + +void main_window:: +initialize_node_cpt_if_necessary ( + unsigned long index +) +{ + node_type& node = graph_drawer.graph_node(index); + + // if the cpt for this node isn't properly filled out then let's clear it out + // and populate it with some 
reasonable default values + if (node_cpt_filled_out(graph_drawer.graph(), index) == false) + { + node.data.table().empty_table(); + + const unsigned long num_values = node.data.table().num_values(); + + // loop over all the possible parent assignments for this node and fill them out + // with reasonable default values + assignment a(node_first_parent_assignment(graph_drawer.graph(), index)); + do + { + // set the first value to have probability 1 + node.data.table().set_probability(0, a, 1.0); + + // set all the other values to have probability 0 + for (unsigned long value = 1; value < num_values; ++value) + node.data.table().set_probability(value, a, 0); + + } while (node_next_parent_assignment(graph_drawer.graph(), index,a)); + } +} + +// ---------------------------------------------------------------------------------------- + +void main_window:: +no_node_selected ( +) +{ + // Make it so that no node is selected on the gui. Do this by disabling things + // and clearing out text fields and so forth. + + + node_is_selected = false; + tables.disable(); + sel_node_evidence.disable(); + sel_node_is_evidence.disable(); + sel_node_index.disable(); + sel_node_evidence_label.disable(); + sel_node_text_label.disable(); + sel_node_text.disable(); + sel_node_index.set_text("index:"); + sel_node_num_values_label.disable(); + sel_node_num_values.disable(); + cpt_grid.set_grid_size(0,0); + ppt_grid.set_grid_size(0,0); + + sel_node_is_evidence.set_unchecked(); + sel_node_text.set_text(""); + sel_node_num_values.set_text(""); + sel_node_evidence.set_text(""); + sel_node_num_values.set_background_color(color_default_bg); + sel_node_evidence.set_background_color(color_default_bg); +} + +// ---------------------------------------------------------------------------------------- + +void main_window:: +recalculate_probabilities ( +) +{ + // clear out the current solution + solution.reset(); + if (graph_is_connected(graph_drawer.graph()) == false) + { + message_box("Error","Your graph has nodes that are completely disconnected from the other nodes.\n" + "You must connect them somehow"); + } + else if (graph_drawer.graph().number_of_nodes() > 0) + { + if (graph_modified_since_last_recalc) + { + // make sure all the cpts are filled out + const unsigned long num_nodes = graph_drawer.graph().number_of_nodes(); + for (unsigned long i = 0; i < num_nodes; ++i) + { + initialize_node_cpt_if_necessary(i); + } + + // remake the join tree for this graph + create_moral_graph(graph_drawer.graph(), join_tree); + create_join_tree(join_tree, join_tree); + graph_modified_since_last_recalc = false; + } + + // create a solution to this bayesian network using the join tree algorithm + solution.reset(new bayesian_network_join_tree(graph_drawer.graph(), join_tree)); + + if (node_is_selected) + { + load_selected_node_tables_into_ppt_grid(); + } + } +} + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// Event handling methods from the main_window object +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +// This event is called when the user selects a file with a saved +// bayesian network in it. 
+void main_window::
+on_open_file_selected (
+    const std::string& file_name
+)
+{
+    try
+    {
+        no_node_selected();
+        ifstream fin(file_name.c_str(), ios::binary);
+        graph_drawer.load_graph(fin);
+        graph_file_name = file_name;
+        set_title("Bayesian Network Utility - " + right_substr(file_name,"\\/"));
+    }
+    catch (...)
+    {
+        message_box("Error", "Unable to load graph file " + file_name);
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+
+// This event is called when the user selects from the menu bar File->Open
+void main_window::
+on_menu_file_open (
+)
+{
+    // display a file chooser window and when the user chooses a file
+    // call the on_open_file_selected() function
+    open_existing_file_box(*this, &main_window::on_open_file_selected);
+}
+
+// ----------------------------------------------------------------------------------------
+
+// This event is called when the user selects from the menu bar File->Save
+void main_window::
+on_menu_file_save (
+)
+{
+    // if we don't currently have any file name associated with our graph
+    if (graph_file_name.size() == 0)
+    {
+        // display a file chooser window and when the user chooses a file
+        // call the on_save_file_selected() function
+        save_file_box(*this, &main_window::on_save_file_selected);
+    }
+    else
+    {
+        // we know what file to open so just do that and save the graph to it
+        ofstream fout(graph_file_name.c_str(), ios::binary);
+        graph_drawer.save_graph(fout);
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+
+// This event is called when the user chooses which file to save the graph to
+void main_window::
+on_save_file_selected (
+    const std::string& file_name
+)
+{
+    ofstream fout(file_name.c_str(), ios::binary);
+    graph_drawer.save_graph(fout);
+    graph_file_name = file_name;
+    set_title("Bayesian Network Utility - " + right_substr(file_name,"\\/"));
+}
+
+// ----------------------------------------------------------------------------------------
+
+// This event is called when the user selects from the menu bar File->Save As
+void main_window::
+on_menu_file_save_as (
+)
+{
+    // display a file chooser window and when the user chooses a file
+    // call the on_save_file_selected() function
+    save_file_box(*this, &main_window::on_save_file_selected);
+}
+
+// ----------------------------------------------------------------------------------------
+
+// This event is called when the user selects from the menu bar File->Quit
+void main_window::
+on_menu_file_quit (
+)
+{
+    close_window();
+}
+
+// ----------------------------------------------------------------------------------------
+
+// This event is called when the user selects from the menu bar Help->Help
+void main_window::
+on_menu_help_help (
+)
+{
+    message_box("Help",
+                "To create new nodes right click on the drawing area.\n"
+                "To create edges select the parent node and then shift+left click on the child node.\n"
+                "To remove nodes or edges select them by left clicking and then press the delete key.");
+}
+
+// ----------------------------------------------------------------------------------------
+
+// This event is called when the user selects from the menu bar Help->About
+void main_window::
+on_menu_help_about (
+)
+{
+    message_box("About","This application is the GUI front end to the dlib C++ Library's\n"
+                "Bayesian Network inference utilities\n\n"
+                "Version 1.2\n\n"
+                "See http://dlib.net for updates");
+}
+
+// 
---------------------------------------------------------------------------------------- + +// This event is called when the user modifies the graph_drawer widget. That is, +// when the user adds or removes an edge or node in the graph. +void main_window:: +on_graph_modified ( +) +{ + // make note of the modification + graph_modified_since_last_recalc = true; + // clear out the solution object since we will need to recalculate it + // since the graph changed + solution.reset(); +} + +// ---------------------------------------------------------------------------------------- + +// This event is called when the user modifies the evidence value for a node +void main_window:: +on_sel_node_evidence_modified ( +) +{ + // make a reference to the node in the graph that is currently selected + node_type& node = graph_drawer.graph_node(selected_node_index); + unsigned long value; + try + { + // get the numerical value of the new evidence value. Here we are taking + // the string from the text field and casting it to an unsigned long. + value = sa = trim(sel_node_evidence.text()); + } + catch (string_cast_error&) + { + // if the user put something that isn't an integer into the + // text field then make it have a different background color + // so that they can easily see this. + sel_node_evidence.set_background_color(color_error); + return; + } + + // validate the input from the user and store it in the selected node + // if it is ok + if (value >= node.data.table().num_values()) + { + sel_node_evidence.set_background_color(color_error); + } + else + { + node.data.set_value(value); + sel_node_evidence.set_background_color(color_default_bg); + } + + // clear out the solution to the graph since we now need + // to recalculate it. + solution.reset(); +} + +// ---------------------------------------------------------------------------------------- + +// This event is called when the user modifies the number of evidence values for +// a node. +void main_window:: +on_sel_node_num_values_modified ( +) +{ + // make a reference to the node in the graph that is currently selected + node_type& node = graph_drawer.graph_node(selected_node_index); + + unsigned long num_values; + try + { + // get the number of values out of the text field. + num_values = sa = trim(sel_node_num_values.text()); + } + catch (string_cast_error&) + { + sel_node_num_values.set_background_color(color_error); + return; + } + + // validate the input from the user to make sure it is something reasonable + if (num_values < 2 || num_values > 100) + { + sel_node_num_values.set_background_color(color_error); + } + else + { + // update the graph + node.data.table().set_num_values(num_values); + graph_modified_since_last_recalc = true; + sel_node_num_values.set_background_color(color_default_bg); + + on_sel_node_evidence_modified(); + // also make sure the evidence value of this node makes sense still + if (node.data.is_evidence() && node.data.value() >= num_values) + { + // just set it to zero + node.data.set_value(0); + } + + } + + solution.reset(); + + // call these functions so that the conditional and posterior probability + // tables get updated + load_selected_node_tables_into_cpt_grid(); + load_selected_node_tables_into_ppt_grid(); +} + +// ---------------------------------------------------------------------------------------- + +// This event is called when the user modifies the cpt_grid (i.e. 
the conditional +// probability table widget) +void main_window:: +on_cpt_grid_modified(unsigned long row, unsigned long col) +{ + node_type& node = graph_drawer.graph_node(selected_node_index); + solution.reset(); + + double prob; + try + { + // get the new value out of the table + prob = sa = cpt_grid.text(row,col); + } + catch (string_cast_error&) + { + cpt_grid.set_background_color(row,col,color_error); + return; + } + + // validate the value + if (prob < 0 || prob > 1) + { + cpt_grid.set_background_color(row,col,color_error); + return; + } + + // the value of this node that is having its conditional probability + // updated + const unsigned long cur_val = col-node.number_of_parents(); + + node.data.table().set_probability(cur_val, cpt_grid_assignments[row-1], prob); + + // sum the probabilities in the cpt and modify the last one such that they all + // sum to 1. We are excluding either the first or last element from the sum + // because we are going to set it equal to 1-sum below. + double sum = 0; + if (cur_val != node.data.table().num_values()-1) + { + for (unsigned long i = 0; i < node.data.table().num_values()-1; ++i) + sum += node.data.table().probability(i, cpt_grid_assignments[row-1]); + } + else + { + for (unsigned long i = 1; i < node.data.table().num_values(); ++i) + sum += node.data.table().probability(i, cpt_grid_assignments[row-1]); + } + + // make sure all the probabilities sum to 1 + if (sum > 1.0) + { + cpt_grid.set_background_color(row,cpt_grid.number_of_columns()-1,color_error); + } + else + { + // edit one of the other elements in the table to ensure that the probabilities still sum to 1 + if (cur_val == node.data.table().num_values()-1) + { + node.data.table().set_probability(0, cpt_grid_assignments[row-1], 1-sum); + cpt_grid.set_text(row,node.number_of_parents(),cast_to_string(1-sum)); + } + else + { + node.data.table().set_probability(node.data.table().num_values()-1, cpt_grid_assignments[row-1], 1-sum); + cpt_grid.set_text(row,cpt_grid.number_of_columns()-1,cast_to_string(1-sum)); + } + + cpt_grid.set_background_color(row,cpt_grid.number_of_columns()-1,color_default_bg); + cpt_grid.set_background_color(row,col,color_default_bg); + } + +} + +// ---------------------------------------------------------------------------------------- + +// This event is called when the user resizes the main_window. Note that unlike the other +// events, this event is part of the drawable_window base class that main_window inherits from. +// So you won't see any statements in the constructor that say "register the main_window::on_window_resized function" +void main_window:: +on_window_resized () +{ + // when you override any of the drawable_window events you have to make sure you + // call the drawable_window's version of them because it needs to process + // the events as well. So we do that here. + drawable_window::on_window_resized(); + + // The rest of this function positions the widgets on the window + unsigned long width,height; + get_size(width,height); + + // Don't do anything if the user just made the window too small. That is, leave + // the widgets where they are. 
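+    // (Returning early here simply leaves every widget at whatever position it
+    // already had until the window is made large enough again.)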
+ if (width < 500 || height < 350) + return; + + // Set the size of the probability tables and the drawing area for the graph + graph_drawer.set_size(width-370,height-10-mbar.height()); + cpt_grid.set_size((width-graph_drawer.width())-35,height-237); + ppt_grid.set_size((width-graph_drawer.width())-35,height-237); + // tell the tabbed display to make itself just the right size to contain + // the two probability tables. + tables.fit_to_contents(); + + + // Now position all the widgets in the window. Note that much of the positioning + // is relative to other widgets. This part of the code I just figured out by + // trying stuff and rerunning the program to see if it looked nice. + sel_node_index.set_pos(graph_drawer.right()+14,graph_drawer.top()+18); + sel_node_text_label.set_pos(sel_node_index.left(),sel_node_index.bottom()+5); + sel_node_text.set_pos(sel_node_text_label.right()+5,sel_node_index.bottom()); + sel_node_num_values_label.set_pos(sel_node_index.left(), sel_node_text.bottom()+5); + sel_node_num_values.set_pos(sel_node_num_values_label.right(), sel_node_text.bottom()+5); + sel_node_is_evidence.set_pos(sel_node_index.left(),sel_node_num_values.bottom()+5); + sel_node_evidence_label.set_pos(sel_node_index.left(),sel_node_is_evidence.bottom()+5); + sel_node_evidence.set_pos(sel_node_evidence_label.right()+5,sel_node_is_evidence.bottom()); + tables.set_pos(sel_node_index.left(),sel_node_evidence.bottom()+5); + sel_node_evidence.set_width(tables.right()-sel_node_evidence.left()+1); + sel_node_text.set_width(tables.right()-sel_node_text.left()+1); + sel_node_num_values.set_width(tables.right()-sel_node_num_values.left()+1); + + + + // Tell the named rectangle to position itself such that it fits around the + // tabbed display that contains the probability tables and the label at the top of the + // screen. + selected_node_rect.wrap_around(sel_node_index.get_rect()+ + tables.get_rect()); + + // finally set the button to be at the bottom of the named rectangle + btn_calculate.set_pos(selected_node_rect.left(), selected_node_rect.bottom()+5); +} + +// ---------------------------------------------------------------------------------------- + +// This event is called by the graph_drawer widget when the user selects a node +void main_window:: +on_node_selected (unsigned long n) +{ + // make a reference to the selected node + node_type& node = graph_drawer.graph_node(n); + + + // enable all the widgets related to the selected node + selected_node_index = n; + node_is_selected = true; + tables.enable(); + sel_node_is_evidence.enable(); + sel_node_index.enable(); + sel_node_evidence_label.enable(); + sel_node_text_label.enable(); + sel_node_text.enable(); + sel_node_num_values_label.enable(); + sel_node_num_values.enable(); + + // make sure the num_values field of the node's cpt is set to something valid. + // So default it to 2 if it isn't set already. 
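+    // (A node must be able to take at least two values to represent a
+    // meaningful random variable, so 2 is the smallest sensible setting.)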
+    if (node.data.table().num_values() < 2)
+    {
+        node.data.table().set_num_values(2);
+        graph_modified_since_last_recalc = true;
+    }
+
+    // set up the evidence check box and input field
+    sel_node_index.set_text("index: " + cast_to_string(n));
+    if (graph_drawer.graph_node(n).data.is_evidence())
+    {
+        sel_node_is_evidence.set_checked();
+        sel_node_evidence.enable();
+        sel_node_evidence.set_text(cast_to_string(graph_drawer.graph_node(n).data.value()));
+    }
+    else
+    {
+        sel_node_is_evidence.set_unchecked();
+        sel_node_evidence.disable();
+        sel_node_evidence.set_text("");
+    }
+
+    sel_node_num_values.set_text(cast_to_string(node_num_values(graph_drawer.graph(),n)));
+
+    sel_node_text.set_text(graph_drawer.node_label(n));
+
+    load_selected_node_tables_into_cpt_grid();
+    load_selected_node_tables_into_ppt_grid();
+}
+
+// ----------------------------------------------------------------------------------------
+
+// This event is called when the user toggles the "is evidence" check box
+void main_window::
+on_evidence_toggled (
+)
+{
+    if (sel_node_is_evidence.is_checked())
+    {
+        graph_drawer.graph_node(selected_node_index).data.set_as_evidence();
+        sel_node_evidence.enable();
+        sel_node_evidence.set_text(cast_to_string(graph_drawer.graph_node(selected_node_index).data.value()));
+
+        graph_drawer.set_node_color(selected_node_index, color_evidence);
+    }
+    else
+    {
+        graph_drawer.graph_node(selected_node_index).data.set_as_nonevidence();
+        sel_node_evidence.disable();
+        sel_node_evidence.set_text("");
+        sel_node_evidence.set_background_color(color_default_bg);
+        graph_drawer.set_node_color(selected_node_index, color_non_evidence);
+    }
+    solution.reset();
+}
+
+// ----------------------------------------------------------------------------------------
+
+// This event is called when the user causes no node to be selected
+void main_window::
+on_node_deselected ( unsigned long )
+{
+    no_node_selected();
+}
+
+// ----------------------------------------------------------------------------------------
+
+// This event is called when the user causes a node to be deleted
+void main_window::
+on_node_deleted ( )
+{
+    no_node_selected();
+}
+
+// ----------------------------------------------------------------------------------------
+
+// This event is called when the user changes the text in the "node label" text field
+void main_window::
+on_sel_node_text_modified (
+)
+{
+    // set the selected node's text to match whatever the user just typed in
+    graph_drawer.set_node_label(selected_node_index,sel_node_text.text());
+}
+
+// ----------------------------------------------------------------------------------------
+
diff --git a/ml/dlib/examples/bridge_ex.cpp b/ml/dlib/examples/bridge_ex.cpp
new file mode 100644
index 00000000..bc772ccb
--- /dev/null
+++ b/ml/dlib/examples/bridge_ex.cpp
@@ -0,0 +1,365 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+
+
+/*
+    This is an example showing how to use the bridge object from the
+    dlib C++ Library to send messages via TCP/IP.
+
+    In particular, this example will walk you through four progressively
+    more complex use cases of the bridge object. Note that this example
+    program assumes you are already familiar with the pipe object and at
+    least the contents of the pipe_ex_2.cpp example program.
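+
+    As a quick preview, each example below is a variation on one basic
+    pattern (just a sketch; "host:port" stands in for a real address):
+
+        dlib::pipe<int> in(4), out(4);
+        bridge b(connect_to("host:port"), transmit(out), receive(in));
+
+    That is, a bridge attaches one or two pipes to a TCP connection and
+    shuttles serialized objects across it.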
+*/ + + +// =========== Example program output =========== +/* + ---- Running example 1 ---- + dequeued value: 1 + dequeued value: 2 + dequeued value: 3 + + ---- Running example 2 ---- + dequeued value: 1 + dequeued value: 2 + dequeued value: 3 + + ---- Running example 3 ---- + dequeued int: 1 + dequeued int: 2 + dequeued struct: 3 some string + + ---- Running example 4 ---- + bridge 1 status: is_connected: true + bridge 1 status: foreign_ip: 127.0.0.1 + bridge 1 status: foreign_port: 43156 + bridge 2 status: is_connected: true + bridge 2 status: foreign_ip: 127.0.0.1 + bridge 2 status: foreign_port: 12345 + dequeued int: 1 + dequeued int: 2 + dequeued struct: 3 some string + bridge 1 status: is_connected: false + bridge 1 status: foreign_ip: 127.0.0.1 + bridge 1 status: foreign_port: 12345 +*/ + + +#include <dlib/bridge.h> +#include <dlib/type_safe_union.h> +#include <iostream> + +using namespace dlib; +using namespace std; + +// ---------------------------------------------------------------------------------------- + +void run_example_1(); +void run_example_2(); +void run_example_3(); +void run_example_4(); + +// ---------------------------------------------------------------------------------------- + +int main() +{ + run_example_1(); + run_example_2(); + run_example_3(); + run_example_4(); +} + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +void run_example_1( +) +{ + cout << "\n ---- Running example 1 ---- " << endl; + + /* + The idea of the bridge is basically to allow two different dlib::pipe objects + to be connected together via a TCP connection. This is best illustrated by + the following short example. In it we create two pipes, in and out. When + an object is enqueued into the out pipe it will be automatically sent + through a TCP connection and once received at the other end it will be + inserted into the in pipe. + */ + dlib::pipe<int> in(4), out(4); + + + // This bridge will listen on port 12345 for an incoming TCP connection. Then + // it will read data from that connection and put it into the in pipe. + bridge b2(listen_on_port(12345), receive(in)); + + // This bridge will initiate a TCP connection and then start dequeuing + // objects from out and transmitting them over the connection. + bridge b1(connect_to_ip_and_port("127.0.0.1", 12345), transmit(out)); + + // As an aside, in a real program, each of these bridges and pipes would be in a + // separate application. But to make this example self contained they are both + // right here. + + + + // Now let's put some things into the out pipe + int value = 1; + out.enqueue(value); + + value = 2; + out.enqueue(value); + + value = 3; + out.enqueue(value); + + + // Now those 3 ints can be dequeued from the in pipe. They will show up + // in the same order they were inserted into the out pipe. 
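+    // (Each dequeue() call below blocks until a message has actually arrived
+    // over the TCP connection, so no extra synchronization is needed here.)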
+ in.dequeue(value); + cout << "dequeued value: "<< value << endl; + in.dequeue(value); + cout << "dequeued value: "<< value << endl; + in.dequeue(value); + cout << "dequeued value: "<< value << endl; +} + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +void run_example_2( +) +{ + cout << "\n ---- Running example 2 ---- " << endl; + + /* + This example makes a simple echo server on port 12345. When an object + is inserted into the out pipe it will be sent over a TCP connection, get + put into the echo pipe and then immediately read out of the echo pipe and + sent back over the TCP connection where it will finally be placed into the in + pipe. + */ + + dlib::pipe<int> in(4), out(4), echo(4); + + // Just like TCP connections, a bridge can send data both directions. The directionality + // of a pipe is indicated by the receive() and transmit() type decorations. Also, the order + // they are listed doesn't matter. + bridge echo_bridge(listen_on_port(12345), receive(echo), transmit(echo)); + + // Note that you can also specify the ip and port as a string by using connect_to(). + bridge b1(connect_to("127.0.0.1:12345"), transmit(out), receive(in)); + + + int value = 1; + out.enqueue(value); + + value = 2; + out.enqueue(value); + + value = 3; + out.enqueue(value); + + + in.dequeue(value); + cout << "dequeued value: "<< value << endl; + in.dequeue(value); + cout << "dequeued value: "<< value << endl; + in.dequeue(value); + cout << "dequeued value: "<< value << endl; +} + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +struct my_example_object +{ + /* + All objects passing through a dlib::bridge must be serializable. This + means there must exist global functions called serialize() and deserialize() + which can convert an object into a bit stream and then reverse the process. + + This example object illustrates how this is done. + */ + + int value; + std::string str; +}; + +void serialize (const my_example_object& item, std::ostream& out) +{ + /* + serialize() just needs to write the state of item to the output stream. + You can do this however you like. Below, I'm using the serialize functions + for int and std::string which come with dlib. But again, you can do whatever + you want here. + */ + dlib::serialize(item.value, out); + dlib::serialize(item.str, out); +} + +void deserialize (my_example_object& item, std::istream& in) +{ + /* + deserialize() is just the inverse of serialize(). Again, you can do + whatever you want here so long as it correctly reconstructs item. This + also means that deserialize() must always consume as many bytes as serialize() + generates. + */ + dlib::deserialize(item.value, in); + dlib::deserialize(item.str, in); +} + +// ---------------------------------------------------------------------------------------- + +void run_example_3( +) +{ + cout << "\n ---- Running example 3 ---- " << endl; + + /* + In this example we will just send ints and my_example_object objects + over a TCP connection. Since we are sending more than one type of + object through a pipe we will need to use the type_safe_union. 
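+
+        A type_safe_union is a variant-like container: it holds exactly one of
+        its listed types at a time, and you test and extract the contents with
+        contains<T>() and get<T>(). A tiny sketch of the idea:
+
+            type_safe_union<int, std::string> u;
+            u = 42;                           // u now holds an int
+            if (u.contains<int>())
+                cout << u.get<int>() << endl; // prints 42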
+    */
+
+    typedef type_safe_union<int, my_example_object> tsu_type;
+
+    dlib::pipe<tsu_type> in(4), out(4);
+
+    // Note that we don't have to start the listening bridge first. If b2
+    // fails to make a connection it will just keep trying until successful.
+    bridge b2(connect_to("127.0.0.1:12345"), receive(in));
+    // We don't have to configure a bridge in its constructor. If it's
+    // more convenient we can do so by calling reconfigure() instead.
+    bridge b1;
+    b1.reconfigure(listen_on_port(12345), transmit(out));
+
+    tsu_type msg;
+
+    msg = 1;
+    out.enqueue(msg);
+
+    msg = 2;
+    out.enqueue(msg);
+
+    msg.get<my_example_object>().value = 3;
+    msg.get<my_example_object>().str = "some string";
+    out.enqueue(msg);
+
+
+    // dequeue the three objects we sent and print them on the screen.
+    for (int i = 0; i < 3; ++i)
+    {
+        in.dequeue(msg);
+        if (msg.contains<int>())
+        {
+            cout << "dequeued int: "<< msg.get<int>() << endl;
+        }
+        else if (msg.contains<my_example_object>())
+        {
+            cout << "dequeued struct: "<< msg.get<my_example_object>().value << " "
+                 << msg.get<my_example_object>().str << endl;
+        }
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+void run_example_4(
+)
+{
+    cout << "\n ---- Running example 4 ---- " << endl;
+
+    /*
+        This final example is the same as example 3 except we will also now be getting
+        status messages from the bridges. These bridge_status messages tell us the
+        state of the TCP connection associated with a bridge. Is it connected or not?
+        Who is it connected to?
+
+        The way you get these status messages is by ensuring that your receive pipe is
+        capable of storing bridge_status objects. If it is then the bridge will
+        automatically insert bridge_status messages into your receive pipe whenever
+        there is a status change.
+
+        There are only two kinds of status changes. The establishment of a connection
+        or the closing of a connection. Also, a connection which closes due to you
+        calling clear(), reconfigure(), or destructing a bridge does not generate a
+        status message since, in this case, you already know about it and just want
+        the bridge to destroy itself as quickly as possible.
+    */
+
+
+    typedef type_safe_union<int, my_example_object, bridge_status> tsu_type;
+
+    dlib::pipe<tsu_type> in(4), out(4);
+    dlib::pipe<bridge_status> b1_status(4);
+
+    // set up both bridges to have receive pipes capable of holding bridge_status messages.
+    bridge b1(listen_on_port(12345), transmit(out), receive(b1_status));
+    // Note that we can also use a hostname with connect_to() instead of supplying an IP address.
+    bridge b2(connect_to("localhost:12345"), receive(in));
+
+    tsu_type msg;
+    bridge_status bs;
+
+    // Once a connection is established it will generate a status message from each bridge.
+    // Let's get those and print them.
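+    // Note the asymmetry in how the two bridges report status: b1 has a
+    // dedicated bridge_status pipe, while b2's status message arrives in the
+    // in pipe wrapped inside the type_safe_union and so must be extracted
+    // with get<bridge_status>().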
+ b1_status.dequeue(bs); + cout << "bridge 1 status: is_connected: " << boolalpha << bs.is_connected << endl; + cout << "bridge 1 status: foreign_ip: " << bs.foreign_ip << endl; + cout << "bridge 1 status: foreign_port: " << bs.foreign_port << endl; + + in.dequeue(msg); + bs = msg.get<bridge_status>(); + cout << "bridge 2 status: is_connected: " << bs.is_connected << endl; + cout << "bridge 2 status: foreign_ip: " << bs.foreign_ip << endl; + cout << "bridge 2 status: foreign_port: " << bs.foreign_port << endl; + + + + msg = 1; + out.enqueue(msg); + + msg = 2; + out.enqueue(msg); + + msg.get<my_example_object>().value = 3; + msg.get<my_example_object>().str = "some string"; + out.enqueue(msg); + + + // Read the 3 things we sent over the connection. + for (int i = 0; i < 3; ++i) + { + in.dequeue(msg); + if (msg.contains<int>()) + { + cout << "dequeued int: "<< msg.get<int>() << endl; + } + else if (msg.contains<my_example_object>()) + { + cout << "dequeued struct: "<< msg.get<my_example_object>().value << " " + << msg.get<my_example_object>().str << endl; + } + } + + // cause bridge 1 to shutdown completely. This will close the connection and + // therefore bridge 2 will generate a status message indicating the connection + // just closed. + b1.clear(); + in.dequeue(msg); + bs = msg.get<bridge_status>(); + cout << "bridge 1 status: is_connected: " << bs.is_connected << endl; + cout << "bridge 1 status: foreign_ip: " << bs.foreign_ip << endl; + cout << "bridge 1 status: foreign_port: " << bs.foreign_port << endl; +} + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/bsp_ex.cpp b/ml/dlib/examples/bsp_ex.cpp new file mode 100644 index 00000000..7dffa68d --- /dev/null +++ b/ml/dlib/examples/bsp_ex.cpp @@ -0,0 +1,282 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the Bulk Synchronous Parallel (BSP) + processing tools from the dlib C++ Library. These tools allow you to easily setup a + number of processes running on different computers which cooperate to compute some + result. + + In this example, we will use the BSP tools to find the minimizer of a simple function. + In particular, we will setup a nested grid search where different parts of the grid are + searched in parallel by different processes. + + + To run this program you should do the following (supposing you want to use three BSP + nodes to do the grid search and, to make things easy, you will run them all on your + current computer): + + 1. Open three command windows and navigate each to the folder containing the + compiled bsp_ex.cpp program. Let's call these window 1, window 2, and window 3. + + 2. In window 1 execute this command: + ./bsp_ex -l12345 + This will start a listening BSP node that listens on port 12345. The BSP node + won't do anything until we tell all the nodes to start running in step 4 below. + + 3. In window 2 execute this command: + ./bsp_ex -l12346 + This starts another listening BSP node. Note that since we are running this + example all on one computer you need to use different listening port numbers + for each listening node. + + 4. 
In window 3 execute this command: + ./bsp_ex localhost:12345 localhost:12346 + This will start a BSP node that connects to the others and gets them all running. + Additionally, as you will see when we go over the code below, it will also print + the final output of the BSP process, which is the minimizer of our test function. + Once it terminates, all the other BSP nodes will also automatically terminate. +*/ + + + + + +#include <dlib/cmd_line_parser.h> +#include <dlib/bsp.h> +#include <dlib/matrix.h> + +#include <iostream> + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------------------- + +// These are the functions executed by the BSP nodes. They are defined below. +void bsp_job_node_0 (bsp_context& bsp, double& min_value, double& optimal_x); +void bsp_job_other_nodes (bsp_context& bsp, long grid_resolution); + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) +{ + try + { + // Use the dlib command_line_parser to parse the command line. See the + // compress_stream_ex.cpp example program for an introduction to the command line + // parser. + command_line_parser parser; + parser.add_option("h","Display this help message."); + parser.add_option("l","Run as a listening BSP node.",1); + parser.parse(argc, argv); + parser.check_option_arg_range("l", 1, 65535); + + + // Print a help message if the user gives -h on the command line. + if (parser.option("h")) + { + // display all the command line options + cout << "Usage: bsp_ex (-l port | <list of hosts>)\n"; + parser.print_options(); + return 0; + } + + + // If the command line contained -l + if (parser.option("l")) + { + // Get the argument to -l + const unsigned short listening_port = get_option(parser, "l", 0); + cout << "Listening on port " << listening_port << endl; + + const long grid_resolution = 100; + + // bsp_listen() starts a listening BSP job. This means that it will wait until + // someone calls bsp_connect() and connects to it before it starts running. + // However, once it starts it will call bsp_job_other_nodes() which will then + // do all the real work. + // + // The first argument is the port to listen on. The second argument is the + // function which it should run to do all the work. The other arguments are + // optional and allow you to pass values into the bsp_job_other_nodes() + // routine. In this case, we are passing the grid_resolution to + // bsp_job_other_nodes(). + bsp_listen(listening_port, bsp_job_other_nodes, grid_resolution); + } + else + { + if (parser.number_of_arguments() == 0) + { + cout << "You must give some listening BSP nodes as arguments to this program!" << endl; + return 0; + } + + // Take the hostname:port strings from the command line and put them into the + // vector of hosts. + std::vector<network_address> hosts; + for (unsigned long i = 0; i < parser.number_of_arguments(); ++i) + hosts.push_back(parser[i]); + + double min_value, optimal_x; + + // Calling bsp_connect() does two things. First, it tells all the BSP jobs + // listed in the hosts vector to start running. Second, it starts a locally + // running BSP job that executes bsp_job_node_0() and passes it any arguments + // listed after bsp_job_node_0. So in this case it passes it the 3rd and 4th + // arguments. + // + // Note also that we use dlib::ref() which causes these arguments to be passed + // by reference. 
This means that bsp_job_node_0() will be able to modify them
+            // and we will see the results here in main() after bsp_connect() terminates.
+            bsp_connect(hosts, bsp_job_node_0, dlib::ref(min_value), dlib::ref(optimal_x));
+
+            // bsp_connect() and bsp_listen() block until all the BSP nodes have terminated.
+            // Therefore, we won't get to this part of the code until the BSP processing
+            // has finished. But once we do we can print the results like so:
+            cout << "optimal_x: "<< optimal_x << endl;
+            cout << "min_value: "<< min_value << endl;
+        }
+
+    }
+    catch (std::exception& e)
+    {
+        cout << "error in main(): " << e.what() << endl;
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+
+/*
+    We are going to use the BSP tools to find the minimum of f(x). Note that
+    its minimizer is at x == 2.0.
+*/
+double f (double x)
+{
+    return std::pow(x-2.0, 2.0);
+}
+
+// ----------------------------------------------------------------------------------------
+
+void bsp_job_node_0 (bsp_context& bsp, double& min_value, double& optimal_x)
+{
+    // This function is called by bsp_connect(). In general, any BSP node can do anything
+    // you want. However, in this example we use this node as a kind of controller for the
+    // other nodes. In particular, since we are doing a nested grid search, this node's
+    // job will be to collect results from other nodes and then decide which part of the
+    // number line subsequent iterations should focus on.
+    //
+    // Also, each BSP node has a node ID number. You can determine it by calling
+    // bsp.node_id(). However, the node spawned by a call to bsp_connect() always has a
+    // node ID of 0 (hence the name of this function). Additionally, all functions
+    // executing a BSP task always take a bsp_context as their first argument. This object
+    // is the interface that allows BSP jobs to communicate with each other.
+
+
+    // Now let's get down to work. Recall that we are trying to find the x value that
+    // minimizes the f(x) defined above. The grid search will start out by considering the
+    // range [-1e100, 1e100] on the number line. It will progressively narrow this window
+    // until it has located the minimizer of f(x) to within 1e-15 of its true value.
+    double left = -1e100;
+    double right = 1e100;
+
+    min_value = std::numeric_limits<double>::infinity();
+    double interval_width = std::abs(right-left);
+
+    // keep going until the window is smaller than 1e-15.
+    while (right-left > 1e-15)
+    {
+        // At the start of each loop, we broadcast the current window to all the other BSP
+        // nodes. They will each search a separate part of the window and then report back
+        // the smallest values they found in their respective sub-windows.
+        //
+        // Also, you can send/broadcast/receive anything that has global serialize() and
+        // deserialize() routines defined for it. Dlib comes with serialization functions
+        // for a lot of types by default, so we don't have to define anything for this
+        // example program. However, if you want to send an object you defined then you
+        // will need to write your own serialization functions. See the documentation for
+        // dlib's serialize() routine or the bridge_ex.cpp example program for an example.
+        bsp.broadcast(left);
+        bsp.broadcast(right);
+
+        // Receive the smallest values found from the other BSP nodes.
+        for (unsigned int k = 1; k < bsp.number_of_nodes(); ++k)
+        {
+            // The other nodes will send std::pairs of x/f(x) values. So that is what we
+            // receive.
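+            // (This receive() accepts the next message from whichever node
+            // sends first. Also, dlib already knows how to serialize
+            // std::pair, so these messages need no extra serialization code.)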
+ std::pair<double,double> val; + bsp.receive(val); + // save the smallest result. + if (val.second < min_value) + { + min_value = val.second; + optimal_x = val.first; + } + } + + // Now narrow the search window by half. + interval_width *= 0.5; + left = optimal_x - interval_width/2; + right = optimal_x + interval_width/2; + } +} + +// ---------------------------------------------------------------------------------------- + +void bsp_job_other_nodes (bsp_context& bsp, long grid_resolution) +{ + // This is the BSP job called by bsp_listen(). In these jobs we will receive window + // ranges from the controller node, search our sub-window, and then report back the + // location of the best x value we found. + + double left, right; + + // The try_receive() function will either return true with the next message or return + // false if there aren't any more messages in flight between nodes and all other BSP + // nodes are blocked on calls to receive or have terminated. That is, try_receive() + // only returns false if waiting for a message would result in all the BSP nodes + // waiting forever. + // + // Therefore, try_receive() serves both as a message receiving tool as well as an + // implicit form of barrier synchronization. In this case, we use it to know when to + // terminate. That is, we know it is time to terminate if all the messages between + // nodes have been received and all nodes are inactive due to either termination or + // being blocked on a receive call. This will happen once the controller node above + // terminates since it will result in all the other nodes inevitably becoming blocked + // on this try_receive() line with no messages to process. + while (bsp.try_receive(left)) + { + bsp.receive(right); + + // Compute a sub-window range for us to search. We use our node's ID value and the + // total number of nodes to select a subset of the [left, right] window. We will + // store the grid points from our sub-window in values_to_check. + const double l = (bsp.node_id()-1)/(bsp.number_of_nodes()-1.0); + const double r = bsp.node_id() /(bsp.number_of_nodes()-1.0); + const double width = right-left; + // Select grid_resolution number of points which are linearly spaced throughout our + // sub-window. + const matrix<double> values_to_check = linspace(left+l*width, left+r*width, grid_resolution); + + // Search all the points in values_to_check and figure out which one gives the + // minimum value of f(). + double best_x = 0; + double best_val = std::numeric_limits<double>::infinity(); + for (long j = 0; j < values_to_check.size(); ++j) + { + double temp = f(values_to_check(j)); + if (temp < best_val) + { + best_val = temp; + best_x = values_to_check(j); + } + } + + // Report back the identity of the best point we found in our sub-window. Note + // that the second argument to send(), the 0, is the node ID to send to. In this + // case we send our results back to the controller node. + bsp.send(make_pair(best_x, best_val), 0); + } +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/compress_stream_ex.cpp b/ml/dlib/examples/compress_stream_ex.cpp new file mode 100644 index 00000000..502400e5 --- /dev/null +++ b/ml/dlib/examples/compress_stream_ex.cpp @@ -0,0 +1,245 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the compress_stream and + cmd_line_parser components from the dlib C++ Library. 
+
+    This example implements a simple command line compression utility.
+
+
+    The output from the program when the -h option is given is:
+
+        Usage: compress_stream_ex (-c|-d|-l) --in input_file --out output_file
+        Options:
+          -c            Indicates that we want to compress a file.
+          -d            Indicates that we want to decompress a file.
+          --in <arg>    This option takes one argument which specifies the name of the
+                        file we want to compress/decompress.
+          --out <arg>   This option takes one argument which specifies the name of the
+                        output file.
+
+        Miscellaneous Options:
+          -h            Display this help message.
+          -l <arg>      Set the compression level [1-3], 3 is max compression, default
+                        is 2.
+
+*/
+
+
+
+
+#include <dlib/compress_stream.h>
+#include <dlib/cmd_line_parser.h>
+#include <iostream>
+#include <fstream>
+#include <string>
+
+// I am making typedefs for the versions of compress_stream I want to use.
+typedef dlib::compress_stream::kernel_1da cs1;
+typedef dlib::compress_stream::kernel_1ea cs2;
+typedef dlib::compress_stream::kernel_1ec cs3;
+
+
+using namespace std;
+using namespace dlib;
+
+
+int main(int argc, char** argv)
+{
+    try
+    {
+        command_line_parser parser;
+
+        // first I will define the command line options I want.
+        // Add a -c option and tell the parser what the option is for.
+        parser.add_option("c","Indicates that we want to compress a file.");
+        parser.add_option("d","Indicates that we want to decompress a file.");
+        // add a --in option that takes 1 argument
+        parser.add_option("in","This option takes one argument which specifies the name of the file we want to compress/decompress.",1);
+        // add a --out option that takes 1 argument
+        parser.add_option("out","This option takes one argument which specifies the name of the output file.",1);
+        // In the code below, we use the parser.print_options() method to print all our
+        // options to the screen. We can tell it that we would like some options to be
+        // grouped together by calling set_group_name() before adding those options. In
+        // general, you can make as many groups as you like by calling set_group_name().
+        // However, here we make only one named group.
+        parser.set_group_name("Miscellaneous Options");
+        parser.add_option("h","Display this help message.");
+        parser.add_option("l","Set the compression level [1-3], 3 is max compression, default is 2.",1);
+
+
+        // now I will parse the command line
+        parser.parse(argc,argv);
+
+
+        // Now I will use the parser to validate some things about the command line.
+        // If any of the following checks fail then an exception will be thrown and it will
+        // contain a message that tells the user what the problem was.
+
+        // First I want to check that none of the options were given on the command line
+        // more than once. To do this I define an array that contains the options
+        // that shouldn't appear more than once and then I just call check_one_time_options()
+        const char* one_time_opts[] = {"c", "d", "in", "out", "h", "l"};
+        parser.check_one_time_options(one_time_opts);
+        // Here I'm checking that the user didn't pick both the c and d options at the
+        // same time.
+        parser.check_incompatible_options("c", "d");
+
+        // Here I'm checking that the argument to the l option is an integer in the range 1 to 3.
+        // That is, it should be convertible to an int by dlib::string_assign and be either
+        // 1, 2, or 3.
Note that if you wanted to allow floating point values in the range 1 to + // 3 then you could give a range 1.0 to 3.0 or explicitly supply a type of float or double + // to the template argument of the check_option_arg_range() function. + parser.check_option_arg_range("l", 1, 3); + + // The 'l' option is a sub-option of the 'c' option. That is, you can only select the + // compression level when compressing. This command below checks that the listed + // sub options are always given in the presence of their parent options. + const char* c_sub_opts[] = {"l"}; + parser.check_sub_options("c", c_sub_opts); + + // check if the -h option was given on the command line + if (parser.option("h")) + { + // display all the command line options + cout << "Usage: compress_stream_ex (-c|-d|-l) --in input_file --out output_file\n"; + // This function prints out a nicely formatted list of + // all the options the parser has + parser.print_options(); + return 0; + } + + // Figure out what the compression level should be. If the user didn't supply + // this command line option then a value of 2 will be used. + int compression_level = get_option(parser,"l",2); + + + // make sure one of the c or d options was given + if (!parser.option("c") && !parser.option("d")) + { + cout << "Error in command line:\n You must specify either the c option or the d option.\n"; + cout << "\nTry the -h option for more information." << endl; + return 0; + } + + + string in_file; + string out_file; + + // check if the user told us the input file and if they did then + // get the file name + if (parser.option("in")) + { + in_file = parser.option("in").argument(); + } + else + { + cout << "Error in command line:\n You must specify an input file.\n"; + cout << "\nTry the -h option for more information." << endl; + return 0; + } + + + // check if the user told us the output file and if they did then + // get the file name + if (parser.option("out")) + { + out_file = parser.option("out").argument(); + } + else + { + cout << "Error in command line:\n You must specify an output file.\n"; + cout << "\nTry the -h option for more information." << endl; + return 0; + } + + + // open the files we will be reading from and writing to + ifstream fin(in_file.c_str(),ios::binary); + ofstream fout(out_file.c_str(),ios::binary); + + // make sure the files opened correctly + if (!fin) + { + cout << "Error opening file " << in_file << ".\n"; + return 0; + } + + if (!fout) + { + cout << "Error creating file " << out_file << ".\n"; + return 0; + } + + + + // now perform the actual compression or decompression. 
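+        // The file format produced below is simply the serialized compression
+        // level followed by the compressed data. Writing the level into the
+        // file first is what lets the decompression branch further down know
+        // which of the three decompressors to use.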
+ if (parser.option("c")) + { + // save the compression level to the output file + serialize(compression_level, fout); + + switch (compression_level) + { + case 1: + { + cs1 compressor; + compressor.compress(fin,fout); + }break; + case 2: + { + cs2 compressor; + compressor.compress(fin,fout); + }break; + case 3: + { + cs3 compressor; + compressor.compress(fin,fout); + }break; + } + } + else + { + // obtain the compression level from the input file + deserialize(compression_level, fin); + + switch (compression_level) + { + case 1: + { + cs1 compressor; + compressor.decompress(fin,fout); + }break; + case 2: + { + cs2 compressor; + compressor.decompress(fin,fout); + }break; + case 3: + { + cs3 compressor; + compressor.decompress(fin,fout); + }break; + default: + { + cout << "Error in compressed file, invalid compression level" << endl; + }break; + } + } + + + + + } + catch (exception& e) + { + // Note that this will catch any cmd_line_parse_error exceptions and print + // the default message. + cout << e.what() << endl; + } +} + + + + + diff --git a/ml/dlib/examples/config.txt b/ml/dlib/examples/config.txt new file mode 100644 index 00000000..da21d170 --- /dev/null +++ b/ml/dlib/examples/config.txt @@ -0,0 +1,30 @@ +# This is an example config file. Note that # is used to create a comment. + +# At its most basic level a config file is just a bunch of key/value pairs. +# So for example: +key1 = value2 +dlib = a C++ library + +# You can also define "sub blocks" in your config files like so +user1 +{ + # Inside a sub block you can list more key/value pairs. + id = 42 + name = davis + + # you can also nest sub-blocks as deep as you want + details + { + editor = vim + home_dir = /home/davis + } +} +user2 { + id = 1234 + name = joe + details { + editor = emacs + home_dir = /home/joe + } +} + diff --git a/ml/dlib/examples/config_reader_ex.cpp b/ml/dlib/examples/config_reader_ex.cpp new file mode 100644 index 00000000..02ad1cc6 --- /dev/null +++ b/ml/dlib/examples/config_reader_ex.cpp @@ -0,0 +1,146 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the config_reader component + from the dlib C++ Library. + + This example uses the config_reader to load a config file and then + prints out the values of various fields in the file. +*/ + + +#include <dlib/config_reader.h> +#include <iostream> +#include <fstream> +#include <vector> + + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------------------- +// For reference, the contents of the config file used in this example is reproduced below: +/* + +# This is an example config file. Note that # is used to create a comment. + +# At its most basic level a config file is just a bunch of key/value pairs. +# So for example: +key1 = value2 +dlib = a C++ library + +# You can also define "sub blocks" in your config files like so +user1 +{ + # Inside a sub block you can list more key/value pairs. 
+ id = 42 + name = davis + + # you can also nest sub-blocks as deep as you want + details + { + editor = vim + home_dir = /home/davis + } +} +user2 { + id = 1234 + name = joe + details { + editor = emacs + home_dir = /home/joe + } +} + +*/ +// ---------------------------------------------------------------------------------------- + +void print_config_reader_contents ( + const config_reader& cr, + int depth = 0 +); +/* + This is a simple function that recursively walks through everything in + a config reader and prints it to the screen. +*/ + +// ---------------------------------------------------------------------------------------- + +int main() +{ + try + { + config_reader cr("config.txt"); + + // Use our recursive function to print everything in the config file. + print_config_reader_contents(cr); + + // Now let's access some of the fields of the config file directly. You + // use [] for accessing key values and .block() for accessing sub-blocks. + + // Print out the string value assigned to key1 in the config file + cout << cr["key1"] << endl; + + // Print out the name field inside the user1 sub-block + cout << cr.block("user1")["name"] << endl; + // Now print out the editor field in the details block + cout << cr.block("user1").block("details")["editor"] << endl; + + + // Note that you can use get_option() to easily convert fields into + // non-string types. For example, the config file has an integer id + // field that can be converted into an int like so: + int id1 = get_option(cr,"user1.id",0); + int id2 = get_option(cr,"user2.id",0); + cout << "user1's id is " << id1 << endl; + cout << "user2's id is " << id2 << endl; + // The third argument to get_option() is the default value returned if + // the config reader doesn't contain a corresponding entry. So for + // example, the following prints 321 since there is no user3. + int id3 = get_option(cr,"user3.id",321); + cout << "user3's id is " << id3 << endl; + + } + catch (exception& e) + { + // Finally, note that the config_reader throws exceptions if the config + // file is corrupted or if you ask it for a key or block that doesn't exist. + // Here we print out any such error messages. + cout << e.what() << endl; + } +} + +// ---------------------------------------------------------------------------------------- + +void print_config_reader_contents ( + const config_reader& cr, + int depth +) +{ + // Make a string with depth*4 spaces in it. + const string padding(depth*4, ' '); + + // We can obtain a list of all the keys and sub-blocks defined + // at the current level in the config reader like so: + vector<string> keys, blocks; + cr.get_keys(keys); + cr.get_blocks(blocks); + + // Now print all the key/value pairs + for (unsigned long i = 0; i < keys.size(); ++i) + cout << padding << keys[i] << " = " << cr[keys[i]] << endl; + + // Now print all the sub-blocks. + for (unsigned long i = 0; i < blocks.size(); ++i) + { + // First print the block name + cout << padding << blocks[i] << " { " << endl; + // Now recursively print the contents of the sub block. Note that the cr.block() + // function returns another config_reader that represents the sub-block. 
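+        // Passing depth+1 makes everything inside the sub-block print with
+        // one extra level of indentation.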
+ print_config_reader_contents(cr.block(blocks[i]), depth+1); + cout << padding << "}" << endl; + } +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/custom_trainer_ex.cpp b/ml/dlib/examples/custom_trainer_ex.cpp new file mode 100644 index 00000000..39af53f3 --- /dev/null +++ b/ml/dlib/examples/custom_trainer_ex.cpp @@ -0,0 +1,277 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This example program shows you how to create your own custom binary classification + trainer object and use it with the multiclass classification tools in the dlib C++ + library. This example assumes you have already become familiar with the concepts + introduced in the multiclass_classification_ex.cpp example program. + + + In this example we will create a very simple trainer object that takes a binary + classification problem and produces a decision rule which says a test point has the + same class as whichever centroid it is closest to. + + The multiclass training dataset will consist of four classes. Each class will be a blob + of points in one of the quadrants of the cartesian plane. For fun, we will use + std::string labels and therefore the labels of these classes will be the following: + "upper_left", + "upper_right", + "lower_left", + "lower_right" +*/ + +#include <dlib/svm_threaded.h> + +#include <iostream> +#include <vector> + +#include <dlib/rand.h> + +using namespace std; +using namespace dlib; + +// Our data will be 2-dimensional data. So declare an appropriate type to contain these points. +typedef matrix<double,2,1> sample_type; + +// ---------------------------------------------------------------------------------------- + +struct custom_decision_function +{ + /*! + WHAT THIS OBJECT REPRESENTS + This object is the representation of our binary decision rule. + !*/ + + // centers of the two classes + sample_type positive_center, negative_center; + + double operator() ( + const sample_type& x + ) const + { + // if x is closer to the positive class then return +1 + if (length(positive_center - x) < length(negative_center - x)) + return +1; + else + return -1; + } +}; + +// Later on in this example we will save our decision functions to disk. This +// pair of routines is needed for this functionality. +void serialize (const custom_decision_function& item, std::ostream& out) +{ + // write the state of item to the output stream + serialize(item.positive_center, out); + serialize(item.negative_center, out); +} + +void deserialize (custom_decision_function& item, std::istream& in) +{ + // read the data from the input stream and store it in item + deserialize(item.positive_center, in); + deserialize(item.negative_center, in); +} + +// ---------------------------------------------------------------------------------------- + +class simple_custom_trainer +{ + /*! + WHAT THIS OBJECT REPRESENTS + This is our example custom binary classifier trainer object. It simply + computes the means of the +1 and -1 classes, puts them into our + custom_decision_function, and returns the results. + + Below we define the train() function. I have also included the + requires/ensures definition for a generic binary classifier's train() + !*/ +public: + + + custom_decision_function train ( + const std::vector<sample_type>& samples, + const std::vector<double>& labels + ) const + /*! + requires + - is_binary_classification_problem(samples, labels) == true + (e.g. 
labels consists of only +1 and -1 values, samples.size() == labels.size())
+        ensures
+            - returns a decision function F with the following properties:
+                - if (new_x is a sample predicted to have a +1 label) then
+                    - F(new_x) >= 0
+                - else
+                    - F(new_x) < 0
+    !*/
+    {
+        sample_type positive_center, negative_center;
+
+        // compute sums of each class
+        positive_center = 0;
+        negative_center = 0;
+        for (unsigned long i = 0; i < samples.size(); ++i)
+        {
+            if (labels[i] == +1)
+                positive_center += samples[i];
+            else // this is a -1 sample
+                negative_center += samples[i];
+        }
+
+        // divide by number of +1 samples
+        positive_center /= sum(mat(labels) == +1);
+        // divide by number of -1 samples
+        negative_center /= sum(mat(labels) == -1);
+
+        custom_decision_function df;
+        df.positive_center = positive_center;
+        df.negative_center = negative_center;
+
+        return df;
+    }
+};
+
+// ----------------------------------------------------------------------------------------
+
+void generate_data (
+    std::vector<sample_type>& samples,
+    std::vector<string>& labels
+);
+/*!
+    ensures
+        - make some four class data as described above.
+        - each class will have 50 samples in it
+!*/
+
+// ----------------------------------------------------------------------------------------
+
+int main()
+{
+    std::vector<sample_type> samples;
+    std::vector<string> labels;
+
+    // First, get our labeled set of training data
+    generate_data(samples, labels);
+
+    cout << "samples.size(): "<< samples.size() << endl;
+
+    // Define the trainer we will use. The second template argument specifies the type
+    // of label used, which is string in this case.
+    typedef one_vs_one_trainer<any_trainer<sample_type>, string> ovo_trainer;
+
+
+    ovo_trainer trainer;
+
+    // Now tell the one_vs_one_trainer that, by default, it should use the simple_custom_trainer
+    // to solve the individual binary classification subproblems.
+    trainer.set_trainer(simple_custom_trainer());
+
+    // Next, to make things a little more interesting, we will set up the one_vs_one_trainer
+    // to use kernel ridge regression to solve the upper_left vs lower_right binary classification
+    // subproblem.
+    typedef radial_basis_kernel<sample_type> rbf_kernel;
+    krr_trainer<rbf_kernel> rbf_trainer;
+    rbf_trainer.set_kernel(rbf_kernel(0.1));
+    trainer.set_trainer(rbf_trainer, "upper_left", "lower_right");
+
+
+    // Now let's do 5-fold cross-validation using the one_vs_one_trainer we just set up.
+    // As an aside, always shuffle the order of the samples before doing cross validation.
+    // For a discussion of why this is a good idea see the svm_ex.cpp example.
+    randomize_samples(samples, labels);
+    cout << "cross validation: \n" << cross_validate_multiclass_trainer(trainer, samples, labels, 5) << endl;
+    // This dataset is very easy and everything is correctly classified. Therefore, the output of
+    // cross validation is the following confusion matrix.
+    /*
+        50  0  0  0
+         0 50  0  0
+         0  0 50  0
+         0  0  0 50
+    */
+
+
+    // We can also obtain the decision rule as always.
+    one_vs_one_decision_function<ovo_trainer> df = trainer.train(samples, labels);
+
+    cout << "predicted label: "<< df(samples[0])  << ", true label: "<< labels[0] << endl;
+    cout << "predicted label: "<< df(samples[90]) << ", true label: "<< labels[90] << endl;
+    // The output is:
+    /*
+        predicted label: upper_right, true label: upper_right
+        predicted label: lower_left, true label: lower_left
+    */
+
+
+    // Finally, let's save our multiclass decision rule to disk.
Remember that we have + // to specify the types of binary decision function used inside the one_vs_one_decision_function. + one_vs_one_decision_function<ovo_trainer, + custom_decision_function, // This is the output of the simple_custom_trainer + decision_function<radial_basis_kernel<sample_type> > // This is the output of the rbf_trainer + > df2, df3; + + df2 = df; + // save to a file called df.dat + serialize("df.dat") << df2; + + // load the function back in from disk and store it in df3. + deserialize("df.dat") >> df3; + + + // Test df3 to see that this worked. + cout << endl; + cout << "predicted label: "<< df3(samples[0]) << ", true label: "<< labels[0] << endl; + cout << "predicted label: "<< df3(samples[90]) << ", true label: "<< labels[90] << endl; + // Test df3 on the samples and labels and print the confusion matrix. + cout << "test deserialized function: \n" << test_multiclass_decision_function(df3, samples, labels) << endl; + +} + +// ---------------------------------------------------------------------------------------- + +void generate_data ( + std::vector<sample_type>& samples, + std::vector<string>& labels +) +{ + const long num = 50; + + sample_type m; + + dlib::rand rnd; + + + // add some points in the upper right quadrant + m = 10, 10; + for (long i = 0; i < num; ++i) + { + samples.push_back(m + randm(2,1,rnd)); + labels.push_back("upper_right"); + } + + // add some points in the upper left quadrant + m = -10, 10; + for (long i = 0; i < num; ++i) + { + samples.push_back(m + randm(2,1,rnd)); + labels.push_back("upper_left"); + } + + // add some points in the lower right quadrant + m = 10, -10; + for (long i = 0; i < num; ++i) + { + samples.push_back(m + randm(2,1,rnd)); + labels.push_back("lower_right"); + } + + // add some points in the lower left quadrant + m = -10, -10; + for (long i = 0; i < num; ++i) + { + samples.push_back(m + randm(2,1,rnd)); + labels.push_back("lower_left"); + } + +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/dir_nav_ex.cpp b/ml/dlib/examples/dir_nav_ex.cpp new file mode 100644 index 00000000..2f51f2d1 --- /dev/null +++ b/ml/dlib/examples/dir_nav_ex.cpp @@ -0,0 +1,75 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the dir_nav component from the dlib C++ Library. + It prints a listing of all directories and files in the users + current working directory or the directory specified on the command line. + +*/ + + +#include <iostream> +#include <iomanip> +#include <dlib/dir_nav.h> +#include <vector> +#include <algorithm> + +using namespace std; +using namespace dlib; + + +int main(int argc, char** argv) +{ + try + { + string loc; + if (argc == 2) + loc = argv[1]; + else + loc = "."; // if no argument is given then use the current working dir. 
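+        // Note: the directory constructor below throws directory::dir_not_found
+        // if loc doesn't name a real directory; the catch blocks at the end of
+        // main() report that case.  As an aside, dlib can also walk a whole
+        // directory tree rather than a single level.  A minimal sketch, assuming
+        // the get_files_in_directory_tree() and match_ending() helpers from
+        // dlib's dir_nav tools:
+        //
+        //     std::vector<file> cpp_files =
+        //         get_files_in_directory_tree(directory(loc), match_ending(".cpp"));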
+ + directory test(loc); + + + cout << "directory: " << test.name() << endl; + cout << "full path: " << test.full_name() << endl; + cout << "is root: " << ((test.is_root())?"yes":"no") << endl; + + // get all directories and files in test + std::vector<directory> dirs = test.get_dirs(); + std::vector<file> files = test.get_files(); + + // sort the files and directories + sort(files.begin(), files.end()); + sort(dirs.begin(), dirs.end()); + + cout << "\n\n\n"; + + // print all the subdirectories + for (unsigned long i = 0; i < dirs.size(); ++i) + cout << " <DIR> " << dirs[i].name() << "\n"; + + // print all the subfiles + for (unsigned long i = 0; i < files.size(); ++i) + cout << setw(13) << files[i].size() << " " << files[i].name() << "\n"; + + + cout << "\n\nnumber of dirs: " << dirs.size() << endl; + cout << "number of files: " << files.size() << endl; + + } + catch (file::file_not_found& e) + { + cout << "file not found or accessible: " << e.info << endl; + } + catch (directory::dir_not_found& e) + { + cout << "dir not found or accessible: " << e.info << endl; + } + catch (directory::listing_error& e) + { + cout << "listing error: " << e.info << endl; + } +} + + diff --git a/ml/dlib/examples/dnn_face_recognition_ex.cpp b/ml/dlib/examples/dnn_face_recognition_ex.cpp new file mode 100644 index 00000000..4c0a2a02 --- /dev/null +++ b/ml/dlib/examples/dnn_face_recognition_ex.cpp @@ -0,0 +1,220 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is an example illustrating the use of the deep learning tools from the dlib C++ + Library. In it, we will show how to do face recognition. This example uses the + pretrained dlib_face_recognition_resnet_model_v1 model which is freely available from + the dlib web site. This model has a 99.38% accuracy on the standard LFW face + recognition benchmark, which is comparable to other state-of-the-art methods for face + recognition as of February 2017. + + In this example, we will use dlib to do face clustering. Included in the examples + folder is an image, bald_guys.jpg, which contains a bunch of photos of action movie + stars Vin Diesel, The Rock, Jason Statham, and Bruce Willis. We will use dlib to + automatically find their faces in the image and then to automatically determine how + many people there are (4 in this case) as well as which faces belong to each person. + + Finally, this example uses a network with the loss_metric loss. Therefore, if you want + to learn how to train your own models, or to get a general introduction to this loss + layer, you should read the dnn_metric_learning_ex.cpp and + dnn_metric_learning_on_images_ex.cpp examples. +*/ + +#include <dlib/dnn.h> +#include <dlib/gui_widgets.h> +#include <dlib/clustering.h> +#include <dlib/string.h> +#include <dlib/image_io.h> +#include <dlib/image_processing/frontal_face_detector.h> + +using namespace dlib; +using namespace std; + +// ---------------------------------------------------------------------------------------- + +// The next bit of code defines a ResNet network. It's basically copied +// and pasted from the dnn_imagenet_ex.cpp example, except we replaced the loss +// layer with loss_metric and made the network somewhat smaller. Go read the introductory +// dlib DNN examples to learn what all this stuff means. +// +// Also, the dnn_metric_learning_on_images_ex.cpp example shows how to train this network. 
+// The dlib_face_recognition_resnet_model_v1 model used by this example was trained using +// essentially the code shown in dnn_metric_learning_on_images_ex.cpp except the +// mini-batches were made larger (35x15 instead of 5x5), the iterations without progress +// was set to 10000, and the training dataset consisted of about 3 million images instead of +// 55. Also, the input layer was locked to images of size 150. +template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> +using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>; + +template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> +using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>; + +template <int N, template <typename> class BN, int stride, typename SUBNET> +using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>; + +template <int N, typename SUBNET> using ares = relu<residual<block,N,affine,SUBNET>>; +template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>; + +template <typename SUBNET> using alevel0 = ares_down<256,SUBNET>; +template <typename SUBNET> using alevel1 = ares<256,ares<256,ares_down<256,SUBNET>>>; +template <typename SUBNET> using alevel2 = ares<128,ares<128,ares_down<128,SUBNET>>>; +template <typename SUBNET> using alevel3 = ares<64,ares<64,ares<64,ares_down<64,SUBNET>>>>; +template <typename SUBNET> using alevel4 = ares<32,ares<32,ares<32,SUBNET>>>; + +using anet_type = loss_metric<fc_no_bias<128,avg_pool_everything< + alevel0< + alevel1< + alevel2< + alevel3< + alevel4< + max_pool<3,3,2,2,relu<affine<con<32,7,7,2,2, + input_rgb_image_sized<150> + >>>>>>>>>>>>; + +// ---------------------------------------------------------------------------------------- + +std::vector<matrix<rgb_pixel>> jitter_image( + const matrix<rgb_pixel>& img +); + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) try +{ + if (argc != 2) + { + cout << "Run this example by invoking it like this: " << endl; + cout << " ./dnn_face_recognition_ex faces/bald_guys.jpg" << endl; + cout << endl; + cout << "You will also need to get the face landmarking model file as well as " << endl; + cout << "the face recognition model file. Download and then decompress these files from: " << endl; + cout << "http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2" << endl; + cout << "http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2" << endl; + cout << endl; + return 1; + } + + // The first thing we are going to do is load all our models. First, since we need to + // find faces in the image we will need a face detector: + frontal_face_detector detector = get_frontal_face_detector(); + // We will also use a face landmarking model to align faces to a standard pose: (see face_landmark_detection_ex.cpp for an introduction) + shape_predictor sp; + deserialize("shape_predictor_5_face_landmarks.dat") >> sp; + // And finally we load the DNN responsible for face recognition. 
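+    // Note: the deserialize() calls above and below read their model files from
+    // the current working directory and throw dlib::serialization_error if a
+    // file can't be opened, so run the program from the directory holding the
+    // two downloaded .dat files.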
+ anet_type net; + deserialize("dlib_face_recognition_resnet_model_v1.dat") >> net; + + matrix<rgb_pixel> img; + load_image(img, argv[1]); + // Display the raw image on the screen + image_window win(img); + + // Run the face detector on the image of our action heroes, and for each face extract a + // copy that has been normalized to 150x150 pixels in size and appropriately rotated + // and centered. + std::vector<matrix<rgb_pixel>> faces; + for (auto face : detector(img)) + { + auto shape = sp(img, face); + matrix<rgb_pixel> face_chip; + extract_image_chip(img, get_face_chip_details(shape,150,0.25), face_chip); + faces.push_back(move(face_chip)); + // Also put some boxes on the faces so we can see that the detector is finding + // them. + win.add_overlay(face); + } + + if (faces.size() == 0) + { + cout << "No faces found in image!" << endl; + return 1; + } + + // This call asks the DNN to convert each face image in faces into a 128D vector. + // In this 128D vector space, images from the same person will be close to each other + // but vectors from different people will be far apart. So we can use these vectors to + // identify if a pair of images are from the same person or from different people. + std::vector<matrix<float,0,1>> face_descriptors = net(faces); + + + // In particular, one simple thing we can do is face clustering. This next bit of code + // creates a graph of connected faces and then uses the Chinese whispers graph clustering + // algorithm to identify how many people there are and which faces belong to whom. + std::vector<sample_pair> edges; + for (size_t i = 0; i < face_descriptors.size(); ++i) + { + for (size_t j = i; j < face_descriptors.size(); ++j) + { + // Faces are connected in the graph if they are close enough. Here we check if + // the distance between two face descriptors is less than 0.6, which is the + // decision threshold the network was trained to use. Although you can + // certainly use any other threshold you find useful. + if (length(face_descriptors[i]-face_descriptors[j]) < 0.6) + edges.push_back(sample_pair(i,j)); + } + } + std::vector<unsigned long> labels; + const auto num_clusters = chinese_whispers(edges, labels); + // This will correctly indicate that there are 4 people in the image. + cout << "number of people found in the image: "<< num_clusters << endl; + + + // Now let's display the face clustering results on the screen. You will see that it + // correctly grouped all the faces. + std::vector<image_window> win_clusters(num_clusters); + for (size_t cluster_id = 0; cluster_id < num_clusters; ++cluster_id) + { + std::vector<matrix<rgb_pixel>> temp; + for (size_t j = 0; j < labels.size(); ++j) + { + if (cluster_id == labels[j]) + temp.push_back(faces[j]); + } + win_clusters[cluster_id].set_title("face cluster " + cast_to_string(cluster_id)); + win_clusters[cluster_id].set_image(tile_images(temp)); + } + + + + + // Finally, let's print one of the face descriptors to the screen. + cout << "face descriptor for one face: " << trans(face_descriptors[0]) << endl; + + // It should also be noted that face recognition accuracy can be improved if jittering + // is used when creating face descriptors. 
In particular, to get 99.38% on the LFW + // benchmark you need to use the jitter_image() routine to compute the descriptors, + // like so: + matrix<float,0,1> face_descriptor = mean(mat(net(jitter_image(faces[0])))); + cout << "jittered face descriptor for one face: " << trans(face_descriptor) << endl; + // If you use the model without jittering, as we did when clustering the bald guys, it + // gets an accuracy of 99.13% on the LFW benchmark. So jittering makes the whole + // procedure a little more accurate but makes face descriptor calculation slower. + + + cout << "hit enter to terminate" << endl; + cin.get(); +} +catch (std::exception& e) +{ + cout << e.what() << endl; +} + +// ---------------------------------------------------------------------------------------- + +std::vector<matrix<rgb_pixel>> jitter_image( + const matrix<rgb_pixel>& img +) +{ + // All this function does is make 100 copies of img, all slightly jittered by being + // zoomed, rotated, and translated a little bit differently. They are also randomly + // mirrored left to right. + thread_local dlib::rand rnd; + + std::vector<matrix<rgb_pixel>> crops; + for (int i = 0; i < 100; ++i) + crops.push_back(jitter_image(img,rnd)); + + return crops; +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/dnn_imagenet_ex.cpp b/ml/dlib/examples/dnn_imagenet_ex.cpp new file mode 100644 index 00000000..d1fa8282 --- /dev/null +++ b/ml/dlib/examples/dnn_imagenet_ex.cpp @@ -0,0 +1,171 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This example shows how to classify an image into one of the 1000 imagenet + categories using the deep learning tools from the dlib C++ Library. We will + use the pretrained ResNet34 model available on the dlib website. + + The ResNet34 architecture is from the paper Deep Residual Learning for Image + Recognition by He, Zhang, Ren, and Sun. The model file that comes with dlib + was trained using the dnn_imagenet_train_ex.cpp program on a Titan X for + about 2 weeks. This pretrained model has a top5 error of 7.572% on the 2012 + imagenet validation dataset. + + For an introduction to dlib's DNN module read the dnn_introduction_ex.cpp and + dnn_introduction2_ex.cpp example programs. + + + Finally, these tools will use CUDA and cuDNN to drastically accelerate + network training and testing. CMake should automatically find them if they + are installed and configure things appropriately. If not, the program will + still run but will be much slower to execute. 
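+
+    (Aside: whether dlib was actually built with CUDA support can be checked at
+    compile time by testing the DLIB_USE_CUDA preprocessor symbol, e.g.
+
+        #ifdef DLIB_USE_CUDA
+            // compiled with CUDA/cuDNN acceleration
+        #endif
+    )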
+*/ + + + +#include <dlib/dnn.h> +#include <iostream> +#include <dlib/data_io.h> +#include <dlib/gui_widgets.h> +#include <dlib/image_transforms.h> + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------------------- + +// This block of statements defines the resnet-34 network + +template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> +using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>; + +template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> +using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>; + +template <int N, template <typename> class BN, int stride, typename SUBNET> +using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>; + +template <int N, typename SUBNET> using ares = relu<residual<block,N,affine,SUBNET>>; +template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>; + +template <typename SUBNET> using level1 = ares<512,ares<512,ares_down<512,SUBNET>>>; +template <typename SUBNET> using level2 = ares<256,ares<256,ares<256,ares<256,ares<256,ares_down<256,SUBNET>>>>>>; +template <typename SUBNET> using level3 = ares<128,ares<128,ares<128,ares_down<128,SUBNET>>>>; +template <typename SUBNET> using level4 = ares<64,ares<64,ares<64,SUBNET>>>; + +using anet_type = loss_multiclass_log<fc<1000,avg_pool_everything< + level1< + level2< + level3< + level4< + max_pool<3,3,2,2,relu<affine<con<64,7,7,2,2, + input_rgb_image_sized<227> + >>>>>>>>>>>; + +// ---------------------------------------------------------------------------------------- + +rectangle make_random_cropping_rect_resnet( + const matrix<rgb_pixel>& img, + dlib::rand& rnd +) +{ + // figure out what rectangle we want to crop from the image + double mins = 0.466666666, maxs = 0.875; + auto scale = mins + rnd.get_random_double()*(maxs-mins); + auto size = scale*std::min(img.nr(), img.nc()); + rectangle rect(size, size); + // randomly shift the box around + point offset(rnd.get_random_32bit_number()%(img.nc()-rect.width()), + rnd.get_random_32bit_number()%(img.nr()-rect.height())); + return move_rect(rect, offset); +} + +// ---------------------------------------------------------------------------------------- + +void randomly_crop_images ( + const matrix<rgb_pixel>& img, + dlib::array<matrix<rgb_pixel>>& crops, + dlib::rand& rnd, + long num_crops +) +{ + std::vector<chip_details> dets; + for (long i = 0; i < num_crops; ++i) + { + auto rect = make_random_cropping_rect_resnet(img, rnd); + dets.push_back(chip_details(rect, chip_dims(227,227))); + } + + extract_image_chips(img, dets, crops); + + for (auto&& img : crops) + { + // Also randomly flip the image + if (rnd.get_random_double() > 0.5) + img = fliplr(img); + + // And then randomly adjust the colors. 
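+        // Note: apply_random_color_offset() perturbs the image's colors slightly
+        // at random.  Together with the random cropping and flipping above, this
+        // is a standard augmentation recipe that makes the classifier less
+        // sensitive to framing, mirroring, and lighting.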
+ apply_random_color_offset(img, rnd); + } +} + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) try +{ + if (argc == 1) + { + cout << "Give this program image files as command line arguments.\n" << endl; + cout << "You will also need a copy of the file resnet34_1000_imagenet_classifier.dnn " << endl; + cout << "available at http://dlib.net/files/resnet34_1000_imagenet_classifier.dnn.bz2" << endl; + cout << endl; + return 1; + } + + std::vector<string> labels; + anet_type net; + deserialize("resnet34_1000_imagenet_classifier.dnn") >> net >> labels; + + // Make a network with softmax as the final layer. We don't have to do this + // if we just want to output the single best prediction, since the anet_type + // already does this. But if we instead want to get the probability of each + // class as output we need to replace the last layer of the network with a + // softmax layer, which we do as follows: + softmax<anet_type::subnet_type> snet; + snet.subnet() = net.subnet(); + + dlib::array<matrix<rgb_pixel>> images; + matrix<rgb_pixel> img, crop; + + dlib::rand rnd; + image_window win; + + // Read images from the command prompt and print the top 5 best labels for each. + for (int i = 1; i < argc; ++i) + { + load_image(img, argv[i]); + const int num_crops = 16; + // Grab 16 random crops from the image. We will run all of them through the + // network and average the results. + randomly_crop_images(img, images, rnd, num_crops); + // p(i) == the probability the image contains object of class i. + matrix<float,1,1000> p = sum_rows(mat(snet(images.begin(), images.end())))/num_crops; + + win.set_image(img); + // Print the 5 most probable labels + for (int k = 0; k < 5; ++k) + { + unsigned long predicted_label = index_of_max(p); + cout << p(predicted_label) << ": " << labels[predicted_label] << endl; + p(predicted_label) = 0; + } + + cout << "Hit enter to process the next image"; + cin.get(); + } + +} +catch(std::exception& e) +{ + cout << e.what() << endl; +} + diff --git a/ml/dlib/examples/dnn_imagenet_train_ex.cpp b/ml/dlib/examples/dnn_imagenet_train_ex.cpp new file mode 100644 index 00000000..e672018d --- /dev/null +++ b/ml/dlib/examples/dnn_imagenet_train_ex.cpp @@ -0,0 +1,368 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This program was used to train the resnet34_1000_imagenet_classifier.dnn + network used by the dnn_imagenet_ex.cpp example program. + + You should be familiar with dlib's DNN module before reading this example + program. So read dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp first. 
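+
+    (Aside: as noted in dnn_imagenet_ex.cpp, the shipped
+    resnet34_1000_imagenet_classifier.dnn model was produced by this program
+    running on a Titan X for about 2 weeks, so expect a long-running job.)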
+*/ + + + +#include <dlib/dnn.h> +#include <iostream> +#include <dlib/data_io.h> +#include <dlib/image_transforms.h> +#include <dlib/dir_nav.h> +#include <iterator> +#include <thread> + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------------------- + +template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> +using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>; + +template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> +using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>; + +template <int N, template <typename> class BN, int stride, typename SUBNET> +using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>; + + +template <int N, typename SUBNET> using res = relu<residual<block,N,bn_con,SUBNET>>; +template <int N, typename SUBNET> using ares = relu<residual<block,N,affine,SUBNET>>; +template <int N, typename SUBNET> using res_down = relu<residual_down<block,N,bn_con,SUBNET>>; +template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>; + + +// ---------------------------------------------------------------------------------------- + +template <typename SUBNET> using level1 = res<512,res<512,res_down<512,SUBNET>>>; +template <typename SUBNET> using level2 = res<256,res<256,res<256,res<256,res<256,res_down<256,SUBNET>>>>>>; +template <typename SUBNET> using level3 = res<128,res<128,res<128,res_down<128,SUBNET>>>>; +template <typename SUBNET> using level4 = res<64,res<64,res<64,SUBNET>>>; + +template <typename SUBNET> using alevel1 = ares<512,ares<512,ares_down<512,SUBNET>>>; +template <typename SUBNET> using alevel2 = ares<256,ares<256,ares<256,ares<256,ares<256,ares_down<256,SUBNET>>>>>>; +template <typename SUBNET> using alevel3 = ares<128,ares<128,ares<128,ares_down<128,SUBNET>>>>; +template <typename SUBNET> using alevel4 = ares<64,ares<64,ares<64,SUBNET>>>; + +// training network type +using net_type = loss_multiclass_log<fc<1000,avg_pool_everything< + level1< + level2< + level3< + level4< + max_pool<3,3,2,2,relu<bn_con<con<64,7,7,2,2, + input_rgb_image_sized<227> + >>>>>>>>>>>; + +// testing network type (replaced batch normalization with fixed affine transforms) +using anet_type = loss_multiclass_log<fc<1000,avg_pool_everything< + alevel1< + alevel2< + alevel3< + alevel4< + max_pool<3,3,2,2,relu<affine<con<64,7,7,2,2, + input_rgb_image_sized<227> + >>>>>>>>>>>; + +// ---------------------------------------------------------------------------------------- + +rectangle make_random_cropping_rect_resnet( + const matrix<rgb_pixel>& img, + dlib::rand& rnd +) +{ + // figure out what rectangle we want to crop from the image + double mins = 0.466666666, maxs = 0.875; + auto scale = mins + rnd.get_random_double()*(maxs-mins); + auto size = scale*std::min(img.nr(), img.nc()); + rectangle rect(size, size); + // randomly shift the box around + point offset(rnd.get_random_32bit_number()%(img.nc()-rect.width()), + rnd.get_random_32bit_number()%(img.nr()-rect.height())); + return move_rect(rect, offset); +} + +// ---------------------------------------------------------------------------------------- + +void randomly_crop_image ( + const matrix<rgb_pixel>& img, + matrix<rgb_pixel>& crop, + dlib::rand& rnd +) +{ + auto rect = make_random_cropping_rect_resnet(img, rnd); + + // now crop it out as 
a 227x227 image. + extract_image_chip(img, chip_details(rect, chip_dims(227,227)), crop); + + // Also randomly flip the image + if (rnd.get_random_double() > 0.5) + crop = fliplr(crop); + + // And then randomly adjust the colors. + apply_random_color_offset(crop, rnd); +} + +void randomly_crop_images ( + const matrix<rgb_pixel>& img, + dlib::array<matrix<rgb_pixel>>& crops, + dlib::rand& rnd, + long num_crops +) +{ + std::vector<chip_details> dets; + for (long i = 0; i < num_crops; ++i) + { + auto rect = make_random_cropping_rect_resnet(img, rnd); + dets.push_back(chip_details(rect, chip_dims(227,227))); + } + + extract_image_chips(img, dets, crops); + + for (auto&& img : crops) + { + // Also randomly flip the image + if (rnd.get_random_double() > 0.5) + img = fliplr(img); + + // And then randomly adjust the colors. + apply_random_color_offset(img, rnd); + } +} + +// ---------------------------------------------------------------------------------------- + +struct image_info +{ + string filename; + string label; + long numeric_label; +}; + +std::vector<image_info> get_imagenet_train_listing( + const std::string& images_folder +) +{ + std::vector<image_info> results; + image_info temp; + temp.numeric_label = 0; + // We will loop over all the label types in the dataset, each is contained in a subfolder. + auto subdirs = directory(images_folder).get_dirs(); + // But first, sort the sub directories so the numeric labels will be assigned in sorted order. + std::sort(subdirs.begin(), subdirs.end()); + for (auto subdir : subdirs) + { + // Now get all the images in this label type + temp.label = subdir.name(); + for (auto image_file : subdir.get_files()) + { + temp.filename = image_file; + results.push_back(temp); + } + ++temp.numeric_label; + } + return results; +} + +std::vector<image_info> get_imagenet_val_listing( + const std::string& imagenet_root_dir, + const std::string& validation_images_file +) +{ + ifstream fin(validation_images_file); + string label, filename; + std::vector<image_info> results; + image_info temp; + temp.numeric_label = -1; + while(fin >> label >> filename) + { + temp.filename = imagenet_root_dir+"/"+filename; + if (!file_exists(temp.filename)) + { + cerr << "file doesn't exist! " << temp.filename << endl; + exit(1); + } + if (label != temp.label) + ++temp.numeric_label; + + temp.label = label; + results.push_back(temp); + } + + return results; +} + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) try +{ + if (argc != 3) + { + cout << "To run this program you need a copy of the imagenet ILSVRC2015 dataset and" << endl; + cout << "also the file http://dlib.net/files/imagenet2015_validation_images.txt.bz2" << endl; + cout << endl; + cout << "With those things, you call this program like this: " << endl; + cout << "./dnn_imagenet_train_ex /path/to/ILSVRC2015 imagenet2015_validation_images.txt" << endl; + return 1; + } + + cout << "\nSCANNING IMAGENET DATASET\n" << endl; + + auto listing = get_imagenet_train_listing(string(argv[1])+"/Data/CLS-LOC/train/"); + cout << "images in dataset: " << listing.size() << endl; + const auto number_of_classes = listing.back().numeric_label+1; + if (listing.size() == 0 || number_of_classes != 1000) + { + cout << "Didn't find the imagenet dataset. 
" << endl; + return 1; + } + + set_dnn_prefer_smallest_algorithms(); + + + const double initial_learning_rate = 0.1; + const double weight_decay = 0.0001; + const double momentum = 0.9; + + net_type net; + dnn_trainer<net_type> trainer(net,sgd(weight_decay, momentum)); + trainer.be_verbose(); + trainer.set_learning_rate(initial_learning_rate); + trainer.set_synchronization_file("imagenet_trainer_state_file.dat", std::chrono::minutes(10)); + // This threshold is probably excessively large. You could likely get good results + // with a smaller value but if you aren't in a hurry this value will surely work well. + trainer.set_iterations_without_progress_threshold(20000); + // Since the progress threshold is so large might as well set the batch normalization + // stats window to something big too. + set_all_bn_running_stats_window_sizes(net, 1000); + + std::vector<matrix<rgb_pixel>> samples; + std::vector<unsigned long> labels; + + // Start a bunch of threads that read images from disk and pull out random crops. It's + // important to be sure to feed the GPU fast enough to keep it busy. Using multiple + // thread for this kind of data preparation helps us do that. Each thread puts the + // crops into the data queue. + dlib::pipe<std::pair<image_info,matrix<rgb_pixel>>> data(200); + auto f = [&data, &listing](time_t seed) + { + dlib::rand rnd(time(0)+seed); + matrix<rgb_pixel> img; + std::pair<image_info, matrix<rgb_pixel>> temp; + while(data.is_enabled()) + { + temp.first = listing[rnd.get_random_32bit_number()%listing.size()]; + load_image(img, temp.first.filename); + randomly_crop_image(img, temp.second, rnd); + data.enqueue(temp); + } + }; + std::thread data_loader1([f](){ f(1); }); + std::thread data_loader2([f](){ f(2); }); + std::thread data_loader3([f](){ f(3); }); + std::thread data_loader4([f](){ f(4); }); + + // The main training loop. Keep making mini-batches and giving them to the trainer. + // We will run until the learning rate has dropped by a factor of 1e-3. + while(trainer.get_learning_rate() >= initial_learning_rate*1e-3) + { + samples.clear(); + labels.clear(); + + // make a 160 image mini-batch + std::pair<image_info, matrix<rgb_pixel>> img; + while(samples.size() < 160) + { + data.dequeue(img); + + samples.push_back(std::move(img.second)); + labels.push_back(img.first.numeric_label); + } + + trainer.train_one_step(samples, labels); + } + + // Training done, tell threads to stop and make sure to wait for them to finish before + // moving on. + data.disable(); + data_loader1.join(); + data_loader2.join(); + data_loader3.join(); + data_loader4.join(); + + // also wait for threaded processing to stop in the trainer. + trainer.get_net(); + + net.clean(); + cout << "saving network" << endl; + serialize("resnet34.dnn") << net; + + + + + + + // Now test the network on the imagenet validation dataset. First, make a testing + // network with softmax as the final layer. We don't have to do this if we just wanted + // to test the "top1 accuracy" since the normal network outputs the class prediction. + // But this snet object will make getting the top5 predictions easy as it directly + // outputs the probability of each class as its final output. + softmax<anet_type::subnet_type> snet; snet.subnet() = net.subnet(); + + cout << "Testing network on imagenet validation dataset..." 
<< endl;
+    int num_right = 0;
+    int num_wrong = 0;
+    int num_right_top1 = 0;
+    int num_wrong_top1 = 0;
+    dlib::rand rnd(time(0));
+    // loop over all the imagenet validation images
+    for (auto l : get_imagenet_val_listing(argv[1], argv[2]))
+    {
+        dlib::array<matrix<rgb_pixel>> images;
+        matrix<rgb_pixel> img;
+        load_image(img, l.filename);
+        // Grab 16 random crops from the image.  We will run all of them through the
+        // network and average the results.
+        const int num_crops = 16;
+        randomly_crop_images(img, images, rnd, num_crops);
+        // p(i) == the probability the image contains object of class i.
+        matrix<float,1,1000> p = sum_rows(mat(snet(images.begin(), images.end())))/num_crops;
+
+        // check top 1 accuracy
+        if (index_of_max(p) == l.numeric_label)
+            ++num_right_top1;
+        else
+            ++num_wrong_top1;
+
+        // check top 5 accuracy
+        bool found_match = false;
+        for (int k = 0; k < 5; ++k)
+        {
+            long predicted_label = index_of_max(p);
+            p(predicted_label) = 0;
+            if (predicted_label == l.numeric_label)
+            {
+                found_match = true;
+                break;
+            }
+        }
+        if (found_match)
+            ++num_right;
+        else
+            ++num_wrong;
+    }
+    cout << "val top5 accuracy: " << num_right/(double)(num_right+num_wrong) << endl;
+    cout << "val top1 accuracy: " << num_right_top1/(double)(num_right_top1+num_wrong_top1) << endl;
+}
+catch(std::exception& e)
+{
+    cout << e.what() << endl;
+}
+
diff --git a/ml/dlib/examples/dnn_inception_ex.cpp b/ml/dlib/examples/dnn_inception_ex.cpp new file mode 100644 index 00000000..6b2c1727 --- /dev/null +++ b/ml/dlib/examples/dnn_inception_ex.cpp @@ -0,0 +1,154 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    This is an example illustrating the use of the deep learning tools from the
+    dlib C++ Library.  I'm assuming you have already read the introductory
+    dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp examples.  In this
+    example we are going to show how to create inception networks.
+
+    An inception network is composed of inception blocks of the form:
+
+               input from SUBNET
+              /        |        \
+             /         |         \
+          block1    block2  ...  blockN
+             \         |         /
+              \        |        /
+          concatenate tensors from blocks
+                       |
+                    output
+
+    That is, an inception block runs a number of smaller networks (e.g. block1,
+    block2) and then concatenates their results.  For further reading refer to:
+    Szegedy, Christian, et al. "Going deeper with convolutions." Proceedings of
+    the IEEE Conference on Computer Vision and Pattern Recognition. 2015.
+*/
+
+#include <dlib/dnn.h>
+#include <iostream>
+#include <dlib/data_io.h>
+
+using namespace std;
+using namespace dlib;
+
+// An inception layer runs several different convolutions on the same input.
+// Here we define the sub-blocks, convolutions with different kernel sizes,
+// that we will use inside our inception layers.
+template <typename SUBNET> using block_a1 = relu<con<10,1,1,1,1,SUBNET>>;
+template <typename SUBNET> using block_a2 = relu<con<10,3,3,1,1,relu<con<16,1,1,1,1,SUBNET>>>>;
+template <typename SUBNET> using block_a3 = relu<con<10,5,5,1,1,relu<con<16,1,1,1,1,SUBNET>>>>;
+template <typename SUBNET> using block_a4 = relu<con<10,1,1,1,1,max_pool<3,3,1,1,SUBNET>>>;
+
+// Here is the inception layer definition.  It processes its input with each of
+// the blocks above and returns their combined output.  Dlib includes a number
+// of these inceptionN layer types, which are themselves created using concat
+// layers.
+template <typename SUBNET> using incept_a = inception4<block_a1,block_a2,block_a3,block_a4, SUBNET>;
+
+// A network can have inception layers with different structures.  They will work
+// properly so long as all the sub-blocks inside a particular inception block
+// output tensors with the same number of rows and columns.
+template <typename SUBNET> using block_b1 = relu<con<4,1,1,1,1,SUBNET>>;
+template <typename SUBNET> using block_b2 = relu<con<4,3,3,1,1,SUBNET>>;
+template <typename SUBNET> using block_b3 = relu<con<4,1,1,1,1,max_pool<3,3,1,1,SUBNET>>>;
+template <typename SUBNET> using incept_b = inception3<block_b1,block_b2,block_b3,SUBNET>;
+
+// Now we can define a simple network for classifying MNIST digits.  We will
+// train and test this network in the code below.
+using net_type = loss_multiclass_log<
+        fc<10,
+        relu<fc<32,
+        max_pool<2,2,2,2,incept_b<
+        max_pool<2,2,2,2,incept_a<
+        input<matrix<unsigned char>>
+        >>>>>>>>;
+
+int main(int argc, char** argv) try
+{
+    // This example is going to run on the MNIST dataset.
+    if (argc != 2)
+    {
+        cout << "This example needs the MNIST dataset to run!" << endl;
+        cout << "You can get MNIST from http://yann.lecun.com/exdb/mnist/" << endl;
+        cout << "Download the 4 files that comprise the dataset, decompress them, and" << endl;
+        cout << "put them in a folder.  Then give that folder as input to this program." << endl;
+        return 1;
+    }
+
+
+    std::vector<matrix<unsigned char>> training_images;
+    std::vector<unsigned long> training_labels;
+    std::vector<matrix<unsigned char>> testing_images;
+    std::vector<unsigned long> testing_labels;
+    load_mnist_dataset(argv[1], training_images, training_labels, testing_images, testing_labels);
+
+
+    // Make an instance of our inception network.
+    net_type net;
+    cout << "The net has " << net.num_layers << " layers in it." << endl;
+    cout << net << endl;
+
+
+    cout << "Training NN..." << endl;
+    dnn_trainer<net_type> trainer(net);
+    trainer.set_learning_rate(0.01);
+    trainer.set_min_learning_rate(0.00001);
+    trainer.set_mini_batch_size(128);
+    trainer.be_verbose();
+    trainer.set_synchronization_file("inception_sync", std::chrono::seconds(20));
+    // Train the network.  This might take a few minutes...
+    trainer.train(training_images, training_labels);
+
+    // At this point our net object should have learned how to classify MNIST images.  But
+    // before we try it out let's save it to disk.  Note that, since the trainer has been
+    // running images through the network, net will have a bunch of state in it related to
+    // the last batch of images it processed (e.g. outputs from each layer).  Since we
+    // don't care about saving that kind of stuff to disk we can tell the network to forget
+    // about that kind of transient data so that our file will be smaller.  We do this by
+    // "cleaning" the network before saving it.
+    net.clean();
+    serialize("mnist_network_inception.dat") << net;
+    // Now if we later wanted to recall the network from disk we can simply say:
+    // deserialize("mnist_network_inception.dat") >> net;
+
+
+    // Now let's run the training images through the network.  This statement runs all the
+    // images through it and asks the loss layer to convert the network's raw output into
+    // labels.  In our case, these labels are the numbers between 0 and 9.
+    std::vector<unsigned long> predicted_labels = net(training_images);
+    int num_right = 0;
+    int num_wrong = 0;
+    // And then let's see if it classified them correctly.
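+    // (Aside: net(training_images) returns hard labels.  A minimal sketch of how
+    // you could get per-class probabilities instead, reusing the softmax trick
+    // from dnn_imagenet_ex.cpp:
+    //
+    //     softmax<net_type::subnet_type> snet;
+    //     snet.subnet() = net.subnet();
+    //
+    // after which snet yields one probability per digit for each input image.)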
+ for (size_t i = 0; i < training_images.size(); ++i) + { + if (predicted_labels[i] == training_labels[i]) + ++num_right; + else + ++num_wrong; + + } + cout << "training num_right: " << num_right << endl; + cout << "training num_wrong: " << num_wrong << endl; + cout << "training accuracy: " << num_right/(double)(num_right+num_wrong) << endl; + + // Let's also see if the network can correctly classify the testing images. + // Since MNIST is an easy dataset, we should see 99% accuracy. + predicted_labels = net(testing_images); + num_right = 0; + num_wrong = 0; + for (size_t i = 0; i < testing_images.size(); ++i) + { + if (predicted_labels[i] == testing_labels[i]) + ++num_right; + else + ++num_wrong; + + } + cout << "testing num_right: " << num_right << endl; + cout << "testing num_wrong: " << num_wrong << endl; + cout << "testing accuracy: " << num_right/(double)(num_right+num_wrong) << endl; + +} +catch(std::exception& e) +{ + cout << e.what() << endl; +} + diff --git a/ml/dlib/examples/dnn_introduction2_ex.cpp b/ml/dlib/examples/dnn_introduction2_ex.cpp new file mode 100644 index 00000000..70b6edee --- /dev/null +++ b/ml/dlib/examples/dnn_introduction2_ex.cpp @@ -0,0 +1,388 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is an example illustrating the use of the deep learning tools from the + dlib C++ Library. I'm assuming you have already read the dnn_introduction_ex.cpp + example. So in this example program I'm going to go over a number of more + advanced parts of the API, including: + - Using multiple GPUs + - Training on large datasets that don't fit in memory + - Defining large networks + - Accessing and configuring layers in a network +*/ + +#include <dlib/dnn.h> +#include <iostream> +#include <dlib/data_io.h> + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------------------- + +// Let's start by showing how you can conveniently define large and complex +// networks. The most important tool for doing this are C++'s alias templates. +// These let us define new layer types that are combinations of a bunch of other +// layers. These will form the building blocks for more complex networks. + +// So let's begin by defining the building block of a residual network (see +// Figure 2 in Deep Residual Learning for Image Recognition by He, Zhang, Ren, +// and Sun). We are going to decompose the residual block into a few alias +// statements. First, we define the core block. + +// Here we have parameterized the "block" layer on a BN layer (nominally some +// kind of batch normalization), the number of filter outputs N, and the stride +// the block operates at. +template < + int N, + template <typename> class BN, + int stride, + typename SUBNET + > +using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>; + +// Next, we need to define the skip layer mechanism used in the residual network +// paper. They create their blocks by adding the input tensor to the output of +// each block. So we define an alias statement that takes a block and wraps it +// with this skip/add structure. + +// Note the tag layer. This layer doesn't do any computation. It exists solely +// so other layers can refer to it. In this case, the add_prev1 layer looks for +// the tag1 layer and will take the tag1 output and add it to the input of the +// add_prev1 layer. This combination allows us to implement skip and residual +// style networks. 
We have also set the block stride to 1 in this statement. +// The significance of that is explained next. +template < + template <int,template<typename>class,int,typename> class block, + int N, + template<typename>class BN, + typename SUBNET + > +using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>; + +// Some residual blocks do downsampling. They do this by using a stride of 2 +// instead of 1. However, when downsampling we need to also take care to +// downsample the part of the network that adds the original input to the output +// or the sizes won't make sense (the network will still run, but the results +// aren't as good). So here we define a downsampling version of residual. In +// it, we make use of the skip1 layer. This layer simply outputs whatever is +// output by the tag1 layer. Therefore, the skip1 layer (there are also skip2, +// skip3, etc. in dlib) allows you to create branching network structures. + +// residual_down creates a network structure like this: +/* + input from SUBNET + / \ + / \ + block downsample(using avg_pool) + \ / + \ / + add tensors (using add_prev2 which adds the output of tag2 with avg_pool's output) + | + output +*/ +template < + template <int,template<typename>class,int,typename> class block, + int N, + template<typename>class BN, + typename SUBNET + > +using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>; + + + +// Now we can define 4 different residual blocks we will use in this example. +// The first two are non-downsampling residual blocks while the last two +// downsample. Also, res and res_down use batch normalization while ares and +// ares_down have had the batch normalization replaced with simple affine +// layers. We will use the affine version of the layers when testing our +// networks. +template <typename SUBNET> using res = relu<residual<block,8,bn_con,SUBNET>>; +template <typename SUBNET> using ares = relu<residual<block,8,affine,SUBNET>>; +template <typename SUBNET> using res_down = relu<residual_down<block,8,bn_con,SUBNET>>; +template <typename SUBNET> using ares_down = relu<residual_down<block,8,affine,SUBNET>>; + + + +// Now that we have these convenient aliases, we can define a residual network +// without a lot of typing. Note the use of a repeat layer. This special layer +// type allows us to type repeat<9,res,SUBNET> instead of +// res<res<res<res<res<res<res<res<res<SUBNET>>>>>>>>>. It will also prevent +// the compiler from complaining about super deep template nesting when creating +// large networks. +const unsigned long number_of_classes = 10; +using net_type = loss_multiclass_log<fc<number_of_classes, + avg_pool_everything< + res<res<res<res_down< + repeat<9,res, // repeat this layer 9 times + res_down< + res< + input<matrix<unsigned char>> + >>>>>>>>>>; + + +// And finally, let's define a residual network building block that uses +// parametric ReLU units instead of regular ReLU. +template <typename SUBNET> +using pres = prelu<add_prev1<bn_con<con<8,3,3,1,1,prelu<bn_con<con<8,3,3,1,1,tag1<SUBNET>>>>>>>>; + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) try +{ + if (argc != 2) + { + cout << "This example needs the MNIST dataset to run!" << endl; + cout << "You can get MNIST from http://yann.lecun.com/exdb/mnist/" << endl; + cout << "Download the 4 files that comprise the dataset, decompress them, and" << endl; + cout << "put them in a folder. Then give that folder as input to this program." 
<< endl; + return 1; + } + + std::vector<matrix<unsigned char>> training_images; + std::vector<unsigned long> training_labels; + std::vector<matrix<unsigned char>> testing_images; + std::vector<unsigned long> testing_labels; + load_mnist_dataset(argv[1], training_images, training_labels, testing_images, testing_labels); + + + // dlib uses cuDNN under the covers. One of the features of cuDNN is the + // option to use slower methods that use less RAM or faster methods that use + // a lot of RAM. If you find that you run out of RAM on your graphics card + // then you can call this function and we will request the slower but more + // RAM frugal cuDNN algorithms. + set_dnn_prefer_smallest_algorithms(); + + + // Create a network as defined above. This network will produce 10 outputs + // because that's how we defined net_type. However, fc layers can have the + // number of outputs they produce changed at runtime. + net_type net; + // So if you wanted to use the same network but override the number of + // outputs at runtime you can do so like this: + net_type net2(num_fc_outputs(15)); + + // Now, let's imagine we wanted to replace some of the relu layers with + // prelu layers. We might do it like this: + using net_type2 = loss_multiclass_log<fc<number_of_classes, + avg_pool_everything< + pres<res<res<res_down< // 2 prelu layers here + tag4<repeat<9,pres, // 9 groups, each containing 2 prelu layers + res_down< + res< + input<matrix<unsigned char>> + >>>>>>>>>>>; + + // prelu layers have a floating point parameter. If you want to set it to + // something other than its default value you can do so like this: + net_type2 pnet(prelu_(0.2), + prelu_(0.25), + repeat_group(prelu_(0.3),prelu_(0.4)) // Initialize all the prelu instances in the repeat + // layer. repeat_group() is needed to group the + // things that are part of repeat's block. + ); + // As you can see, a network will greedily assign things given to its + // constructor to the layers inside itself. The assignment is done in the + // order the layers are defined, but it will skip layers where the + // assignment doesn't make sense. + + // Now let's print the details of the pnet to the screen and inspect it. + cout << "The pnet has " << pnet.num_layers << " layers in it." << endl; + cout << pnet << endl; + // These print statements will output this (I've truncated it since it's + // long, but you get the idea): + /* + The pnet has 131 layers in it. + layer<0> loss_multiclass_log + layer<1> fc (num_outputs=10) learning_rate_mult=1 weight_decay_mult=1 bias_learning_rate_mult=1 bias_weight_decay_mult=0 + layer<2> avg_pool (nr=0, nc=0, stride_y=1, stride_x=1, padding_y=0, padding_x=0) + layer<3> prelu (initial_param_value=0.2) + layer<4> add_prev1 + layer<5> bn_con eps=1e-05 learning_rate_mult=1 weight_decay_mult=0 bias_learning_rate_mult=1 bias_weight_decay_mult=1 + layer<6> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1, padding_y=1, padding_x=1) learning_rate_mult=1 weight_decay_mult=1 bias_learning_rate_mult=1 bias_weight_decay_mult=0 + layer<7> prelu (initial_param_value=0.25) + layer<8> bn_con eps=1e-05 learning_rate_mult=1 weight_decay_mult=0 bias_learning_rate_mult=1 bias_weight_decay_mult=1 + layer<9> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1, padding_y=1, padding_x=1) learning_rate_mult=1 weight_decay_mult=1 bias_learning_rate_mult=1 bias_weight_decay_mult=0 + layer<10> tag1 + ... 
+ layer<34> relu + layer<35> bn_con eps=1e-05 learning_rate_mult=1 weight_decay_mult=0 bias_learning_rate_mult=1 bias_weight_decay_mult=1 + layer<36> con (num_filters=8, nr=3, nc=3, stride_y=2, stride_x=2, padding_y=0, padding_x=0) learning_rate_mult=1 weight_decay_mult=1 bias_learning_rate_mult=1 bias_weight_decay_mult=0 + layer<37> tag1 + layer<38> tag4 + layer<39> prelu (initial_param_value=0.3) + layer<40> add_prev1 + layer<41> bn_con eps=1e-05 learning_rate_mult=1 weight_decay_mult=0 bias_learning_rate_mult=1 bias_weight_decay_mult=1 + ... + layer<118> relu + layer<119> bn_con eps=1e-05 learning_rate_mult=1 weight_decay_mult=0 bias_learning_rate_mult=1 bias_weight_decay_mult=1 + layer<120> con (num_filters=8, nr=3, nc=3, stride_y=2, stride_x=2, padding_y=0, padding_x=0) learning_rate_mult=1 weight_decay_mult=1 bias_learning_rate_mult=1 bias_weight_decay_mult=0 + layer<121> tag1 + layer<122> relu + layer<123> add_prev1 + layer<124> bn_con eps=1e-05 learning_rate_mult=1 weight_decay_mult=0 bias_learning_rate_mult=1 bias_weight_decay_mult=1 + layer<125> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1, padding_y=1, padding_x=1) learning_rate_mult=1 weight_decay_mult=1 bias_learning_rate_mult=1 bias_weight_decay_mult=0 + layer<126> relu + layer<127> bn_con eps=1e-05 learning_rate_mult=1 weight_decay_mult=0 bias_learning_rate_mult=1 bias_weight_decay_mult=1 + layer<128> con (num_filters=8, nr=3, nc=3, stride_y=1, stride_x=1, padding_y=1, padding_x=1) learning_rate_mult=1 weight_decay_mult=1 bias_learning_rate_mult=1 bias_weight_decay_mult=0 + layer<129> tag1 + layer<130> input<matrix> + */ + + // Now that we know the index numbers for each layer, we can access them + // individually using layer<index>(pnet). For example, to access the output + // tensor for the first prelu layer we can say: + layer<3>(pnet).get_output(); + // Or to print the prelu parameter for layer 7 we can say: + cout << "prelu param: "<< layer<7>(pnet).layer_details().get_initial_param_value() << endl; + + // We can also access layers by their type. This next statement finds the + // first tag1 layer in pnet, and is therefore equivalent to calling + // layer<10>(pnet): + layer<tag1>(pnet); + // The tag layers don't do anything at all and exist simply so you can tag + // parts of your network and access them by layer<tag>(). You can also + // index relative to a tag. So for example, to access the layer immediately + // after tag4 you can say: + layer<tag4,1>(pnet); // Equivalent to layer<38+1>(pnet). + + // Or to access the layer 2 layers after tag4: + layer<tag4,2>(pnet); + // Tagging is a very useful tool for making complex network structures. For + // example, the add_prev1 layer is implemented internally by using a call to + // layer<tag1>(). + + + + // Ok, that's enough talk about defining and inspecting networks. Let's + // talk about training networks! + + // The dnn_trainer will use SGD by default, but you can tell it to use + // different solvers like adam with a weight decay of 0.0005 and the given + // momentum parameters. + dnn_trainer<net_type,adam> trainer(net,adam(0.0005, 0.9, 0.999)); + // Also, if you have multiple graphics cards you can tell the trainer to use + // them together to make the training faster. For example, replacing the + // above constructor call with this one would cause it to use GPU cards 0 + // and 1. + //dnn_trainer<net_type,adam> trainer(net,adam(0.0005, 0.9, 0.999), {0,1}); + + trainer.be_verbose(); + // While the trainer is running it keeps an eye on the training error. 
If
+    // it looks like the error hasn't decreased for the last 2000 iterations it
+    // will automatically multiply the learning rate by 0.1.  You can change these
+    // default parameters to some other values by calling these functions.  Or
+    // disable the automatic shrinking entirely by setting the shrink factor to 1.
+    trainer.set_iterations_without_progress_threshold(2000);
+    trainer.set_learning_rate_shrink_factor(0.1);
+    // The learning rate will start at 1e-3.
+    trainer.set_learning_rate(1e-3);
+    trainer.set_synchronization_file("mnist_resnet_sync", std::chrono::seconds(100));
+
+
+    // Now, what if your training dataset is so big it doesn't fit in RAM?  You
+    // make mini-batches yourself, any way you like, and you send them to the
+    // trainer by repeatedly calling trainer.train_one_step().
+    //
+    // For example, the loop below streams MNIST data to our trainer.
+    std::vector<matrix<unsigned char>> mini_batch_samples;
+    std::vector<unsigned long> mini_batch_labels;
+    dlib::rand rnd(time(0));
+    // Loop until the trainer's automatic shrinking has shrunk the learning rate to 1e-6.
+    // Given our settings, this means it will stop training after it has shrunk the
+    // learning rate 3 times.
+    while(trainer.get_learning_rate() >= 1e-6)
+    {
+        mini_batch_samples.clear();
+        mini_batch_labels.clear();
+
+        // make a 128 image mini-batch
+        while(mini_batch_samples.size() < 128)
+        {
+            auto idx = rnd.get_random_32bit_number()%training_images.size();
+            mini_batch_samples.push_back(training_images[idx]);
+            mini_batch_labels.push_back(training_labels[idx]);
+        }
+
+        // Tell the trainer to update the network given this mini-batch
+        trainer.train_one_step(mini_batch_samples, mini_batch_labels);
+
+        // You can also feed validation data into the trainer by periodically
+        // calling trainer.test_one_step(samples,labels).  Unlike train_one_step(),
+        // test_one_step() doesn't modify the network, it only computes the testing
+        // error which it records internally.  This testing error will then be printed
+        // in the verbose logging and will also determine when the trainer's
+        // automatic learning rate shrinking happens.  Therefore, test_one_step()
+        // can be used to perform automatic early stopping based on held out data.
+    }
+
+    // When you call train_one_step(), the trainer will do its processing in a
+    // separate thread.  This allows the main thread to work on loading data
+    // while the trainer is busy executing the mini-batches in parallel.
+    // However, this also means we need to wait for any mini-batches that are
+    // still executing to stop before we mess with the net object.  Calling
+    // get_net() performs the necessary synchronization.
+    trainer.get_net();
+
+
+    net.clean();
+    serialize("mnist_res_network.dat") << net;
+
+
+    // Now we have a trained network.  However, it has batch normalization
+    // layers in it.  As is customary, we should replace these with simple
+    // affine layers before we use the network.  This can be accomplished by
+    // making a network type which is identical to net_type but with the batch
+    // normalization layers replaced with affine.  For example:
+    using test_net_type = loss_multiclass_log<fc<number_of_classes,
+                                avg_pool_everything<
+                                ares<ares<ares<ares_down<
+                                repeat<9,ares,
+                                ares_down<
+                                ares<
+                                input<matrix<unsigned char>>
+                                >>>>>>>>>>;
+    // Then we can simply assign our trained net to our testing net.  (This works
+    // because dlib's affine layer knows how to construct itself from a bn_ layer.)
+    test_net_type tnet = net;
+    // Or if you only had a file with your trained network you could deserialize
+    // it directly into your testing network.
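+    // Note: as with the network file saved above, deserialize() throws
+    // dlib::serialization_error if the file is missing or was written by a
+    // network with a different architecture.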
+ deserialize("mnist_res_network.dat") >> tnet; + + + // And finally, we can run the testing network over our data. + + std::vector<unsigned long> predicted_labels = tnet(training_images); + int num_right = 0; + int num_wrong = 0; + for (size_t i = 0; i < training_images.size(); ++i) + { + if (predicted_labels[i] == training_labels[i]) + ++num_right; + else + ++num_wrong; + + } + cout << "training num_right: " << num_right << endl; + cout << "training num_wrong: " << num_wrong << endl; + cout << "training accuracy: " << num_right/(double)(num_right+num_wrong) << endl; + + predicted_labels = tnet(testing_images); + num_right = 0; + num_wrong = 0; + for (size_t i = 0; i < testing_images.size(); ++i) + { + if (predicted_labels[i] == testing_labels[i]) + ++num_right; + else + ++num_wrong; + + } + cout << "testing num_right: " << num_right << endl; + cout << "testing num_wrong: " << num_wrong << endl; + cout << "testing accuracy: " << num_right/(double)(num_right+num_wrong) << endl; + +} +catch(std::exception& e) +{ + cout << e.what() << endl; +} + diff --git a/ml/dlib/examples/dnn_introduction_ex.cpp b/ml/dlib/examples/dnn_introduction_ex.cpp new file mode 100644 index 00000000..6ae3ddf7 --- /dev/null +++ b/ml/dlib/examples/dnn_introduction_ex.cpp @@ -0,0 +1,170 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is an example illustrating the use of the deep learning tools from the + dlib C++ Library. In it, we will train the venerable LeNet convolutional + neural network to recognize hand written digits. The network will take as + input a small image and classify it as one of the 10 numeric digits between + 0 and 9. + + The specific network we will run is from the paper + LeCun, Yann, et al. "Gradient-based learning applied to document recognition." + Proceedings of the IEEE 86.11 (1998): 2278-2324. + except that we replace the sigmoid non-linearities with rectified linear units. + + These tools will use CUDA and cuDNN to drastically accelerate network + training and testing. CMake should automatically find them if they are + installed and configure things appropriately. If not, the program will + still run but will be much slower to execute. +*/ + + +#include <dlib/dnn.h> +#include <iostream> +#include <dlib/data_io.h> + +using namespace std; +using namespace dlib; + +int main(int argc, char** argv) try +{ + // This example is going to run on the MNIST dataset. + if (argc != 2) + { + cout << "This example needs the MNIST dataset to run!" << endl; + cout << "You can get MNIST from http://yann.lecun.com/exdb/mnist/" << endl; + cout << "Download the 4 files that comprise the dataset, decompress them, and" << endl; + cout << "put them in a folder. Then give that folder as input to this program." << endl; + return 1; + } + + + // MNIST is broken into two parts, a training set of 60000 images and a test set of + // 10000 images. Each image is labeled so that we know what hand written digit is + // depicted. These next statements load the dataset into memory. + std::vector<matrix<unsigned char>> training_images; + std::vector<unsigned long> training_labels; + std::vector<matrix<unsigned char>> testing_images; + std::vector<unsigned long> testing_labels; + load_mnist_dataset(argv[1], training_images, training_labels, testing_images, testing_labels); + + + // Now let's define the LeNet. Broadly speaking, there are 3 parts to a network + // definition. The loss layer, a bunch of computational layers, and then an input + // layer. 
You can see these components in the network definition below.
+    //
+    // The input layer here says the network expects to be given matrix<unsigned char>
+    // objects as input.  In general, you can use any dlib image or matrix type here, or
+    // even define your own types by creating custom input layers.
+    //
+    // Then the middle layers define the computation the network will do to transform the
+    // input into whatever we want.  Here we run the image through multiple convolutions,
+    // ReLU units, max pooling operations, and then finally a fully connected layer that
+    // converts the whole thing into just 10 numbers.
+    //
+    // Finally, the loss layer defines the relationship between the network outputs, our 10
+    // numbers, and the labels in our dataset.  Since we selected loss_multiclass_log it
+    // means we want to do multiclass classification with our network.  Moreover, the
+    // number of network outputs (i.e. 10) is the number of possible labels.  Whichever
+    // network output is largest is the predicted label.  So for example, if the first
+    // network output is largest then the predicted digit is 0; if the last network output
+    // is largest then the predicted digit is 9.
+    using net_type = loss_multiclass_log<
+                                fc<10,
+                                relu<fc<84,
+                                relu<fc<120,
+                                max_pool<2,2,2,2,relu<con<16,5,5,1,1,
+                                max_pool<2,2,2,2,relu<con<6,5,5,1,1,
+                                input<matrix<unsigned char>>
+                                >>>>>>>>>>>>;
+    // This net_type defines the entire network architecture.  For example, the block
+    // relu<fc<84,SUBNET>> means we take the output from the subnetwork, pass it through a
+    // fully connected layer with 84 outputs, then apply ReLU.  Similarly, a block of
+    // max_pool<2,2,2,2,relu<con<16,5,5,1,1,SUBNET>>> means we apply 16 convolutions with a
+    // 5x5 filter size and 1x1 stride to the output of a subnetwork, then apply ReLU, then
+    // perform max pooling with a 2x2 window and 2x2 stride.
+
+
+
+    // So with that out of the way, we can make a network instance.
+    net_type net;
+    // And then train it using the MNIST data.  The code below uses mini-batch stochastic
+    // gradient descent with an initial learning rate of 0.01 to accomplish this.
+    dnn_trainer<net_type> trainer(net);
+    trainer.set_learning_rate(0.01);
+    trainer.set_min_learning_rate(0.00001);
+    trainer.set_mini_batch_size(128);
+    trainer.be_verbose();
+    // Since DNN training can take a long time, we can ask the trainer to save its state to
+    // a file named "mnist_sync" every 20 seconds.  This way, if we kill this program and
+    // start it again it will begin where it left off rather than restarting the training
+    // from scratch.  This is because, when the program restarts, this call to
+    // set_synchronization_file() will automatically reload the settings from mnist_sync if
+    // the file exists.
+    trainer.set_synchronization_file("mnist_sync", std::chrono::seconds(20));
+    // Finally, this line begins training.  By default, it runs SGD with our specified
+    // learning rate until the loss stops decreasing.  Then it reduces the learning rate by
+    // a factor of 10 and continues running until the loss stops decreasing again.  It will
+    // keep doing this until the learning rate has dropped below the min learning rate
+    // defined above or the maximum number of epochs has been executed (this defaults to
+    // 10000).
+    trainer.train(training_images, training_labels);
+
+    // At this point our net object should have learned how to classify MNIST images.  But
+    // before we try it out let's save it to disk.
Note that, since the trainer has been + // running images through the network, net will have a bunch of state in it related to + // the last batch of images it processed (e.g. outputs from each layer). Since we + // don't care about saving that kind of stuff to disk we can tell the network to forget + // about that kind of transient data so that our file will be smaller. We do this by + // "cleaning" the network before saving it. + net.clean(); + serialize("mnist_network.dat") << net; + // Now if we later wanted to recall the network from disk we can simply say: + // deserialize("mnist_network.dat") >> net; + + + // Now let's run the training images through the network. This statement runs all the + // images through it and asks the loss layer to convert the network's raw output into + // labels. In our case, these labels are the numbers between 0 and 9. + std::vector<unsigned long> predicted_labels = net(training_images); + int num_right = 0; + int num_wrong = 0; + // And then let's see if it classified them correctly. + for (size_t i = 0; i < training_images.size(); ++i) + { + if (predicted_labels[i] == training_labels[i]) + ++num_right; + else + ++num_wrong; + + } + cout << "training num_right: " << num_right << endl; + cout << "training num_wrong: " << num_wrong << endl; + cout << "training accuracy: " << num_right/(double)(num_right+num_wrong) << endl; + + // Let's also see if the network can correctly classify the testing images. Since + // MNIST is an easy dataset, we should see at least 99% accuracy. + predicted_labels = net(testing_images); + num_right = 0; + num_wrong = 0; + for (size_t i = 0; i < testing_images.size(); ++i) + { + if (predicted_labels[i] == testing_labels[i]) + ++num_right; + else + ++num_wrong; + + } + cout << "testing num_right: " << num_right << endl; + cout << "testing num_wrong: " << num_wrong << endl; + cout << "testing accuracy: " << num_right/(double)(num_right+num_wrong) << endl; + + + // Finally, you can also save network parameters to XML files if you want to do + // something with the network in another tool. For example, you could use dlib's + // tools/convert_dlib_nets_to_caffe to convert the network to a caffe model. + net_to_xml(net, "lenet.xml"); +} +catch(std::exception& e) +{ + cout << e.what() << endl; +} + diff --git a/ml/dlib/examples/dnn_metric_learning_ex.cpp b/ml/dlib/examples/dnn_metric_learning_ex.cpp new file mode 100644 index 00000000..54f2e6e8 --- /dev/null +++ b/ml/dlib/examples/dnn_metric_learning_ex.cpp @@ -0,0 +1,128 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is an example illustrating the use of the deep learning tools from the + dlib C++ Library. In it, we will show how to use the loss_metric layer to do + metric learning. + + The main reason you might want to use this kind of algorithm is because you + would like to use a k-nearest neighbor classifier or similar algorithm, but + you don't know a good way to calculate the distance between two things. A + popular example would be face recognition. There are a whole lot of papers + that train some kind of deep metric learning algorithm that embeds face + images in some vector space where images of the same person are close to each + other and images of different people are far apart. Then in that vector + space it's very easy to do face recognition with some kind of k-nearest + neighbor classifier. + + To keep this example as simple as possible we won't do face recognition. 
+    Instead, we will create a very simple network and use it to learn a mapping
+    from 8D vectors to 2D vectors such that vectors with the same class labels
+    are near each other.  If you want to see a more complex example that learns
+    the kind of network you would use for something like face recognition read
+    the dnn_metric_learning_on_images_ex.cpp example.
+
+    You should also have read the examples that introduce the dlib DNN API before
+    continuing.  These are dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp.
+*/
+
+
+#include <dlib/dnn.h>
+#include <iostream>
+
+using namespace std;
+using namespace dlib;
+
+
+int main() try
+{
+    // The API for doing metric learning is very similar to the API for
+    // multi-class classification.  In fact, the inputs are the same: a bunch of
+    // labeled objects.  So here we create our dataset.  We make up some simple
+    // vectors and label them with the integers 1,2,3,4.  The specific values of
+    // the integer labels don't matter.
+    std::vector<matrix<double,0,1>> samples;
+    std::vector<unsigned long> labels;
+
+    // class 1 training vectors
+    samples.push_back({1,0,0,0,0,0,0,0}); labels.push_back(1);
+    samples.push_back({0,1,0,0,0,0,0,0}); labels.push_back(1);
+
+    // class 2 training vectors
+    samples.push_back({0,0,1,0,0,0,0,0}); labels.push_back(2);
+    samples.push_back({0,0,0,1,0,0,0,0}); labels.push_back(2);
+
+    // class 3 training vectors
+    samples.push_back({0,0,0,0,1,0,0,0}); labels.push_back(3);
+    samples.push_back({0,0,0,0,0,1,0,0}); labels.push_back(3);
+
+    // class 4 training vectors
+    samples.push_back({0,0,0,0,0,0,1,0}); labels.push_back(4);
+    samples.push_back({0,0,0,0,0,0,0,1}); labels.push_back(4);
+
+
+    // Make a network that simply learns a linear mapping from 8D vectors to 2D
+    // vectors.
+    using net_type = loss_metric<fc<2,input<matrix<double,0,1>>>>;
+    net_type net;
+    dnn_trainer<net_type> trainer(net);
+    trainer.set_learning_rate(0.1);
+
+    // It should be emphasized that it's really important that each mini-batch contain
+    // multiple instances of each class of object.  This is because the metric learning
+    // algorithm needs to consider pairs of objects that should be close as well as pairs
+    // of objects that should be far apart during each training step.  Here we just keep
+    // training on the same small batch so this constraint is trivially satisfied.
+    while(trainer.get_learning_rate() >= 1e-4)
+        trainer.train_one_step(samples, labels);
+
+    // Wait for training threads to stop
+    trainer.get_net();
+    cout << "done training" << endl;
+
+
+    // Run all the samples through the network to get their 2D vector embeddings.
+    std::vector<matrix<float,0,1>> embedded = net(samples);
+
+    // Print the embedding for each sample to the screen.  If you look at the
+    // outputs carefully you should notice that they are grouped together in 2D
+    // space according to their label.
+    for (size_t i = 0; i < embedded.size(); ++i)
+        cout << "label: " << labels[i] << "\t" << trans(embedded[i]);
+
+    // Now, check if the embedding puts things with the same labels near each other and
+    // things with different labels far apart.
+    int num_right = 0;
+    int num_wrong = 0;
+    for (size_t i = 0; i < embedded.size(); ++i)
+    {
+        for (size_t j = i+1; j < embedded.size(); ++j)
+        {
+            if (labels[i] == labels[j])
+            {
+                // The loss_metric layer will cause things with the same label to be less
+                // than net.loss_details().get_distance_threshold() distance from each
+                // other.
So we can use that distance value as our testing threshold for + // "being near to each other". + if (length(embedded[i]-embedded[j]) < net.loss_details().get_distance_threshold()) + ++num_right; + else + ++num_wrong; + } + else + { + if (length(embedded[i]-embedded[j]) >= net.loss_details().get_distance_threshold()) + ++num_right; + else + ++num_wrong; + } + } + } + + cout << "num_right: "<< num_right << endl; + cout << "num_wrong: "<< num_wrong << endl; +} +catch(std::exception& e) +{ + cout << e.what() << endl; +} + diff --git a/ml/dlib/examples/dnn_metric_learning_on_images_ex.cpp b/ml/dlib/examples/dnn_metric_learning_on_images_ex.cpp new file mode 100644 index 00000000..4c3856ac --- /dev/null +++ b/ml/dlib/examples/dnn_metric_learning_on_images_ex.cpp @@ -0,0 +1,340 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is an example illustrating the use of the deep learning tools from the + dlib C++ Library. In it, we will show how to use the loss_metric layer to do + metric learning on images. + + The main reason you might want to use this kind of algorithm is because you + would like to use a k-nearest neighbor classifier or similar algorithm, but + you don't know a good way to calculate the distance between two things. A + popular example would be face recognition. There are a whole lot of papers + that train some kind of deep metric learning algorithm that embeds face + images in some vector space where images of the same person are close to each + other and images of different people are far apart. Then in that vector + space it's very easy to do face recognition with some kind of k-nearest + neighbor classifier. + + In this example we will use a version of the ResNet network from the + dnn_imagenet_ex.cpp example to learn to map images into some vector space where + pictures of the same person are close and pictures of different people are far + apart. + + You might want to read the simpler introduction to the deep metric learning + API, dnn_metric_learning_ex.cpp, before reading this example. You should + also have read the examples that introduce the dlib DNN API before + continuing. These are dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp. + +*/ + +#include <dlib/dnn.h> +#include <dlib/image_io.h> +#include <dlib/misc_api.h> + +using namespace dlib; +using namespace std; + +// ---------------------------------------------------------------------------------------- + +// We will need to create some functions for loading data. This program will +// expect to be given a directory structured as follows: +// top_level_directory/ +// person1/ +// image1.jpg +// image2.jpg +// image3.jpg +// person2/ +// image4.jpg +// image5.jpg +// image6.jpg +// person3/ +// image7.jpg +// image8.jpg +// image9.jpg +// +// The specific folder and image names don't matter, nor does the number of folders or +// images. What does matter is that there is a top level folder, which contains +// subfolders, and each subfolder contains images of a single person. + +// This function spiders the top level directory and obtains a list of all the +// image files. 
+std::vector<std::vector<string>> load_objects_list (
+    const string& dir
+)
+{
+    std::vector<std::vector<string>> objects;
+    for (auto subdir : directory(dir).get_dirs())
+    {
+        std::vector<string> imgs;
+        for (auto img : subdir.get_files())
+            imgs.push_back(img);
+
+        if (imgs.size() != 0)
+            objects.push_back(imgs);
+    }
+    return objects;
+}
+
+// This function takes the output of load_objects_list() as input and randomly
+// selects images for training.  It should also be pointed out that it's really
+// important that each mini-batch contain multiple images of each person.  This
+// is because the metric learning algorithm needs to consider pairs of images
+// that should be close (i.e. images of the same person) as well as pairs of
+// images that should be far apart (i.e. images of different people) during each
+// training step.
+void load_mini_batch (
+    const size_t num_people,     // how many different people to include
+    const size_t samples_per_id, // how many images per person to select.
+    dlib::rand& rnd,
+    const std::vector<std::vector<string>>& objs,
+    std::vector<matrix<rgb_pixel>>& images,
+    std::vector<unsigned long>& labels
+)
+{
+    images.clear();
+    labels.clear();
+    DLIB_CASSERT(num_people <= objs.size(), "The dataset doesn't have that many people in it.");
+
+    std::vector<bool> already_selected(objs.size(), false);
+    matrix<rgb_pixel> image;
+    for (size_t i = 0; i < num_people; ++i)
+    {
+        size_t id = rnd.get_random_32bit_number()%objs.size();
+        // don't pick a person we already added to the mini-batch
+        while(already_selected[id])
+            id = rnd.get_random_32bit_number()%objs.size();
+        already_selected[id] = true;
+
+        for (size_t j = 0; j < samples_per_id; ++j)
+        {
+            const auto& obj = objs[id][rnd.get_random_32bit_number()%objs[id].size()];
+            load_image(image, obj);
+            images.push_back(std::move(image));
+            labels.push_back(id);
+        }
+    }
+
+    // You might want to do some data augmentation at this point.  Here we do some simple
+    // color augmentation.
+    for (auto&& crop : images)
+    {
+        disturb_colors(crop,rnd);
+        // Jitter most crops
+        if (rnd.get_random_double() > 0.1)
+            crop = jitter_image(crop,rnd);
+    }
+
+
+    // All the images going into a mini-batch have to be the same size.  And really, all
+    // the images in your entire training dataset should be the same size for what we are
+    // doing to make the most sense.
+    DLIB_CASSERT(images.size() > 0);
+    for (auto&& img : images)
+    {
+        DLIB_CASSERT(img.nr() == images[0].nr() && img.nc() == images[0].nc(),
+            "All the images in a single mini-batch must be the same size.");
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+
+// The next page of code defines a ResNet network.  It's basically copied
+// and pasted from the dnn_imagenet_ex.cpp example, except we replaced the loss
+// layer with loss_metric and made the network somewhat smaller.
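+// (A hedged illustration, not in the original comments: with the definitions
+// below, a block like res<64,SUBNET> expands to roughly
+//   relu<add_prev1<bn_con<con<64,3,3,1,1,relu<bn_con<con<64,3,3,1,1,tag1<SUBNET>>>>>>>>
+// i.e. two 3x3 convolutions whose result add_prev1 sums with the input that
+// tag1 remembered -- the standard residual skip connection.  The "a" variants
+// swap bn_con for affine so a trained net can later be copied into a testing net.)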
+ +template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> +using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>; + +template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> +using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>; + +template <int N, template <typename> class BN, int stride, typename SUBNET> +using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>; + + +template <int N, typename SUBNET> using res = relu<residual<block,N,bn_con,SUBNET>>; +template <int N, typename SUBNET> using ares = relu<residual<block,N,affine,SUBNET>>; +template <int N, typename SUBNET> using res_down = relu<residual_down<block,N,bn_con,SUBNET>>; +template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>; + +// ---------------------------------------------------------------------------------------- + +template <typename SUBNET> using level0 = res_down<256,SUBNET>; +template <typename SUBNET> using level1 = res<256,res<256,res_down<256,SUBNET>>>; +template <typename SUBNET> using level2 = res<128,res<128,res_down<128,SUBNET>>>; +template <typename SUBNET> using level3 = res<64,res<64,res<64,res_down<64,SUBNET>>>>; +template <typename SUBNET> using level4 = res<32,res<32,res<32,SUBNET>>>; + +template <typename SUBNET> using alevel0 = ares_down<256,SUBNET>; +template <typename SUBNET> using alevel1 = ares<256,ares<256,ares_down<256,SUBNET>>>; +template <typename SUBNET> using alevel2 = ares<128,ares<128,ares_down<128,SUBNET>>>; +template <typename SUBNET> using alevel3 = ares<64,ares<64,ares<64,ares_down<64,SUBNET>>>>; +template <typename SUBNET> using alevel4 = ares<32,ares<32,ares<32,SUBNET>>>; + + +// training network type +using net_type = loss_metric<fc_no_bias<128,avg_pool_everything< + level0< + level1< + level2< + level3< + level4< + max_pool<3,3,2,2,relu<bn_con<con<32,7,7,2,2, + input_rgb_image + >>>>>>>>>>>>; + +// testing network type (replaced batch normalization with fixed affine transforms) +using anet_type = loss_metric<fc_no_bias<128,avg_pool_everything< + alevel0< + alevel1< + alevel2< + alevel3< + alevel4< + max_pool<3,3,2,2,relu<affine<con<32,7,7,2,2, + input_rgb_image + >>>>>>>>>>>>; + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) +{ + if (argc != 2) + { + cout << "Give a folder as input. It should contain sub-folders of images and we will " << endl; + cout << "learn to distinguish between these sub-folders with metric learning. " << endl; + cout << "For example, you can run this program on the very small examples/johns dataset" << endl; + cout << "that comes with dlib by running this command:" << endl; + cout << " ./dnn_metric_learning_on_images_ex johns" << endl; + return 1; + } + + auto objs = load_objects_list(argv[1]); + + cout << "objs.size(): "<< objs.size() << endl; + + std::vector<matrix<rgb_pixel>> images; + std::vector<unsigned long> labels; + + + net_type net; + + dnn_trainer<net_type> trainer(net, sgd(0.0001, 0.9)); + trainer.set_learning_rate(0.1); + trainer.be_verbose(); + trainer.set_synchronization_file("face_metric_sync", std::chrono::minutes(5)); + // I've set this to something really small to make the example terminate + // sooner. 
But when you really want to train a good model you should set
+    // this to something like 10000 so training doesn't terminate too early.
+    trainer.set_iterations_without_progress_threshold(300);
+
+    // If you have a lot of data then it might not be reasonable to load it all
+    // into RAM.  So you will need to be sure you are decompressing your images
+    // and loading them fast enough to keep the GPU occupied.  I like to do this
+    // using the following coding pattern: create a bunch of threads that dump
+    // mini-batches into dlib::pipes.
+    dlib::pipe<std::vector<matrix<rgb_pixel>>> qimages(4);
+    dlib::pipe<std::vector<unsigned long>> qlabels(4);
+    auto data_loader = [&qimages, &qlabels, &objs](time_t seed)
+    {
+        dlib::rand rnd(time(0)+seed);
+        std::vector<matrix<rgb_pixel>> images;
+        std::vector<unsigned long> labels;
+        while(qimages.is_enabled())
+        {
+            try
+            {
+                load_mini_batch(5, 5, rnd, objs, images, labels);
+                qimages.enqueue(images);
+                qlabels.enqueue(labels);
+            }
+            catch(std::exception& e)
+            {
+                cout << "EXCEPTION IN LOADING DATA" << endl;
+                cout << e.what() << endl;
+            }
+        }
+    };
+    // Run the data_loader from 5 threads.  You should set the number of threads
+    // relative to the number of CPU cores you have.
+    std::thread data_loader1([data_loader](){ data_loader(1); });
+    std::thread data_loader2([data_loader](){ data_loader(2); });
+    std::thread data_loader3([data_loader](){ data_loader(3); });
+    std::thread data_loader4([data_loader](){ data_loader(4); });
+    std::thread data_loader5([data_loader](){ data_loader(5); });
+
+
+    // Here we do the training.  We keep passing mini-batches to the trainer until the
+    // learning rate has dropped low enough.
+    while(trainer.get_learning_rate() >= 1e-4)
+    {
+        qimages.dequeue(images);
+        qlabels.dequeue(labels);
+        trainer.train_one_step(images, labels);
+    }
+
+    // Wait for training threads to stop
+    trainer.get_net();
+    cout << "done training" << endl;
+
+    // Save the network to disk
+    net.clean();
+    serialize("metric_network_resnet.dat") << net;
+
+    // Stop all the data loading threads and wait for them to terminate.
+    qimages.disable();
+    qlabels.disable();
+    data_loader1.join();
+    data_loader2.join();
+    data_loader3.join();
+    data_loader4.join();
+    data_loader5.join();
+
+
+
+
+
+    // Now, just to show an example of how you would use the network, let's check how well
+    // it performs on the training data.
+    dlib::rand rnd(time(0));
+    load_mini_batch(5, 5, rnd, objs, images, labels);
+
+    // Normally you would use the non-batch-normalized version of the network to do
+    // testing, which is what we do here.
+    anet_type testing_net = net;
+
+    // Run all the images through the network to get their vector embeddings.
+    std::vector<matrix<float,0,1>> embedded = testing_net(images);
+
+    // Now, check if the embedding puts images with the same labels near each other and
+    // images with different labels far apart.
+    int num_right = 0;
+    int num_wrong = 0;
+    for (size_t i = 0; i < embedded.size(); ++i)
+    {
+        for (size_t j = i+1; j < embedded.size(); ++j)
+        {
+            if (labels[i] == labels[j])
+            {
+                // The loss_metric layer will cause images with the same label to be less
+                // than net.loss_details().get_distance_threshold() distance from each
+                // other.  So we can use that distance value as our testing threshold.
+ if (length(embedded[i]-embedded[j]) < testing_net.loss_details().get_distance_threshold()) + ++num_right; + else + ++num_wrong; + } + else + { + if (length(embedded[i]-embedded[j]) >= testing_net.loss_details().get_distance_threshold()) + ++num_right; + else + ++num_wrong; + } + } + } + + cout << "num_right: "<< num_right << endl; + cout << "num_wrong: "<< num_wrong << endl; + +} + + diff --git a/ml/dlib/examples/dnn_mmod_dog_hipsterizer.cpp b/ml/dlib/examples/dnn_mmod_dog_hipsterizer.cpp new file mode 100644 index 00000000..22829d33 --- /dev/null +++ b/ml/dlib/examples/dnn_mmod_dog_hipsterizer.cpp @@ -0,0 +1,180 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This example shows how to run a CNN based dog face detector using dlib. The + example loads a pretrained model and uses it to find dog faces in images. + We also use the dlib::shape_predictor to find the location of the eyes and + nose and then draw glasses and a mustache onto each dog found :) + + + Users who are just learning about dlib's deep learning API should read the + dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp examples to learn how + the API works. For an introduction to the object detection method you + should read dnn_mmod_ex.cpp + + + + TRAINING THE MODEL + Finally, users interested in how the dog face detector was trained should + read the dnn_mmod_ex.cpp example program. It should be noted that the + dog face detector used in this example uses a bigger training dataset and + larger CNN architecture than what is shown in dnn_mmod_ex.cpp, but + otherwise training is the same. If you compare the net_type statements + in this file and dnn_mmod_ex.cpp you will see that they are very similar + except that the number of parameters has been increased. 
+
+    Additionally, the following training parameters were different during
+    training: The following lines in dnn_mmod_ex.cpp were changed from
+        mmod_options options(face_boxes_train, 40,40);
+        trainer.set_iterations_without_progress_threshold(300);
+    to the following when training the model used in this example:
+        mmod_options options(face_boxes_train, 80,80);
+        trainer.set_iterations_without_progress_threshold(8000);
+
+    Also, the random_cropper was left at its default settings, so we didn't
+    call these functions:
+        cropper.set_chip_dims(200, 200);
+        cropper.set_min_object_size(40,40);
+
+    The training data used to create the model is also available at
+    http://dlib.net/files/data/CU_dogs_fully_labeled.tar.gz
+
+    Lastly, the shape_predictor was trained with default settings except we
+    used the following non-default settings: cascade depth=20, tree
+    depth=5, padding=0.2
+*/
+
+
+#include <iostream>
+#include <dlib/dnn.h>
+#include <dlib/data_io.h>
+#include <dlib/image_processing.h>
+#include <dlib/gui_widgets.h>
+
+
+using namespace std;
+using namespace dlib;
+
+// ----------------------------------------------------------------------------------------
+
+template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
+template <long num_filters, typename SUBNET> using con5 = con<num_filters,5,5,1,1,SUBNET>;
+
+template <typename SUBNET> using downsampler = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16,SUBNET>>>>>>>>>;
+template <typename SUBNET> using rcon5 = relu<affine<con5<45,SUBNET>>>;
+
+using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
+
+// ----------------------------------------------------------------------------------------
+
+int main(int argc, char** argv) try
+{
+    if (argc < 3)
+    {
+        cout << "Call this program like this:" << endl;
+        cout << "./dnn_mmod_dog_hipsterizer mmod_dog_hipsterizer.dat faces/dogs.jpg" << endl;
+        cout << "\nYou can get the mmod_dog_hipsterizer.dat file from:\n";
+        cout << "http://dlib.net/files/mmod_dog_hipsterizer.dat.bz2" << endl;
+        return 0;
+    }
+
+
+    // Load the models as well as the glasses and mustache images.
+    net_type net;
+    shape_predictor sp;
+    matrix<rgb_alpha_pixel> glasses, mustache;
+    deserialize(argv[1]) >> net >> sp >> glasses >> mustache;
+    pyramid_up(glasses);
+    pyramid_up(mustache);
+
+    image_window win1(glasses);
+    image_window win2(mustache);
+    image_window win_wireframe, win_hipster;
+
+    // Now process each image, find dogs, and hipsterize them by drawing glasses and a
+    // mustache on each dog :)
+    for (int i = 2; i < argc; ++i)
+    {
+        matrix<rgb_pixel> img;
+        load_image(img, argv[i]);
+
+        // Upsampling the image will allow us to find smaller dog faces but will use more
+        // computational resources.
+        //pyramid_up(img);
+
+        auto dets = net(img);
+        win_wireframe.clear_overlay();
+        win_wireframe.set_image(img);
+        // We will also draw a wireframe on each dog's face so you can see where the
+        // shape_predictor is identifying face landmarks.
+        std::vector<image_window::overlay_line> lines;
+        for (auto&& d : dets)
+        {
+            // get the landmarks for this dog's face
+            auto shape = sp(img, d.rect);
+
+            const rgb_pixel color(0,255,0);
+            auto top  = shape.part(0);
+            auto lear = shape.part(1);
+            auto leye = shape.part(2);
+            auto nose = shape.part(3);
+            auto rear = shape.part(4);
+            auto reye = shape.part(5);
+
+            // The locations of the left and right ends of the mustache.
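+            // (Clarifying note, not in the original: (leye-reye)/2 is half the
+            // vector from the right eye to the left eye, so adding +/-1.3 times
+            // it to the nose position puts the two mustache endpoints under the
+            // nose, spaced slightly wider than the eyes themselves.)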
+ auto lmustache = 1.3*(leye-reye)/2 + nose; + auto rmustache = 1.3*(reye-leye)/2 + nose; + + // Draw the glasses onto the image. + std::vector<point> from = {2*point(176,36), 2*point(59,35)}, to = {leye, reye}; + auto tform = find_similarity_transform(from, to); + for (long r = 0; r < glasses.nr(); ++r) + { + for (long c = 0; c < glasses.nc(); ++c) + { + point p = tform(point(c,r)); + if (get_rect(img).contains(p)) + assign_pixel(img(p.y(),p.x()), glasses(r,c)); + } + } + + // Draw the mustache onto the image right under the dog's nose. + auto mrect = get_rect(mustache); + from = {mrect.tl_corner(), mrect.tr_corner()}; + to = {rmustache, lmustache}; + tform = find_similarity_transform(from, to); + for (long r = 0; r < mustache.nr(); ++r) + { + for (long c = 0; c < mustache.nc(); ++c) + { + point p = tform(point(c,r)); + if (get_rect(img).contains(p)) + assign_pixel(img(p.y(),p.x()), mustache(r,c)); + } + } + + + // Record the lines needed for the face wire frame. + lines.push_back(image_window::overlay_line(leye, nose, color)); + lines.push_back(image_window::overlay_line(nose, reye, color)); + lines.push_back(image_window::overlay_line(reye, leye, color)); + lines.push_back(image_window::overlay_line(reye, rear, color)); + lines.push_back(image_window::overlay_line(rear, top, color)); + lines.push_back(image_window::overlay_line(top, lear, color)); + lines.push_back(image_window::overlay_line(lear, leye, color)); + } + + win_wireframe.add_overlay(lines); + win_hipster.set_image(img); + + cout << "Hit enter to process the next image." << endl; + cin.get(); + } +} +catch(std::exception& e) +{ + cout << e.what() << endl; +} + + + + diff --git a/ml/dlib/examples/dnn_mmod_ex.cpp b/ml/dlib/examples/dnn_mmod_ex.cpp new file mode 100644 index 00000000..9565d514 --- /dev/null +++ b/ml/dlib/examples/dnn_mmod_ex.cpp @@ -0,0 +1,230 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This example shows how to train a CNN based object detector using dlib's + loss_mmod loss layer. This loss layer implements the Max-Margin Object + Detection loss as described in the paper: + Max-Margin Object Detection by Davis E. King (http://arxiv.org/abs/1502.00046). + This is the same loss used by the popular SVM+HOG object detector in dlib + (see fhog_object_detector_ex.cpp) except here we replace the HOG features + with a CNN and train the entire detector end-to-end. This allows us to make + much more powerful detectors. + + It would be a good idea to become familiar with dlib's DNN tooling before + reading this example. So you should read dnn_introduction_ex.cpp and + dnn_introduction2_ex.cpp before reading this example program. + + Just like in the fhog_object_detector_ex.cpp example, we are going to train + a simple face detector based on the very small training dataset in the + examples/faces folder. As we will see, even with this small dataset the + MMOD method is able to make a working face detector. However, for real + applications you should train with more data for an even better result. +*/ + + +#include <iostream> +#include <dlib/dnn.h> +#include <dlib/data_io.h> +#include <dlib/gui_widgets.h> + +using namespace std; +using namespace dlib; + +// The first thing we do is define our CNN. The CNN is going to be evaluated +// convolutionally over an entire image pyramid. Think of it like a normal +// sliding window classifier. This means you need to define a CNN that can look +// at some part of an image and decide if it is an object of interest. 
In this
+// example I've defined a CNN with a receptive field of a little over 50x50
+// pixels.  This is reasonable for face detection since you can clearly tell if
+// a 50x50 image contains a face.  Other applications may benefit from CNNs with
+// different architectures.
+//
+// In this example our CNN begins with 3 downsampling layers.  These layers will
+// reduce the size of the image by 8x and output a feature map with
+// 32 dimensions.  Then we will pass that through 4 more convolutional layers to
+// get the final output of the network.  The last layer has only 1 channel and
+// the values in that last channel are large when the network thinks it has
+// found an object at a particular location.
+
+
+// Let's begin the network definition by creating some network blocks.
+
+// A 5x5 conv layer that does 2x downsampling
+template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
+// A 3x3 conv layer that doesn't do any downsampling
+template <long num_filters, typename SUBNET> using con3 = con<num_filters,3,3,1,1,SUBNET>;
+
+// Now we can define the 8x downsampling block in terms of con5d blocks.  We
+// also use relu and batch normalization in the standard way.
+template <typename SUBNET> using downsampler = relu<bn_con<con5d<32, relu<bn_con<con5d<32, relu<bn_con<con5d<32,SUBNET>>>>>>>>>;
+
+// The rest of the network will be 3x3 conv layers with batch normalization and
+// relu.  So we define the 3x3 block we will use here.
+template <typename SUBNET> using rcon3 = relu<bn_con<con3<32,SUBNET>>>;
+
+// Finally, we define the entire network.  The special input_rgb_image_pyramid
+// layer causes the network to operate over a spatial pyramid, making the detector
+// scale invariant.
+using net_type = loss_mmod<con<1,6,6,1,1,rcon3<rcon3<rcon3<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
+
+// ----------------------------------------------------------------------------------------
+
+int main(int argc, char** argv) try
+{
+    // In this example we are going to train a face detector based on the
+    // small faces dataset in the examples/faces directory.  So the first
+    // thing we do is load that dataset.  This means you need to supply the
+    // path to this faces folder as a command line argument so we will know
+    // where it is.
+    if (argc != 2)
+    {
+        cout << "Give the path to the examples/faces directory as the argument to this" << endl;
+        cout << "program.  For example, if you are in the examples folder then execute " << endl;
+        cout << "this program by running: " << endl;
+        cout << "   ./dnn_mmod_ex faces" << endl;
+        cout << endl;
+        return 0;
+    }
+    const std::string faces_directory = argv[1];
+    // The faces directory contains a training dataset and a separate
+    // testing dataset.  The training data consists of 4 images, each
+    // annotated with rectangles that bound each human face.  The idea is
+    // to use this training data to learn to identify human faces in new
+    // images.
+    //
+    // Once you have trained an object detector it is always important to
+    // test it on data it wasn't trained on.  Therefore, we will also load
+    // a separate testing set of 5 images.  Once we have a face detector
+    // created from the training data we will see how well it works by
+    // running it on the testing images.
+    //
+    // So here we create the variables that will hold our dataset.
+    // images_train will hold the 4 training images and face_boxes_train
+    // holds the locations of the faces in the training images.
So for + // example, the image images_train[0] has the faces given by the + // rectangles in face_boxes_train[0]. + std::vector<matrix<rgb_pixel>> images_train, images_test; + std::vector<std::vector<mmod_rect>> face_boxes_train, face_boxes_test; + + // Now we load the data. These XML files list the images in each dataset + // and also contain the positions of the face boxes. Obviously you can use + // any kind of input format you like so long as you store the data into + // images_train and face_boxes_train. But for convenience dlib comes with + // tools for creating and loading XML image datasets. Here you see how to + // load the data. To create the XML files you can use the imglab tool which + // can be found in the tools/imglab folder. It is a simple graphical tool + // for labeling objects in images with boxes. To see how to use it read the + // tools/imglab/README.txt file. + load_image_dataset(images_train, face_boxes_train, faces_directory+"/training.xml"); + load_image_dataset(images_test, face_boxes_test, faces_directory+"/testing.xml"); + + + cout << "num training images: " << images_train.size() << endl; + cout << "num testing images: " << images_test.size() << endl; + + + // The MMOD algorithm has some options you can set to control its behavior. However, + // you can also call the constructor with your training annotations and a "target + // object size" and it will automatically configure itself in a reasonable way for your + // problem. Here we are saying that faces are still recognizably faces when they are + // 40x40 pixels in size. You should generally pick the smallest size where this is + // true. Based on this information the mmod_options constructor will automatically + // pick a good sliding window width and height. It will also automatically set the + // non-max-suppression parameters to something reasonable. For further details see the + // mmod_options documentation. + mmod_options options(face_boxes_train, 40,40); + // The detector will automatically decide to use multiple sliding windows if needed. + // For the face data, only one is needed however. + cout << "num detector windows: "<< options.detector_windows.size() << endl; + for (auto& w : options.detector_windows) + cout << "detector window width by height: " << w.width << " x " << w.height << endl; + cout << "overlap NMS IOU thresh: " << options.overlaps_nms.get_iou_thresh() << endl; + cout << "overlap NMS percent covered thresh: " << options.overlaps_nms.get_percent_covered_thresh() << endl; + + // Now we are ready to create our network and trainer. + net_type net(options); + // The MMOD loss requires that the number of filters in the final network layer equal + // options.detector_windows.size(). So we set that here as well. + net.subnet().layer_details().set_num_filters(options.detector_windows.size()); + dnn_trainer<net_type> trainer(net); + trainer.set_learning_rate(0.1); + trainer.be_verbose(); + trainer.set_synchronization_file("mmod_sync", std::chrono::minutes(5)); + trainer.set_iterations_without_progress_threshold(300); + + + // Now let's train the network. We are going to use mini-batches of 150 + // images. The images are random crops from our training set (see + // random_cropper_ex.cpp for a discussion of the random_cropper). 
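+    // (Aside, not in the original comment: because the cropper synthesizes
+    // random 200x200 crops -- see the set_chip_dims() call below -- each
+    // mini-batch is 150 freshly generated augmented views rather than a fixed
+    // slice of the 4-image training set, so the training loop never runs out
+    // of data.)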
+    std::vector<matrix<rgb_pixel>> mini_batch_samples;
+    std::vector<std::vector<mmod_rect>> mini_batch_labels;
+    random_cropper cropper;
+    cropper.set_chip_dims(200, 200);
+    // Usually you want to give the cropper whatever min sizes you passed to the
+    // mmod_options constructor, which is what we do here.
+    cropper.set_min_object_size(40,40);
+    dlib::rand rnd;
+    // Run the trainer until the learning rate gets small.  This will probably take several
+    // hours.
+    while(trainer.get_learning_rate() >= 1e-4)
+    {
+        cropper(150, images_train, face_boxes_train, mini_batch_samples, mini_batch_labels);
+        // We can also randomly jitter the colors and that often helps a detector
+        // generalize better to new images.
+        for (auto&& img : mini_batch_samples)
+            disturb_colors(img, rnd);
+
+        trainer.train_one_step(mini_batch_samples, mini_batch_labels);
+    }
+    // Wait for training threads to stop
+    trainer.get_net();
+    cout << "done training" << endl;
+
+    // Save the network to disk
+    net.clean();
+    serialize("mmod_network.dat") << net;
+
+
+    // Now that we have a face detector we can test it.  The first statement tests it
+    // on the training data.  It will print the precision, recall, and then average precision.
+    // This statement should indicate that the network works perfectly on the
+    // training data.
+    cout << "training results: " << test_object_detection_function(net, images_train, face_boxes_train) << endl;
+    // However, to get an idea if it really worked without overfitting we need to run
+    // it on images it wasn't trained on.  The next line does this.  Happily,
+    // this statement indicates that the detector finds most of the faces in the
+    // testing data.
+    cout << "testing results: " << test_object_detection_function(net, images_test, face_boxes_test) << endl;
+
+
+    // If you are running many experiments, it's also useful to log the settings used
+    // during the training experiment.  This statement will print the settings we used to
+    // the screen.
+    cout << trainer << cropper << endl;
+
+    // Now let's run the detector on the testing images and look at the outputs.
+    image_window win;
+    for (auto&& img : images_test)
+    {
+        pyramid_up(img);
+        auto dets = net(img);
+        win.clear_overlay();
+        win.set_image(img);
+        for (auto&& d : dets)
+            win.add_overlay(d);
+        cin.get();
+    }
+    return 0;
+
+    // Now that you finished this example, you should read dnn_mmod_train_find_cars_ex.cpp,
+    // which is a more advanced example.  It discusses many issues surrounding properly
+    // setting the MMOD parameters and creating a good training dataset.
+
+}
+catch(std::exception& e)
+{
+    cout << e.what() << endl;
+}
+
+
+
+
diff --git a/ml/dlib/examples/dnn_mmod_face_detection_ex.cpp b/ml/dlib/examples/dnn_mmod_face_detection_ex.cpp new file mode 100644 index 00000000..3cdf4fcc --- /dev/null +++ b/ml/dlib/examples/dnn_mmod_face_detection_ex.cpp @@ -0,0 +1,114 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    This example shows how to run a CNN based face detector using dlib.  The
+    example loads a pretrained model and uses it to find faces in images.  The
+    CNN model is much more accurate than the HOG based model shown in the
+    face_detection_ex.cpp example, but takes much more computational power to
+    run, and is meant to be executed on a GPU to attain reasonable speed.  For
+    example, on an NVIDIA Titan X GPU, this example program processes images at
+    about the same speed as face_detection_ex.cpp.
+
+    Also, users who are just learning about dlib's deep learning API should read
+    the dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp examples to learn
+    how the API works.  For an introduction to the object detection method you
+    should read dnn_mmod_ex.cpp.
+
+
+
+    TRAINING THE MODEL
+        Finally, users interested in how the face detector was trained should
+        read the dnn_mmod_ex.cpp example program.  It should be noted that the
+        face detector used in this example uses a bigger training dataset and
+        larger CNN architecture than what is shown in dnn_mmod_ex.cpp, but
+        otherwise training is the same.  If you compare the net_type statements
+        in this file and dnn_mmod_ex.cpp you will see that they are very similar
+        except that the number of parameters has been increased.
+
+        Additionally, the following training parameters were different during
+        training: The following lines in dnn_mmod_ex.cpp were changed from
+            mmod_options options(face_boxes_train, 40,40);
+            trainer.set_iterations_without_progress_threshold(300);
+        to the following when training the model used in this example:
+            mmod_options options(face_boxes_train, 80,80);
+            trainer.set_iterations_without_progress_threshold(8000);
+
+        Also, the random_cropper was left at its default settings, so we didn't
+        call these functions:
+            cropper.set_chip_dims(200, 200);
+            cropper.set_min_object_size(40,40);
+
+        The training data used to create the model is also available at
+        http://dlib.net/files/data/dlib_face_detection_dataset-2016-09-30.tar.gz
+*/
+
+
+#include <iostream>
+#include <dlib/dnn.h>
+#include <dlib/data_io.h>
+#include <dlib/image_processing.h>
+#include <dlib/gui_widgets.h>
+
+
+using namespace std;
+using namespace dlib;
+
+// ----------------------------------------------------------------------------------------
+
+template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
+template <long num_filters, typename SUBNET> using con5 = con<num_filters,5,5,1,1,SUBNET>;
+
+template <typename SUBNET> using downsampler = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16,SUBNET>>>>>>>>>;
+template <typename SUBNET> using rcon5 = relu<affine<con5<45,SUBNET>>>;
+
+using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
+
+// ----------------------------------------------------------------------------------------
+
+
+int main(int argc, char** argv) try
+{
+    if (argc == 1)
+    {
+        cout << "Call this program like this:" << endl;
+        cout << "./dnn_mmod_face_detection_ex mmod_human_face_detector.dat faces/*.jpg" << endl;
+        cout << "\nYou can get the mmod_human_face_detector.dat file from:\n";
+        cout << "http://dlib.net/files/mmod_human_face_detector.dat.bz2" << endl;
+        return 0;
+    }
+
+
+    net_type net;
+    deserialize(argv[1]) >> net;
+
+    image_window win;
+    for (int i = 2; i < argc; ++i)
+    {
+        matrix<rgb_pixel> img;
+        load_image(img, argv[i]);
+
+        // Upsampling the image will allow us to detect smaller faces but will cause the
+        // program to use more RAM and run longer.
+        while(img.size() < 1800*1800)
+            pyramid_up(img);
+
+        // Note that you can process a bunch of images in a std::vector at once and it runs
+        // much faster, since this will form mini-batches of images and therefore get
+        // better parallelism out of your GPU hardware.  However, all the images must be
+        // the same size.  To avoid this requirement on images being the same size we
+        // process them individually in this example.
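+        // (A sketch of that batched alternative, assuming you had already made
+        // all the images the same size -- this is not code from the original
+        // example:
+        //
+        //   std::vector<matrix<rgb_pixel>> batch = ...; // same-sized images
+        //   std::vector<std::vector<mmod_rect>> all_dets = net(batch);
+        //
+        // where each inner vector holds the detections for one image.)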
+ auto dets = net(img); + win.clear_overlay(); + win.set_image(img); + for (auto&& d : dets) + win.add_overlay(d); + + cout << "Hit enter to process the next image." << endl; + cin.get(); + } +} +catch(std::exception& e) +{ + cout << e.what() << endl; +} + + diff --git a/ml/dlib/examples/dnn_mmod_find_cars2_ex.cpp b/ml/dlib/examples/dnn_mmod_find_cars2_ex.cpp new file mode 100644 index 00000000..b9fffbba --- /dev/null +++ b/ml/dlib/examples/dnn_mmod_find_cars2_ex.cpp @@ -0,0 +1,96 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This example shows how to run a CNN based vehicle detector using dlib. The + example loads a pretrained model and uses it to find the front and rear ends + of cars in an image. The model used by this example was trained by the + dnn_mmod_train_find_cars_ex.cpp example program on this dataset: + http://dlib.net/files/data/dlib_front_and_rear_vehicles_v1.tar + + Users who are just learning about dlib's deep learning API should read + the dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp examples to learn + how the API works. For an introduction to the object detection method you + should read dnn_mmod_ex.cpp. + + You can also see a video of this vehicle detector running on YouTube: + https://www.youtube.com/watch?v=OHbJ7HhbG74 +*/ + + +#include <iostream> +#include <dlib/dnn.h> +#include <dlib/image_io.h> +#include <dlib/gui_widgets.h> +#include <dlib/image_processing.h> + +using namespace std; +using namespace dlib; + + + +// The front and rear view vehicle detector network +template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>; +template <long num_filters, typename SUBNET> using con5 = con<num_filters,5,5,1,1,SUBNET>; +template <typename SUBNET> using downsampler = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16,SUBNET>>>>>>>>>; +template <typename SUBNET> using rcon5 = relu<affine<con5<55,SUBNET>>>; +using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>; + +// ---------------------------------------------------------------------------------------- + +int main() try +{ + net_type net; + shape_predictor sp; + // You can get this file from http://dlib.net/files/mmod_front_and_rear_end_vehicle_detector.dat.bz2 + // This network was produced by the dnn_mmod_train_find_cars_ex.cpp example program. + // As you can see, the file also includes a separately trained shape_predictor. To see + // a generic example of how to train those refer to train_shape_predictor_ex.cpp. + deserialize("mmod_front_and_rear_end_vehicle_detector.dat") >> net >> sp; + + matrix<rgb_pixel> img; + load_image(img, "../mmod_cars_test_image2.jpg"); + + image_window win; + win.set_image(img); + + // Run the detector on the image and show us the output. + for (auto&& d : net(img)) + { + // We use a shape_predictor to refine the exact shape and location of the detection + // box. This shape_predictor is trained to simply output the 4 corner points of + // the box. So all we do is make a rectangle that tightly contains those 4 points + // and that rectangle is our refined detection position. 
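+        // (Note, not in the original: fd below is a full_object_detection whose
+        // 4 parts are the box corners, and "rect += fd.part(j)" grows the
+        // initially empty rectangle just enough to contain each corner point.)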
+        auto fd = sp(img,d);
+        rectangle rect;
+        for (unsigned long j = 0; j < fd.num_parts(); ++j)
+            rect += fd.part(j);
+
+        if (d.label == "rear")
+            win.add_overlay(rect, rgb_pixel(255,0,0), d.label);
+        else
+            win.add_overlay(rect, rgb_pixel(255,255,0), d.label);
+    }
+
+
+
+
+    cout << "Hit enter to end program" << endl;
+    cin.get();
+}
+catch(image_load_error& e)
+{
+    cout << e.what() << endl;
+    cout << "The test image is located in the examples folder.  So you should run this program from a sub folder so that the relative path is correct." << endl;
+}
+catch(serialization_error& e)
+{
+    cout << e.what() << endl;
+    cout << "The correct model file can be obtained from: http://dlib.net/files/mmod_front_and_rear_end_vehicle_detector.dat.bz2" << endl;
+}
+catch(std::exception& e)
+{
+    cout << e.what() << endl;
+}
+
+
+
+
diff --git a/ml/dlib/examples/dnn_mmod_find_cars_ex.cpp b/ml/dlib/examples/dnn_mmod_find_cars_ex.cpp new file mode 100644 index 00000000..b11b1cfd --- /dev/null +++ b/ml/dlib/examples/dnn_mmod_find_cars_ex.cpp @@ -0,0 +1,236 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    This example shows how to run a CNN based vehicle detector using dlib.  The
+    example loads a pretrained model and uses it to find the rear ends of cars in
+    an image.  We will also visualize some of the detector's processing steps by
+    plotting various intermediate images on the screen.  Viewing these can help
+    you understand how the detector works.
+
+    The model used by this example was trained by the dnn_mmod_train_find_cars_ex.cpp
+    example.  Also, since this is a CNN, you really should use a GPU to get the
+    best execution speed.  For instance, when run on an NVIDIA 1080ti, this detector
+    runs at 98fps on the provided test image.  That's more than an order of
+    magnitude faster than when run on the CPU.
+
+    Users who are just learning about dlib's deep learning API should read
+    the dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp examples to learn
+    how the API works.  For an introduction to the object detection method you
+    should read dnn_mmod_ex.cpp.
+
+    You can also see some videos of this vehicle detector running on YouTube:
+        https://www.youtube.com/watch?v=4B3bzmxMAZU
+        https://www.youtube.com/watch?v=bP2SUo5vSlc
+*/
+
+
+#include <iostream>
+#include <dlib/dnn.h>
+#include <dlib/image_io.h>
+#include <dlib/gui_widgets.h>
+#include <dlib/image_processing.h>
+
+using namespace std;
+using namespace dlib;
+
+
+
+// The rear view vehicle detector network
+template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
+template <long num_filters, typename SUBNET> using con5 = con<num_filters,5,5,1,1,SUBNET>;
+template <typename SUBNET> using downsampler = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16,SUBNET>>>>>>>>>;
+template <typename SUBNET> using rcon5 = relu<affine<con5<55,SUBNET>>>;
+using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
+
+// ----------------------------------------------------------------------------------------
+
+int main() try
+{
+    net_type net;
+    shape_predictor sp;
+    // You can get this file from http://dlib.net/files/mmod_rear_end_vehicle_detector.dat.bz2
+    // This network was produced by the dnn_mmod_train_find_cars_ex.cpp example program.
+    // As you can see, the file also includes a separately trained shape_predictor.
To see + // a generic example of how to train those refer to train_shape_predictor_ex.cpp. + deserialize("mmod_rear_end_vehicle_detector.dat") >> net >> sp; + + matrix<rgb_pixel> img; + load_image(img, "../mmod_cars_test_image.jpg"); + + image_window win; + win.set_image(img); + + // Run the detector on the image and show us the output. + for (auto&& d : net(img)) + { + // We use a shape_predictor to refine the exact shape and location of the detection + // box. This shape_predictor is trained to simply output the 4 corner points of + // the box. So all we do is make a rectangle that tightly contains those 4 points + // and that rectangle is our refined detection position. + auto fd = sp(img,d); + rectangle rect; + for (unsigned long j = 0; j < fd.num_parts(); ++j) + rect += fd.part(j); + win.add_overlay(rect, rgb_pixel(255,0,0)); + } + + + + cout << "Hit enter to view the intermediate processing steps" << endl; + cin.get(); + + + // Now let's look at how the detector works. The high level processing steps look like: + // 1. Create an image pyramid and pack the pyramid into one big image. We call this + // image the "tiled pyramid". + // 2. Run the tiled pyramid image through the CNN. The CNN outputs a new image where + // bright pixels in the output image indicate the presence of cars. + // 3. Find pixels in the CNN's output image with a value > 0. Those locations are your + // preliminary car detections. + // 4. Perform non-maximum suppression on the preliminary detections to produce the + // final output. + // + // We will be plotting the images from steps 1 and 2 so you can visualize what's + // happening. For the CNN's output image, we will use the jet colormap so that "bright" + // outputs, i.e. pixels with big values, appear in red and "dim" outputs appear as a + // cold blue color. To do this we pick a range of CNN output values for the color + // mapping. The specific values don't matter. They are just selected to give a nice + // looking output image. + const float lower = -2.5; + const float upper = 0.0; + cout << "jet color mapping range: lower="<< lower << " upper="<< upper << endl; + + + + // Create a tiled pyramid image and display it on the screen. + std::vector<rectangle> rects; + matrix<rgb_pixel> tiled_img; + // Get the type of pyramid the CNN used + using pyramid_type = std::remove_reference<decltype(input_layer(net))>::type::pyramid_type; + // And tell create_tiled_pyramid to create the pyramid using that pyramid type. + create_tiled_pyramid<pyramid_type>(img, tiled_img, rects, + input_layer(net).get_pyramid_padding(), + input_layer(net).get_pyramid_outer_padding()); + image_window winpyr(tiled_img, "Tiled pyramid"); + + + + // This CNN detector represents a sliding window detector with 3 sliding windows. Each + // of the 3 windows has a different aspect ratio, allowing it to find vehicles which + // are either tall and skinny, squarish, or short and wide. The aspect ratio of a + // detection is determined by which channel in the output image triggers the detection. + // Here we are just going to max pool the channels together to get one final image for + // our display. In this image, a pixel will be bright if any of the sliding window + // detectors thinks there is a car at that location. 
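+    // (Hedged aside, not in the original: image_plane(t,0,k) extracts channel k
+    // of the first sample in tensor t as a matrix<float>, and max_pointwise()
+    // takes an element-wise max.  So after the loop below, network_output(r,c)
+    // holds the best score any of the 3 sliding windows produced at that pixel.)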
+ cout << "Number of channels in final tensor image: " << net.subnet().get_output().k() << endl; + matrix<float> network_output = image_plane(net.subnet().get_output(),0,0); + for (long k = 1; k < net.subnet().get_output().k(); ++k) + network_output = max_pointwise(network_output, image_plane(net.subnet().get_output(),0,k)); + // We will also upsample the CNN's output image. The CNN we defined has an 8x + // downsampling layer at the beginning. In the code below we are going to overlay this + // CNN output image on top of the raw input image. To make that look nice it helps to + // upsample the CNN output image back to the same resolution as the input image, which + // we do here. + const double network_output_scale = img.nc()/(double)network_output.nc(); + resize_image(network_output_scale, network_output); + + + // Display the network's output as a color image. + image_window win_output(jet(network_output, upper, lower), "Output tensor from the network"); + + + // Also, overlay network_output on top of the tiled image pyramid and display it. + for (long r = 0; r < tiled_img.nr(); ++r) + { + for (long c = 0; c < tiled_img.nc(); ++c) + { + dpoint tmp(c,r); + tmp = input_tensor_to_output_tensor(net, tmp); + tmp = point(network_output_scale*tmp); + if (get_rect(network_output).contains(tmp)) + { + float val = network_output(tmp.y(),tmp.x()); + // alpha blend the network output pixel with the RGB image to make our + // overlay. + rgb_alpha_pixel p; + assign_pixel(p , colormap_jet(val,lower,upper)); + p.alpha = 120; + assign_pixel(tiled_img(r,c), p); + } + } + } + // If you look at this image you can see that the vehicles have bright red blobs on + // them. That's the CNN saying "there is a car here!". You will also notice there is + // a certain scale at which it finds cars. They have to be not too big or too small, + // which is why we have an image pyramid. The pyramid allows us to find cars of all + // scales. + image_window win_pyr_overlay(tiled_img, "Detection scores on image pyramid"); + + + + + // Finally, we can collapse the pyramid back into the original image. The CNN doesn't + // actually do this step, since it's enough to threshold the tiled pyramid image to get + // the detections. However, it makes a nice visualization and clearly indicates that + // the detector is firing for all the cars. + matrix<float> collapsed(img.nr(), img.nc()); + resizable_tensor input_tensor; + input_layer(net).to_tensor(&img, &img+1, input_tensor); + for (long r = 0; r < collapsed.nr(); ++r) + { + for (long c = 0; c < collapsed.nc(); ++c) + { + // Loop over a bunch of scale values and look up what part of network_output + // corresponds to the point(c,r) in the original image, then take the max + // detection score over all the scales and save it at pixel point(c,r). + float max_score = -1e30; + for (double scale = 1; scale > 0.2; scale *= 5.0/6.0) + { + // Map from input image coordinates to tiled pyramid coordinates. + dpoint tmp = center(input_layer(net).image_space_to_tensor_space(input_tensor,scale, drectangle(dpoint(c,r)))); + // Now map from pyramid coordinates to network_output coordinates. + tmp = point(network_output_scale*input_tensor_to_output_tensor(net, tmp)); + + if (get_rect(network_output).contains(tmp)) + { + float val = network_output(tmp.y(),tmp.x()); + if (val > max_score) + max_score = val; + } + } + + collapsed(r,c) = max_score; + + // Also blend the scores into the original input image so we can view it as + // an overlay on the cars. 
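+            // (Note, not in the original: assigning an rgb_alpha_pixel to an
+            // rgb_pixel via assign_pixel() alpha blends the two, so the alpha
+            // of 120 below makes the jet-colored score a translucent overlay
+            // rather than overwriting the image pixel.)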
+ rgb_alpha_pixel p; + assign_pixel(p , colormap_jet(max_score,lower,upper)); + p.alpha = 120; + assign_pixel(img(r,c), p); + } + } + + image_window win_collapsed(jet(collapsed, upper, lower), "Collapsed output tensor from the network"); + image_window win_img_and_sal(img, "Collapsed detection scores on raw image"); + + + cout << "Hit enter to end program" << endl; + cin.get(); +} +catch(image_load_error& e) +{ + cout << e.what() << endl; + cout << "The test image is located in the examples folder. So you should run this program from a sub folder so that the relative path is correct." << endl; +} +catch(serialization_error& e) +{ + cout << e.what() << endl; + cout << "The correct model file can be obtained from: http://dlib.net/files/mmod_rear_end_vehicle_detector.dat.bz2" << endl; +} +catch(std::exception& e) +{ + cout << e.what() << endl; +} + + + + diff --git a/ml/dlib/examples/dnn_mmod_train_find_cars_ex.cpp b/ml/dlib/examples/dnn_mmod_train_find_cars_ex.cpp new file mode 100644 index 00000000..b97e25a8 --- /dev/null +++ b/ml/dlib/examples/dnn_mmod_train_find_cars_ex.cpp @@ -0,0 +1,425 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This example shows how to train a CNN based object detector using dlib's + loss_mmod loss layer. This loss layer implements the Max-Margin Object + Detection loss as described in the paper: + Max-Margin Object Detection by Davis E. King (http://arxiv.org/abs/1502.00046). + This is the same loss used by the popular SVM+HOG object detector in dlib + (see fhog_object_detector_ex.cpp) except here we replace the HOG features + with a CNN and train the entire detector end-to-end. This allows us to make + much more powerful detectors. + + It would be a good idea to become familiar with dlib's DNN tooling before reading this + example. So you should read dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp + before reading this example program. You should also read the introductory DNN+MMOD + example dnn_mmod_ex.cpp as well before proceeding. + + + This example is essentially a more complex version of dnn_mmod_ex.cpp. In it we train + a detector that finds the rear ends of motor vehicles. I will also discuss some + aspects of data preparation useful when training this kind of detector. + +*/ + + +#include <iostream> +#include <dlib/dnn.h> +#include <dlib/data_io.h> + +using namespace std; +using namespace dlib; + + + +template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>; +template <long num_filters, typename SUBNET> using con5 = con<num_filters,5,5,1,1,SUBNET>; +template <typename SUBNET> using downsampler = relu<bn_con<con5d<32, relu<bn_con<con5d<32, relu<bn_con<con5d<16,SUBNET>>>>>>>>>; +template <typename SUBNET> using rcon5 = relu<bn_con<con5<55,SUBNET>>>; +using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>; + + +// ---------------------------------------------------------------------------------------- + +int ignore_overlapped_boxes( + std::vector<mmod_rect>& boxes, + const test_box_overlap& overlaps +) +/*! + ensures + - Whenever two rectangles in boxes overlap, according to overlaps(), we set the + smallest box to ignore. + - returns the number of newly ignored boxes. 
+!*/ +{ + int num_ignored = 0; + for (size_t i = 0; i < boxes.size(); ++i) + { + if (boxes[i].ignore) + continue; + for (size_t j = i+1; j < boxes.size(); ++j) + { + if (boxes[j].ignore) + continue; + if (overlaps(boxes[i], boxes[j])) + { + ++num_ignored; + if(boxes[i].rect.area() < boxes[j].rect.area()) + boxes[i].ignore = true; + else + boxes[j].ignore = true; + } + } + } + return num_ignored; +} + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) try +{ + if (argc != 2) + { + cout << "Give the path to a folder containing training.xml and testing.xml files." << endl; + cout << "This example program is specifically designed to run on the dlib vehicle " << endl; + cout << "detection dataset, which is available at this URL: " << endl; + cout << " http://dlib.net/files/data/dlib_rear_end_vehicles_v1.tar" << endl; + cout << endl; + cout << "So download that dataset, extract it somewhere, and then run this program" << endl; + cout << "with the dlib_rear_end_vehicles folder as an argument. E.g. if you extract" << endl; + cout << "the dataset to the current folder then you should run this example program" << endl; + cout << "by typing: " << endl; + cout << " ./dnn_mmod_train_find_cars_ex dlib_rear_end_vehicles" << endl; + cout << endl; + cout << "It takes about a day to finish if run on a high end GPU like a 1080ti." << endl; + cout << endl; + return 0; + } + const std::string data_directory = argv[1]; + + + std::vector<matrix<rgb_pixel>> images_train, images_test; + std::vector<std::vector<mmod_rect>> boxes_train, boxes_test; + load_image_dataset(images_train, boxes_train, data_directory+"/training.xml"); + load_image_dataset(images_test, boxes_test, data_directory+"/testing.xml"); + + // When I was creating the dlib vehicle detection dataset I had to label all the cars + // in each image. MMOD requires all cars to be labeled, since any unlabeled part of an + // image is implicitly assumed to be not a car, and the algorithm will use it as + // negative training data. So every car must be labeled, either with a normal + // rectangle or an "ignore" rectangle that tells MMOD to simply ignore it (i.e. neither + // treat it as a thing to detect nor as negative training data). + // + // In our present case, many images contain very tiny cars in the distance, ones that + // are essentially just dark smudges. It's not reasonable to expect the CNN + // architecture we defined to detect such vehicles. However, I erred on the side of + // having more complete annotations when creating the dataset. So when I labeled these + // images I labeled many of these really difficult cases as vehicles to detect. + // + // So the first thing we are going to do is clean up our dataset a little bit. In + // particular, we are going to mark boxes smaller than 35*35 pixels as ignore since + // only really small and blurry cars appear at those sizes. We will also mark boxes + // that are heavily overlapped by another box as ignore. We do this because we want to + // allow for stronger non-maximum suppression logic in the learned detector, since that + // will help make it easier to learn a good detector. + // + // To explain this non-max suppression idea further it's important to understand how + // the detector works. Essentially, sliding window detectors scan all image locations + // and ask "is there a car here?". 
If there really is a car in a specific location in + // an image then usually many slightly different sliding window locations will produce + // high detection scores, indicating that there is a car at those locations. If we + // just stopped there then each car would produce multiple detections. But that isn't + // what we want. We want each car to produce just one detection. So it's common for + // detectors to include "non-maximum suppression" logic which simply takes the + // strongest detection and then deletes all detections "close to" the strongest. This + // is a simple post-processing step that can eliminate duplicate detections. However, + // we have to define what "close to" means. We can do this by looking at your training + // data and checking how close the closest target boxes are to each other, and then + // picking a "close to" measure that doesn't suppress those target boxes but is + // otherwise as tight as possible. This is exactly what the mmod_options object does + // by default. + // + // Importantly, this means that if your training dataset contains an image with two + // target boxes that really overlap a whole lot, then the non-maximum suppression + // "close to" measure will be configured to allow detections to really overlap a whole + // lot. On the other hand, if your dataset didn't contain any overlapped boxes at all, + // then the non-max suppression logic would be configured to filter out any boxes that + // overlapped at all, and thus would be performing a much stronger non-max suppression. + // + // Why does this matter? Well, remember that we want to avoid duplicate detections. + // If non-max suppression just kills everything in a really wide area around a car then + // the CNN doesn't really need to learn anything about avoiding duplicate detections. + // However, if non-max suppression only suppresses a tiny area around each detection + // then the CNN will need to learn to output small detection scores for those areas of + // the image not suppressed. The smaller the non-max suppression region the more the + // CNN has to learn and the more difficult the learning problem will become. This is + // why we remove highly overlapped objects from the training dataset. That is, we do + // it so the non-max suppression logic will be able to be reasonably effective. Here + // we are ensuring that any boxes that are entirely contained by another are + // suppressed. We also ensure that boxes with an intersection over union of 0.5 or + // greater are suppressed. This will improve the resulting detector since it will be + // able to use more aggressive non-max suppression settings. + + int num_overlapped_ignored_test = 0; + for (auto& v : boxes_test) + num_overlapped_ignored_test += ignore_overlapped_boxes(v, test_box_overlap(0.50, 0.95)); + + int num_overlapped_ignored = 0; + int num_additional_ignored = 0; + for (auto& v : boxes_train) + { + num_overlapped_ignored += ignore_overlapped_boxes(v, test_box_overlap(0.50, 0.95)); + for (auto& bb : v) + { + if (bb.rect.width() < 35 && bb.rect.height() < 35) + { + if (!bb.ignore) + { + bb.ignore = true; + ++num_additional_ignored; + } + } + + // The dlib vehicle detection dataset doesn't contain any detections with + // really extreme aspect ratios. However, some datasets do, often because of + // bad labeling. So it's a good idea to check for that and either eliminate + // those boxes or set them to ignore. Although, this depends on your + // application. 
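+ // A minimal sketch of such an aspect ratio check might look like the following.
+ // It is commented out since this example doesn't need it, and the 3:1 cutoff is
+ // an arbitrary illustration rather than any kind of dlib default:
+ //
+ // const double aspect = bb.rect.width()/(double)bb.rect.height();
+ // if (aspect > 3 || aspect < 1.0/3)
+ // bb.ignore = true;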
+ // + // For instance, if your dataset has boxes with an aspect ratio + // of 10 then you should think about what that means for the network + // architecture. Does the receptive field even cover the entirety of the box + // in those cases? Do you care about these boxes? Are they labeling errors? + // I find that many people will download some dataset from the internet and + // just take it as given. They run it through some training algorithm and take + // the dataset as unchallengeable truth. But many datasets are full of + // labeling errors. There are also a lot of datasets that aren't full of + // errors, but are annotated in a sloppy and inconsistent way. Fixing those + // errors and inconsistencies can often greatly improve models trained from + // such data. It's almost always worth the time to try and improve your + // training dataset. + // + // In any case, my point is that there are other types of dataset cleaning you + // could put here. What exactly you need depends on your application. But you + // should carefully consider it and not take your dataset as a given. The work + // of creating a good detector is largely about creating a high quality + // training dataset. + } + } + + // When modifying a dataset like this, it's a really good idea to print a log of how + // many boxes you ignored. It's easy to accidentally ignore a huge block of data, so + // you should always look and see that things are doing what you expect. + cout << "num_overlapped_ignored: "<< num_overlapped_ignored << endl; + cout << "num_additional_ignored: "<< num_additional_ignored << endl; + cout << "num_overlapped_ignored_test: "<< num_overlapped_ignored_test << endl; + + + cout << "num training images: " << images_train.size() << endl; + cout << "num testing images: " << images_test.size() << endl; + + + // Our vehicle detection dataset has basically 3 different types of boxes. Square + // boxes, tall and skinny boxes (e.g. semi trucks), and short and wide boxes (e.g. + // sedans). Here we are telling the MMOD algorithm that a vehicle is recognizable as + // long as the longest box side is at least 70 pixels long and the shortest box side is + // at least 30 pixels long. mmod_options will use these parameters to decide how large + // each of the sliding windows needs to be so as to be able to detect all the vehicles. + // Since our dataset has basically these 3 different aspect ratios, it will decide to + // use 3 different sliding windows. This means the final con layer in the network will + // have 3 filters, one for each of these aspect ratios. + // + // Another thing to consider when setting the sliding window size is the "stride" of + // your network. The network we defined above downsamples the image by a factor of 8x + // in the first few layers. So when the sliding windows are scanning the image, they + // are stepping over it with a stride of 8 pixels. If you set the sliding window size + // too small then the stride will become an issue. For instance, if you set the + // sliding window size to 4 pixels, then it means a 4x4 window will be moved by 8 + // pixels at a time when scanning. This is obviously a problem since 75% of the image + // won't even be visited by the sliding window. So you need to set the window size to + // be big enough relative to the stride of your network. In our case, the windows are + // at least 30 pixels in length, so being moved by 8 pixel steps is fine. + mmod_options options(boxes_train, 70, 30); + + + // This setting is very important and dataset specific. 
The vehicle detection dataset
+ // contains boxes that are marked as "ignore", as we discussed above. Some of them are
+ // ignored because we set ignore to true in the above code. However, the xml files
+ // also contained a lot of ignore boxes. Some of them are large boxes that encompass
+ // large parts of an image and the intention is to have everything inside those boxes
+ // be ignored. Therefore, we need to tell the MMOD algorithm to do that, which we do
+ // by setting options.overlaps_ignore appropriately.
+ //
+ // But first, we need to understand exactly what this option does. The MMOD loss
+ // is essentially counting the number of false alarms + missed detections produced by
+ // the detector for each image. During training, the code is running the detector on
+ // each image in a mini-batch and looking at its output and counting the number of
+ // mistakes. The optimizer tries to find parameter settings that minimize the number
+ // of detector mistakes.
+ //
+ // This overlaps_ignore option allows you to tell the loss that some outputs from the
+ // detector should be totally ignored, as if they never happened. In particular, if a
+ // detection overlaps a box in the training data with ignore==true then that detection
+ // is ignored. This overlap is determined by calling
+ // options.overlaps_ignore(the_detection, the_ignored_training_box). If it returns
+ // true then that detection is ignored.
+ //
+ // You should read the documentation for test_box_overlap, the class type of
+ // overlaps_ignore, for full details. However, the gist is that the default behavior is
+ // to only consider boxes as overlapping if their intersection over union is > 0.5.
+ // However, the dlib vehicle detection dataset contains large boxes that are meant to
+ // mask out large areas of an image. So intersection over union isn't an appropriate
+ // way to measure "overlaps with box" in this case. We want any box that is contained
+ // inside one of these big regions to be ignored, even if the detection box is really
+ // small. So we set overlaps_ignore to behave that way with this line.
+ options.overlaps_ignore = test_box_overlap(0.5, 0.95);
+
+ net_type net(options);
+
+ // The final layer of the network must be a con layer that contains
+ // options.detector_windows.size() filters. This is because these final filters are
+ // what perform the final "sliding window" detection in the network. For the dlib
+ // vehicle dataset, there will be 3 sliding window detectors, so we will be setting
+ // num_filters to 3 here.
+ net.subnet().layer_details().set_num_filters(options.detector_windows.size());
+
+
+ dnn_trainer<net_type> trainer(net,sgd(0.0001,0.9));
+ trainer.set_learning_rate(0.1);
+ trainer.be_verbose();
+
+
+ // While training, we are going to use early stopping. That is, we will be checking
+ // how well the detector is performing on our test data and when it stops getting
+ // better on the test data we will drop the learning rate. We will keep doing that
+ // until the learning rate is less than 1e-4. These two settings tell the trainer to
+ // do that. Essentially, we are setting the first threshold so high that it is
+ // effectively infinite, so only the test iterations without progress threshold will
+ // matter. In particular, it says that once we observe 1000 testing mini-batches where
+ // the test loss clearly isn't decreasing we will lower the learning rate.
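+ // (When the rate is lowered, dnn_trainer multiplies it by its learning rate shrink
+ // factor, which defaults to 0.1. So starting from 0.1, training will typically step
+ // through 0.1 -> 0.01 -> 0.001 -> 0.0001 before the main loop below terminates.)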
+ trainer.set_iterations_without_progress_threshold(50000); + trainer.set_test_iterations_without_progress_threshold(1000); + + const string sync_filename = "mmod_cars_sync"; + trainer.set_synchronization_file(sync_filename, std::chrono::minutes(5)); + + + + + std::vector<matrix<rgb_pixel>> mini_batch_samples; + std::vector<std::vector<mmod_rect>> mini_batch_labels; + random_cropper cropper; + cropper.set_seed(time(0)); + cropper.set_chip_dims(350, 350); + // Usually you want to give the cropper whatever min sizes you passed to the + // mmod_options constructor, or very slightly smaller sizes, which is what we do here. + cropper.set_min_object_size(69,28); + cropper.set_max_rotation_degrees(2); + dlib::rand rnd; + + // Log the training parameters to the console + cout << trainer << cropper << endl; + + int cnt = 1; + // Run the trainer until the learning rate gets small. + while(trainer.get_learning_rate() >= 1e-4) + { + // Every 30 mini-batches we do a testing mini-batch. + if (cnt%30 != 0 || images_test.size() == 0) + { + cropper(87, images_train, boxes_train, mini_batch_samples, mini_batch_labels); + // We can also randomly jitter the colors and that often helps a detector + // generalize better to new images. + for (auto&& img : mini_batch_samples) + disturb_colors(img, rnd); + + // It's a good idea to, at least once, put code here that displays the images + // and boxes the random cropper is generating. You should look at them and + // think about if the output makes sense for your problem. Most of the time + // it will be fine, but sometimes you will realize that the pattern of cropping + // isn't really appropriate for your problem and you will need to make some + // change to how the mini-batches are being generated. Maybe you will tweak + // some of the cropper's settings, or write your own entirely separate code to + // create mini-batches. But either way, if you don't look you will never know. + // An easy way to do this is to create a dlib::image_window to display the + // images and boxes. + + trainer.train_one_step(mini_batch_samples, mini_batch_labels); + } + else + { + cropper(87, images_test, boxes_test, mini_batch_samples, mini_batch_labels); + // We can also randomly jitter the colors and that often helps a detector + // generalize better to new images. + for (auto&& img : mini_batch_samples) + disturb_colors(img, rnd); + + trainer.test_one_step(mini_batch_samples, mini_batch_labels); + } + ++cnt; + } + // wait for training threads to stop + trainer.get_net(); + cout << "done training" << endl; + + // Save the network to disk + net.clean(); + serialize("mmod_rear_end_vehicle_detector.dat") << net; + + + // It's a really good idea to print the training parameters. This is because you will + // invariably be running multiple rounds of training and should be logging the output + // to a file. This print statement will include many of the training parameters in + // your log. + cout << trainer << cropper << endl; + + cout << "\nsync_filename: " << sync_filename << endl; + cout << "num training images: "<< images_train.size() << endl; + cout << "training results: " << test_object_detection_function(net, images_train, boxes_train, test_box_overlap(), 0, options.overlaps_ignore); + // Upsampling the data will allow the detector to find smaller cars. Recall that + // we configured it to use a sliding window nominally 70 pixels in size. So upsampling + // here will let it find things nominally 35 pixels in size. 
Although we include a
+ // limit of 1800*1800 here which means "don't upsample an image if it's already larger
+ // than 1800*1800". We do this so we don't run out of RAM, which is a concern because
+ // some of the images in the dlib vehicle dataset are really high resolution.
+ upsample_image_dataset<pyramid_down<2>>(images_train, boxes_train, 1800*1800);
+ cout << "training upsampled results: " << test_object_detection_function(net, images_train, boxes_train, test_box_overlap(), 0, options.overlaps_ignore);
+
+
+ cout << "num testing images: "<< images_test.size() << endl;
+ cout << "testing results: " << test_object_detection_function(net, images_test, boxes_test, test_box_overlap(), 0, options.overlaps_ignore);
+ upsample_image_dataset<pyramid_down<2>>(images_test, boxes_test, 1800*1800);
+ cout << "testing upsampled results: " << test_object_detection_function(net, images_test, boxes_test, test_box_overlap(), 0, options.overlaps_ignore);
+
+ /*
+ This program takes many hours to execute on a high end GPU. It took about a day to
+ train on an NVIDIA 1080ti. The resulting model file is available at
+ http://dlib.net/files/mmod_rear_end_vehicle_detector.dat.bz2
+ It should be noted that this file on dlib.net has a dlib::shape_predictor appended
+ onto the end of it (see dnn_mmod_find_cars_ex.cpp for an example of its use). This
+ explains why the model file on dlib.net is larger than the
+ mmod_rear_end_vehicle_detector.dat output by this program.
+
+ You can see some videos of this vehicle detector running on YouTube:
+ https://www.youtube.com/watch?v=4B3bzmxMAZU
+ https://www.youtube.com/watch?v=bP2SUo5vSlc
+
+ Also, the training and testing accuracies were:
+ num training images: 2217
+ training results: 0.990738 0.736431 0.736073
+ training upsampled results: 0.986837 0.937694 0.936912
+ num testing images: 135
+ testing results: 0.988827 0.471372 0.470806
+ testing upsampled results: 0.987879 0.651132 0.650399
+ */
+
+ return 0;
+
+}
+catch(std::exception& e)
+{
+ cout << e.what() << endl;
+}
+
+
+
+
diff --git a/ml/dlib/examples/dnn_semantic_segmentation_ex.cpp b/ml/dlib/examples/dnn_semantic_segmentation_ex.cpp
new file mode 100644
index 00000000..fa49c5a9
--- /dev/null
+++ b/ml/dlib/examples/dnn_semantic_segmentation_ex.cpp
@@ -0,0 +1,172 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+ This example shows how to do semantic segmentation on an image using a net pretrained
+ on the PASCAL VOC2012 dataset. For an introduction to what segmentation is, see the
+ accompanying header file dnn_semantic_segmentation_ex.h.
+
+ Instructions how to run the example:
+ 1. Download the PASCAL VOC2012 data, and untar it somewhere.
+ http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
+ 2. Build the dnn_semantic_segmentation_train_ex example program.
+ 3. Run:
+ ./dnn_semantic_segmentation_train_ex /path/to/VOC2012
+ 4. Wait while the network is being trained.
+ 5. Build the dnn_semantic_segmentation_ex example program.
+ 6. Run:
+ ./dnn_semantic_segmentation_ex /path/to/VOC2012-or-other-images
+
+ An alternative to steps 2-4 above is to download a pre-trained network
+ from here: http://dlib.net/files/semantic_segmentation_voc2012net.dnn
+
+ It would be a good idea to become familiar with dlib's DNN tooling before reading this
+ example. So you should read dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp
+ before reading this example program.
+*/
+
+#include "dnn_semantic_segmentation_ex.h"
+
+#include <iostream>
+#include <dlib/data_io.h>
+#include <dlib/gui_widgets.h>
+
+using namespace std;
+using namespace dlib;
+
+// ----------------------------------------------------------------------------------------
+
+// The PASCAL VOC2012 dataset contains 20 ground-truth classes + background. Each class
+// is represented using an RGB color value. We associate each class also to an index in the
+// range [0, 20], used internally by the network. To generate nice RGB representations of
+// inference results, we need to be able to convert the index values to the corresponding
+// RGB values.
+
+// Given an index in the range [0, 20], find the corresponding PASCAL VOC2012 class
+// (e.g., 'dog').
+const Voc2012class& find_voc2012_class(const uint16_t& index_label)
+{
+ return find_voc2012_class(
+ [&index_label](const Voc2012class& voc2012class)
+ {
+ return index_label == voc2012class.index;
+ }
+ );
+}
+
+// Convert an index in the range [0, 20] to a corresponding RGB class label.
+inline rgb_pixel index_label_to_rgb_label(uint16_t index_label)
+{
+ return find_voc2012_class(index_label).rgb_label;
+}
+
+// Convert an image containing indexes in the range [0, 20] to a corresponding
+// image containing RGB class labels.
+void index_label_image_to_rgb_label_image(
+ const matrix<uint16_t>& index_label_image,
+ matrix<rgb_pixel>& rgb_label_image
+)
+{
+ const long nr = index_label_image.nr();
+ const long nc = index_label_image.nc();
+
+ rgb_label_image.set_size(nr, nc);
+
+ for (long r = 0; r < nr; ++r)
+ {
+ for (long c = 0; c < nc; ++c)
+ {
+ rgb_label_image(r, c) = index_label_to_rgb_label(index_label_image(r, c));
+ }
+ }
+}
+
+// Find the most prominent class label from amongst the per-pixel predictions.
+std::string get_most_prominent_non_background_classlabel(const matrix<uint16_t>& index_label_image)
+{
+ const long nr = index_label_image.nr();
+ const long nc = index_label_image.nc();
+
+ std::vector<unsigned int> counters(class_count);
+
+ for (long r = 0; r < nr; ++r)
+ {
+ for (long c = 0; c < nc; ++c)
+ {
+ const uint16_t label = index_label_image(r, c);
+ ++counters[label];
+ }
+ }
+
+ const auto max_element = std::max_element(counters.begin() + 1, counters.end());
+ const uint16_t most_prominent_index_label = max_element - counters.begin();
+
+ return find_voc2012_class(most_prominent_index_label).classlabel;
+}
+
+// ----------------------------------------------------------------------------------------
+
+int main(int argc, char** argv) try
+{
+ if (argc != 2)
+ {
+ cout << "You call this program like this: " << endl;
+ cout << "./dnn_semantic_segmentation_ex /path/to/images" << endl;
+ cout << endl;
+ cout << "You will also need a trained 'semantic_segmentation_voc2012net.dnn' file." << endl;
+ cout << "You can either train it yourself (see example program" << endl;
+ cout << "dnn_semantic_segmentation_train_ex), or download a" << endl;
+ cout << "copy from here: http://dlib.net/files/semantic_segmentation_voc2012net.dnn" << endl;
+ return 1;
+ }
+
+ // Read the file containing the trained network from the working directory.
+ anet_type net;
+ deserialize("semantic_segmentation_voc2012net.dnn") >> net;
+
+ // Show inference results in a window.
+ image_window win;
+
+ matrix<rgb_pixel> input_image;
+ matrix<uint16_t> index_label_image;
+ matrix<rgb_pixel> rgb_label_image;
+
+ // Find supported image files.
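+ // (dlib::match_endings takes a space-separated list and matches any file whose name
+ // ends with one of the given strings, so this picks up .jpeg, .jpg, and .png files
+ // anywhere under the given directory tree.)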
+ const std::vector<file> files = dlib::get_files_in_directory_tree(argv[1], + dlib::match_endings(".jpeg .jpg .png")); + + cout << "Found " << files.size() << " images, processing..." << endl; + + for (const file& file : files) + { + // Load the input image. + load_image(input_image, file.full_name()); + + // Create predictions for each pixel. At this point, the type of each prediction + // is an index (a value between 0 and 20). Note that the net may return an image + // that is not exactly the same size as the input. + const matrix<uint16_t> temp = net(input_image); + + // Crop the returned image to be exactly the same size as the input. + const chip_details chip_details( + centered_rect(temp.nc() / 2, temp.nr() / 2, input_image.nc(), input_image.nr()), + chip_dims(input_image.nr(), input_image.nc()) + ); + extract_image_chip(temp, chip_details, index_label_image, interpolate_nearest_neighbor()); + + // Convert the indexes to RGB values. + index_label_image_to_rgb_label_image(index_label_image, rgb_label_image); + + // Show the input image on the left, and the predicted RGB labels on the right. + win.set_image(join_rows(input_image, rgb_label_image)); + + // Find the most prominent class label from amongst the per-pixel predictions. + const std::string classlabel = get_most_prominent_non_background_classlabel(index_label_image); + + cout << file.name() << " : " << classlabel << " - hit enter to process the next image"; + cin.get(); + } +} +catch(std::exception& e) +{ + cout << e.what() << endl; +} + diff --git a/ml/dlib/examples/dnn_semantic_segmentation_ex.h b/ml/dlib/examples/dnn_semantic_segmentation_ex.h new file mode 100644 index 00000000..47fc102c --- /dev/null +++ b/ml/dlib/examples/dnn_semantic_segmentation_ex.h @@ -0,0 +1,200 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + Semantic segmentation using the PASCAL VOC2012 dataset. + + In segmentation, the task is to assign each pixel of an input image + a label - for example, 'dog'. Then, the idea is that neighboring + pixels having the same label can be connected together to form a + larger region, representing a complete (or partially occluded) dog. + So technically, segmentation can be viewed as classification of + individual pixels (using the relevant context in the input images), + however the goal usually is to identify meaningful regions that + represent complete entities of interest (such as dogs). + + Instructions how to run the example: + 1. Download the PASCAL VOC2012 data, and untar it somewhere. + http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar + 2. Build the dnn_semantic_segmentation_train_ex example program. + 3. Run: + ./dnn_semantic_segmentation_train_ex /path/to/VOC2012 + 4. Wait while the network is being trained. + 5. Build the dnn_semantic_segmentation_ex example program. + 6. Run: + ./dnn_semantic_segmentation_ex /path/to/VOC2012-or-other-images + + An alternative to steps 2-4 above is to download a pre-trained network + from here: http://dlib.net/files/semantic_segmentation_voc2012net.dnn + + It would be a good idea to become familiar with dlib's DNN tooling before reading this + example. So you should read dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp + before reading this example program. 
+*/ + +#ifndef DLIB_DNn_SEMANTIC_SEGMENTATION_EX_H_ +#define DLIB_DNn_SEMANTIC_SEGMENTATION_EX_H_ + +#include <dlib/dnn.h> + +// ---------------------------------------------------------------------------------------- + +inline bool operator == (const dlib::rgb_pixel& a, const dlib::rgb_pixel& b) +{ + return a.red == b.red && a.green == b.green && a.blue == b.blue; +} + +// ---------------------------------------------------------------------------------------- + +// The PASCAL VOC2012 dataset contains 20 ground-truth classes + background. Each class +// is represented using an RGB color value. We associate each class also to an index in the +// range [0, 20], used internally by the network. + +struct Voc2012class { + Voc2012class(uint16_t index, const dlib::rgb_pixel& rgb_label, const std::string& classlabel) + : index(index), rgb_label(rgb_label), classlabel(classlabel) + {} + + // The index of the class. In the PASCAL VOC 2012 dataset, indexes from 0 to 20 are valid. + const uint16_t index = 0; + + // The corresponding RGB representation of the class. + const dlib::rgb_pixel rgb_label; + + // The label of the class in plain text. + const std::string classlabel; +}; + +namespace { + constexpr int class_count = 21; // background + 20 classes + + const std::vector<Voc2012class> classes = { + Voc2012class(0, dlib::rgb_pixel(0, 0, 0), ""), // background + + // The cream-colored `void' label is used in border regions and to mask difficult objects + // (see http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/devkit_doc.html) + Voc2012class(dlib::loss_multiclass_log_per_pixel_::label_to_ignore, + dlib::rgb_pixel(224, 224, 192), "border"), + + Voc2012class(1, dlib::rgb_pixel(128, 0, 0), "aeroplane"), + Voc2012class(2, dlib::rgb_pixel( 0, 128, 0), "bicycle"), + Voc2012class(3, dlib::rgb_pixel(128, 128, 0), "bird"), + Voc2012class(4, dlib::rgb_pixel( 0, 0, 128), "boat"), + Voc2012class(5, dlib::rgb_pixel(128, 0, 128), "bottle"), + Voc2012class(6, dlib::rgb_pixel( 0, 128, 128), "bus"), + Voc2012class(7, dlib::rgb_pixel(128, 128, 128), "car"), + Voc2012class(8, dlib::rgb_pixel( 64, 0, 0), "cat"), + Voc2012class(9, dlib::rgb_pixel(192, 0, 0), "chair"), + Voc2012class(10, dlib::rgb_pixel( 64, 128, 0), "cow"), + Voc2012class(11, dlib::rgb_pixel(192, 128, 0), "diningtable"), + Voc2012class(12, dlib::rgb_pixel( 64, 0, 128), "dog"), + Voc2012class(13, dlib::rgb_pixel(192, 0, 128), "horse"), + Voc2012class(14, dlib::rgb_pixel( 64, 128, 128), "motorbike"), + Voc2012class(15, dlib::rgb_pixel(192, 128, 128), "person"), + Voc2012class(16, dlib::rgb_pixel( 0, 64, 0), "pottedplant"), + Voc2012class(17, dlib::rgb_pixel(128, 64, 0), "sheep"), + Voc2012class(18, dlib::rgb_pixel( 0, 192, 0), "sofa"), + Voc2012class(19, dlib::rgb_pixel(128, 192, 0), "train"), + Voc2012class(20, dlib::rgb_pixel( 0, 64, 128), "tvmonitor"), + }; +} + +template <typename Predicate> +const Voc2012class& find_voc2012_class(Predicate predicate) +{ + const auto i = std::find_if(classes.begin(), classes.end(), predicate); + + if (i != classes.end()) + { + return *i; + } + else + { + throw std::runtime_error("Unable to find a matching VOC2012 class"); + } +} + +// ---------------------------------------------------------------------------------------- + +// Introduce the building blocks used to define the segmentation network. +// The network first does residual downsampling (similar to the dnn_imagenet_(train_)ex +// example program), and then residual upsampling. The network could be improved e.g. 
+// by introducing skip connections from the input image, and/or the first layers, to the +// last layer(s). (See Long et al., Fully Convolutional Networks for Semantic Segmentation, +// https://people.eecs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf) + +template <int N, template <typename> class BN, int stride, typename SUBNET> +using block = BN<dlib::con<N,3,3,1,1, dlib::relu<BN<dlib::con<N,3,3,stride,stride,SUBNET>>>>>; + +template <int N, template <typename> class BN, int stride, typename SUBNET> +using blockt = BN<dlib::cont<N,3,3,1,1,dlib::relu<BN<dlib::cont<N,3,3,stride,stride,SUBNET>>>>>; + +template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> +using residual = dlib::add_prev1<block<N,BN,1,dlib::tag1<SUBNET>>>; + +template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> +using residual_down = dlib::add_prev2<dlib::avg_pool<2,2,2,2,dlib::skip1<dlib::tag2<block<N,BN,2,dlib::tag1<SUBNET>>>>>>; + +template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> +using residual_up = dlib::add_prev2<dlib::cont<N,2,2,2,2,dlib::skip1<dlib::tag2<blockt<N,BN,2,dlib::tag1<SUBNET>>>>>>; + +template <int N, typename SUBNET> using res = dlib::relu<residual<block,N,dlib::bn_con,SUBNET>>; +template <int N, typename SUBNET> using ares = dlib::relu<residual<block,N,dlib::affine,SUBNET>>; +template <int N, typename SUBNET> using res_down = dlib::relu<residual_down<block,N,dlib::bn_con,SUBNET>>; +template <int N, typename SUBNET> using ares_down = dlib::relu<residual_down<block,N,dlib::affine,SUBNET>>; +template <int N, typename SUBNET> using res_up = dlib::relu<residual_up<block,N,dlib::bn_con,SUBNET>>; +template <int N, typename SUBNET> using ares_up = dlib::relu<residual_up<block,N,dlib::affine,SUBNET>>; + +// ---------------------------------------------------------------------------------------- + +template <typename SUBNET> using res512 = res<512, SUBNET>; +template <typename SUBNET> using res256 = res<256, SUBNET>; +template <typename SUBNET> using res128 = res<128, SUBNET>; +template <typename SUBNET> using res64 = res<64, SUBNET>; +template <typename SUBNET> using ares512 = ares<512, SUBNET>; +template <typename SUBNET> using ares256 = ares<256, SUBNET>; +template <typename SUBNET> using ares128 = ares<128, SUBNET>; +template <typename SUBNET> using ares64 = ares<64, SUBNET>; + + +template <typename SUBNET> using level1 = dlib::repeat<2,res512,res_down<512,SUBNET>>; +template <typename SUBNET> using level2 = dlib::repeat<2,res256,res_down<256,SUBNET>>; +template <typename SUBNET> using level3 = dlib::repeat<2,res128,res_down<128,SUBNET>>; +template <typename SUBNET> using level4 = dlib::repeat<2,res64,res<64,SUBNET>>; + +template <typename SUBNET> using alevel1 = dlib::repeat<2,ares512,ares_down<512,SUBNET>>; +template <typename SUBNET> using alevel2 = dlib::repeat<2,ares256,ares_down<256,SUBNET>>; +template <typename SUBNET> using alevel3 = dlib::repeat<2,ares128,ares_down<128,SUBNET>>; +template <typename SUBNET> using alevel4 = dlib::repeat<2,ares64,ares<64,SUBNET>>; + +template <typename SUBNET> using level1t = dlib::repeat<2,res512,res_up<512,SUBNET>>; +template <typename SUBNET> using level2t = dlib::repeat<2,res256,res_up<256,SUBNET>>; +template <typename SUBNET> using level3t = dlib::repeat<2,res128,res_up<128,SUBNET>>; +template <typename SUBNET> using level4t = 
dlib::repeat<2,res64,res_up<64,SUBNET>>; + +template <typename SUBNET> using alevel1t = dlib::repeat<2,ares512,ares_up<512,SUBNET>>; +template <typename SUBNET> using alevel2t = dlib::repeat<2,ares256,ares_up<256,SUBNET>>; +template <typename SUBNET> using alevel3t = dlib::repeat<2,ares128,ares_up<128,SUBNET>>; +template <typename SUBNET> using alevel4t = dlib::repeat<2,ares64,ares_up<64,SUBNET>>; + +// ---------------------------------------------------------------------------------------- + +// training network type +using net_type = dlib::loss_multiclass_log_per_pixel< + dlib::cont<class_count,7,7,2,2, + level4t<level3t<level2t<level1t< + level1<level2<level3<level4< + dlib::max_pool<3,3,2,2,dlib::relu<dlib::bn_con<dlib::con<64,7,7,2,2, + dlib::input<dlib::matrix<dlib::rgb_pixel>> + >>>>>>>>>>>>>>; + +// testing network type (replaced batch normalization with fixed affine transforms) +using anet_type = dlib::loss_multiclass_log_per_pixel< + dlib::cont<class_count,7,7,2,2, + alevel4t<alevel3t<alevel2t<alevel1t< + alevel1<alevel2<alevel3<alevel4< + dlib::max_pool<3,3,2,2,dlib::relu<dlib::affine<dlib::con<64,7,7,2,2, + dlib::input<dlib::matrix<dlib::rgb_pixel>> + >>>>>>>>>>>>>>; + +// ---------------------------------------------------------------------------------------- + +#endif // DLIB_DNn_SEMANTIC_SEGMENTATION_EX_H_ diff --git a/ml/dlib/examples/dnn_semantic_segmentation_train_ex.cpp b/ml/dlib/examples/dnn_semantic_segmentation_train_ex.cpp new file mode 100644 index 00000000..0de8c9f4 --- /dev/null +++ b/ml/dlib/examples/dnn_semantic_segmentation_train_ex.cpp @@ -0,0 +1,390 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This example shows how to train a semantic segmentation net using the PASCAL VOC2012 + dataset. For an introduction to what segmentation is, see the accompanying header file + dnn_semantic_segmentation_ex.h. + + Instructions how to run the example: + 1. Download the PASCAL VOC2012 data, and untar it somewhere. + http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar + 2. Build the dnn_semantic_segmentation_train_ex example program. + 3. Run: + ./dnn_semantic_segmentation_train_ex /path/to/VOC2012 + 4. Wait while the network is being trained. + 5. Build the dnn_semantic_segmentation_ex example program. + 6. Run: + ./dnn_semantic_segmentation_ex /path/to/VOC2012-or-other-images + + It would be a good idea to become familiar with dlib's DNN tooling before reading this + example. So you should read dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp + before reading this example program. +*/ + +#include "dnn_semantic_segmentation_ex.h" + +#include <iostream> +#include <dlib/data_io.h> +#include <dlib/image_transforms.h> +#include <dlib/dir_nav.h> +#include <iterator> +#include <thread> + +using namespace std; +using namespace dlib; + +// A single training sample. A mini-batch comprises many of these. +struct training_sample +{ + matrix<rgb_pixel> input_image; + matrix<uint16_t> label_image; // The ground-truth label of each pixel. 
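+ // (Pixels that should not contribute to the loss, such as the VOC2012 border
+ // regions, carry the special value
+ // dlib::loss_multiclass_log_per_pixel_::label_to_ignore.)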
+};
+
+// ----------------------------------------------------------------------------------------
+
+rectangle make_random_cropping_rect_resnet(
+ const matrix<rgb_pixel>& img,
+ dlib::rand& rnd
+)
+{
+ // figure out what rectangle we want to crop from the image
+ double mins = 0.466666666, maxs = 0.875;
+ auto scale = mins + rnd.get_random_double()*(maxs-mins);
+ auto size = scale*std::min(img.nr(), img.nc());
+ rectangle rect(size, size);
+ // randomly shift the box around
+ point offset(rnd.get_random_32bit_number()%(img.nc()-rect.width()),
+ rnd.get_random_32bit_number()%(img.nr()-rect.height()));
+ return move_rect(rect, offset);
+}
+
+// ----------------------------------------------------------------------------------------
+
+void randomly_crop_image (
+ const matrix<rgb_pixel>& input_image,
+ const matrix<uint16_t>& label_image,
+ training_sample& crop,
+ dlib::rand& rnd
+)
+{
+ const auto rect = make_random_cropping_rect_resnet(input_image, rnd);
+
+ const chip_details chip_details(rect, chip_dims(227, 227));
+
+ // Crop the input image.
+ extract_image_chip(input_image, chip_details, crop.input_image, interpolate_bilinear());
+
+ // Crop the labels correspondingly. However, note that here bilinear
+ // interpolation would make absolutely no sense - you wouldn't say that
+ // a bicycle is half-way between an aeroplane and a bird, would you?
+ extract_image_chip(label_image, chip_details, crop.label_image, interpolate_nearest_neighbor());
+
+ // Also randomly flip the input image and the labels.
+ if (rnd.get_random_double() > 0.5)
+ {
+ crop.input_image = fliplr(crop.input_image);
+ crop.label_image = fliplr(crop.label_image);
+ }
+
+ // And then randomly adjust the colors.
+ apply_random_color_offset(crop.input_image, rnd);
+}
+
+// ----------------------------------------------------------------------------------------
+
+// The names of the input image and the associated RGB label image in the PASCAL VOC 2012
+// data set.
+struct image_info
+{
+ string image_filename;
+ string label_filename;
+};
+
+// Read the list of image files belonging to either the "train", "trainval", or "val" set
+// of the PASCAL VOC2012 data.
+std::vector<image_info> get_pascal_voc2012_listing(
+ const std::string& voc2012_folder,
+ const std::string& file = "train" // "train", "trainval", or "val"
+)
+{
+ std::ifstream in(voc2012_folder + "/ImageSets/Segmentation/" + file + ".txt");
+
+ std::vector<image_info> results;
+
+ while (in)
+ {
+ std::string basename;
+ in >> basename;
+
+ if (!basename.empty())
+ {
+ image_info image_info;
+ image_info.image_filename = voc2012_folder + "/JPEGImages/" + basename + ".jpg";
+ image_info.label_filename = voc2012_folder + "/SegmentationClass/" + basename + ".png";
+ results.push_back(image_info);
+ }
+ }
+
+ return results;
+}
+
+// Read the list of image files belonging to the "train" set of the PASCAL VOC2012 data.
+std::vector<image_info> get_pascal_voc2012_train_listing(
+ const std::string& voc2012_folder
+)
+{
+ return get_pascal_voc2012_listing(voc2012_folder, "train");
+}
+
+// Read the list of image files belonging to the "val" set of the PASCAL VOC2012 data.
+std::vector<image_info> get_pascal_voc2012_val_listing(
+ const std::string& voc2012_folder
+)
+{
+ return get_pascal_voc2012_listing(voc2012_folder, "val");
+}
+
+// ----------------------------------------------------------------------------------------
+
+// The PASCAL VOC2012 dataset contains 20 ground-truth classes + background. Each class
+// is represented using an RGB color value.
We associate each class also to an index in the
+// range [0, 20], used internally by the network. To convert the ground-truth data to
+// something that the network can efficiently digest, we need to be able to map the RGB
+// values to the corresponding indexes.
+
+// Given an RGB representation, find the corresponding PASCAL VOC2012 class
+// (e.g., 'dog').
+const Voc2012class& find_voc2012_class(const dlib::rgb_pixel& rgb_label)
+{
+ return find_voc2012_class(
+ [&rgb_label](const Voc2012class& voc2012class)
+ {
+ return rgb_label == voc2012class.rgb_label;
+ }
+ );
+}
+
+// Convert an RGB class label to an index in the range [0, 20].
+inline uint16_t rgb_label_to_index_label(const dlib::rgb_pixel& rgb_label)
+{
+ return find_voc2012_class(rgb_label).index;
+}
+
+// Convert an image containing RGB class labels to a corresponding
+// image containing indexes in the range [0, 20].
+void rgb_label_image_to_index_label_image(
+ const dlib::matrix<dlib::rgb_pixel>& rgb_label_image,
+ dlib::matrix<uint16_t>& index_label_image
+)
+{
+ const long nr = rgb_label_image.nr();
+ const long nc = rgb_label_image.nc();
+
+ index_label_image.set_size(nr, nc);
+
+ for (long r = 0; r < nr; ++r)
+ {
+ for (long c = 0; c < nc; ++c)
+ {
+ index_label_image(r, c) = rgb_label_to_index_label(rgb_label_image(r, c));
+ }
+ }
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Calculate the per-pixel accuracy on a dataset whose file names are supplied as a parameter.
+double calculate_accuracy(anet_type& anet, const std::vector<image_info>& dataset)
+{
+ int num_right = 0;
+ int num_wrong = 0;
+
+ matrix<rgb_pixel> input_image;
+ matrix<rgb_pixel> rgb_label_image;
+ matrix<uint16_t> index_label_image;
+ matrix<uint16_t> net_output;
+
+ for (const auto& image_info : dataset)
+ {
+ // Load the input image.
+ load_image(input_image, image_info.image_filename);
+
+ // Load the ground-truth (RGB) labels.
+ load_image(rgb_label_image, image_info.label_filename);
+
+ // Create predictions for each pixel. At this point, the type of each prediction
+ // is an index (a value between 0 and 20). Note that the net may return an image
+ // that is not exactly the same size as the input.
+ const matrix<uint16_t> temp = anet(input_image);
+
+ // Convert the ground-truth RGB labels to index labels.
+ rgb_label_image_to_index_label_image(rgb_label_image, index_label_image);
+
+ // Crop the net output to be exactly the same size as the input.
+ const chip_details chip_details(
+ centered_rect(temp.nc() / 2, temp.nr() / 2, input_image.nc(), input_image.nr()),
+ chip_dims(input_image.nr(), input_image.nc())
+ );
+ extract_image_chip(temp, chip_details, net_output, interpolate_nearest_neighbor());
+
+ const long nr = index_label_image.nr();
+ const long nc = index_label_image.nc();
+
+ // Compare the predicted values to the ground-truth values.
+ for (long r = 0; r < nr; ++r)
+ {
+ for (long c = 0; c < nc; ++c)
+ {
+ const uint16_t truth = index_label_image(r, c);
+ if (truth != dlib::loss_multiclass_log_per_pixel_::label_to_ignore)
+ {
+ const uint16_t prediction = net_output(r, c);
+ if (prediction == truth)
+ {
+ ++num_right;
+ }
+ else
+ {
+ ++num_wrong;
+ }
+ }
+ }
+ }
+ }
+
+ // Return the accuracy estimate.
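+ // (Note this is plain per-pixel accuracy, with border/ignore pixels excluded. It is
+ // not the mean intersection-over-union score usually reported for VOC2012, so don't
+ // compare these numbers directly against published segmentation benchmarks.)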
+ return num_right / static_cast<double>(num_right + num_wrong);
+}
+
+// ----------------------------------------------------------------------------------------
+
+int main(int argc, char** argv) try
+{
+ if (argc != 2)
+ {
+ cout << "To run this program you need a copy of the PASCAL VOC2012 dataset." << endl;
+ cout << endl;
+ cout << "You call this program like this: " << endl;
+ cout << "./dnn_semantic_segmentation_train_ex /path/to/VOC2012" << endl;
+ return 1;
+ }
+
+ cout << "\nSCANNING PASCAL VOC2012 DATASET\n" << endl;
+
+ const auto listing = get_pascal_voc2012_train_listing(argv[1]);
+ cout << "images in dataset: " << listing.size() << endl;
+ if (listing.size() == 0)
+ {
+ cout << "Didn't find the VOC2012 dataset. " << endl;
+ return 1;
+ }
+
+
+ const double initial_learning_rate = 0.1;
+ const double weight_decay = 0.0001;
+ const double momentum = 0.9;
+
+ net_type net;
+ dnn_trainer<net_type> trainer(net,sgd(weight_decay, momentum));
+ trainer.be_verbose();
+ trainer.set_learning_rate(initial_learning_rate);
+ trainer.set_synchronization_file("pascal_voc2012_trainer_state_file.dat", std::chrono::minutes(10));
+ // This threshold is probably excessively large.
+ trainer.set_iterations_without_progress_threshold(5000);
+ // Since the progress threshold is so large, we might as well set the batch normalization
+ // stats window to something big too.
+ set_all_bn_running_stats_window_sizes(net, 1000);
+
+ // Output training parameters.
+ cout << endl << trainer << endl;
+
+ std::vector<matrix<rgb_pixel>> samples;
+ std::vector<matrix<uint16_t>> labels;
+
+ // Start a bunch of threads that read images from disk and pull out random crops. It's
+ // important to be sure to feed the GPU fast enough to keep it busy. Using multiple
+ // threads for this kind of data preparation helps us do that. Each thread puts the
+ // crops into the data queue.
+ dlib::pipe<training_sample> data(200);
+ auto f = [&data, &listing](time_t seed)
+ {
+ dlib::rand rnd(time(0)+seed);
+ matrix<rgb_pixel> input_image;
+ matrix<rgb_pixel> rgb_label_image;
+ matrix<uint16_t> index_label_image;
+ training_sample temp;
+ while(data.is_enabled())
+ {
+ // Pick a random input image.
+ const image_info& image_info = listing[rnd.get_random_32bit_number()%listing.size()];
+
+ // Load the input image.
+ load_image(input_image, image_info.image_filename);
+
+ // Load the ground-truth (RGB) labels.
+ load_image(rgb_label_image, image_info.label_filename);
+
+ // Convert the RGB labels to index labels.
+ rgb_label_image_to_index_label_image(rgb_label_image, index_label_image);
+
+ // Randomly pick a part of the image.
+ randomly_crop_image(input_image, index_label_image, temp, rnd);
+
+ // Push the result to be used by the trainer.
+ data.enqueue(temp);
+ }
+ };
+ std::thread data_loader1([f](){ f(1); });
+ std::thread data_loader2([f](){ f(2); });
+ std::thread data_loader3([f](){ f(3); });
+ std::thread data_loader4([f](){ f(4); });
+
+ // The main training loop. Keep making mini-batches and giving them to the trainer.
+ // We will run until the learning rate has dropped below 1e-4.
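+ // (dlib::pipe is a bounded, thread-safe queue: enqueue() blocks once the pipe holds
+ // 200 samples and dequeue() blocks while it is empty, so the loader threads and the
+ // training loop naturally throttle each other.)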
+ while(trainer.get_learning_rate() >= 1e-4) + { + samples.clear(); + labels.clear(); + + // make a 30-image mini-batch + training_sample temp; + while(samples.size() < 30) + { + data.dequeue(temp); + + samples.push_back(std::move(temp.input_image)); + labels.push_back(std::move(temp.label_image)); + } + + trainer.train_one_step(samples, labels); + } + + // Training done, tell threads to stop and make sure to wait for them to finish before + // moving on. + data.disable(); + data_loader1.join(); + data_loader2.join(); + data_loader3.join(); + data_loader4.join(); + + // also wait for threaded processing to stop in the trainer. + trainer.get_net(); + + net.clean(); + cout << "saving network" << endl; + serialize("semantic_segmentation_voc2012net.dnn") << net; + + + // Make a copy of the network to use it for inference. + anet_type anet = net; + + cout << "Testing the network..." << endl; + + // Find the accuracy of the newly trained network on both the training and the validation sets. + cout << "train accuracy : " << calculate_accuracy(anet, get_pascal_voc2012_train_listing(argv[1])) << endl; + cout << "val accuracy : " << calculate_accuracy(anet, get_pascal_voc2012_val_listing(argv[1])) << endl; +} +catch(std::exception& e) +{ + cout << e.what() << endl; +} + diff --git a/ml/dlib/examples/empirical_kernel_map_ex.cpp b/ml/dlib/examples/empirical_kernel_map_ex.cpp new file mode 100644 index 00000000..9f7b1a57 --- /dev/null +++ b/ml/dlib/examples/empirical_kernel_map_ex.cpp @@ -0,0 +1,355 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the empirical_kernel_map + from the dlib C++ Library. + + This example program assumes you are familiar with some general elements of + the library. In particular, you should have at least read the svm_ex.cpp + and matrix_ex.cpp examples. + + + Most of the machine learning algorithms in dlib are some flavor of "kernel machine". + This means they are all simple linear algorithms that have been formulated such + that the only way they look at the data given by a user is via dot products between + the data samples. These algorithms are made more useful via the application of the + so-called kernel trick. This trick is to replace the dot product with a user + supplied function which takes two samples and returns a real number. This function + is the kernel that is required by so many algorithms. The most basic kernel is the + linear_kernel which is simply a normal dot product. More interesting, however, + are kernels which first apply some nonlinear transformation to the user's data samples + and then compute a dot product. In this way, a simple algorithm that finds a linear + plane to separate data (e.g. the SVM algorithm) can be made to solve complex + nonlinear learning problems. + + An important element of the kernel trick is that these kernel functions perform + the nonlinear transformation implicitly. That is, if you look at the implementations + of these kernel functions you won't see code that transforms two input vectors in + some way and then computes their dot products. Instead you will see a simple function + that takes two input vectors and just computes a single real number via some simple + process. You can basically think of this as an optimization. 
Imagine that originally
+ we wrote out the entire procedure to perform the nonlinear transformation and then
+ compute the dot product but then noticed we could cancel a few terms here and there
+ and simplify the whole thing down into a more compact and easily evaluated form.
+ The result is a nice function that computes what we want but we no longer get to see
+ what those nonlinearly transformed input vectors are.
+
+ The empirical_kernel_map is a tool that undoes this. It allows you to obtain these
+ nonlinearly transformed vectors. It does this by taking a set of data samples from
+ the user (referred to as basis samples), applying the nonlinear transformation to all
+ of them, and then constructing a set of orthonormal basis vectors which spans the space
+ occupied by those transformed input samples. Then if we wish to obtain the nonlinear
+ version of any data sample we can simply project it onto this orthonormal basis and
+ we obtain a regular vector of real numbers which represents the nonlinearly transformed
+ version of the data sample. The empirical_kernel_map has been formulated to use only
+ dot products between data samples so it is capable of performing this service for any
+ user supplied kernel function.
+
+ The empirical_kernel_map is useful because it is often difficult to formulate an
+ algorithm in a way that uses only dot products. So the empirical_kernel_map lets
+ us easily kernelize any algorithm we like by using this object during a preprocessing
+ step. However, it should be noted that the algorithm is only practical when used
+ with at most a few thousand basis samples. Fortunately, most datasets live in
+ subspaces that are relatively low dimensional. So for these datasets, using the
+ empirical_kernel_map is practical assuming an appropriate set of basis samples can be
+ selected by the user. To help with this dlib supplies the linearly_independent_subset_finder.
+ I also often find that just picking a random subset of the data as a basis works well.
+
+
+
+ In what follows, we walk through the process of creating an empirical_kernel_map,
+ projecting data to obtain the nonlinearly transformed vectors, and then doing a
+ few interesting things with the data.
+*/
+
+
+
+
+#include <dlib/svm.h>
+#include <dlib/rand.h>
+#include <iostream>
+#include <vector>
+
+
+using namespace std;
+using namespace dlib;
+
+// ----------------------------------------------------------------------------------------
+
+// First let's make a typedef for the kind of samples we will be using.
+typedef matrix<double, 0, 1> sample_type;
+
+// We will be using the radial_basis_kernel in this example program.
+typedef radial_basis_kernel<sample_type> kernel_type;
+
+// ----------------------------------------------------------------------------------------
+
+void generate_concentric_circles (
+ std::vector<sample_type>& samples,
+ std::vector<double>& labels,
+ const int num_points
+);
+/*!
+ requires
+ - num_points > 0
+ ensures
+ - generates two circles centered at the point (0,0), one of radius 1 and
+ the other of radius 5. These points are stored into samples. labels will
+ tell you if a given sample is from the smaller circle (its label will be 1)
+ or from the larger circle (its label will be 2).
+
+// ----------------------------------------------------------------------------------------
+
+void test_empirical_kernel_map (
+    const std::vector<sample_type>& samples,
+    const std::vector<double>& labels,
+    const empirical_kernel_map<kernel_type>& ekm
+);
+/*!
+    This function computes various interesting things with the empirical_kernel_map.
+    See its implementation below for details.
+!*/
+
+// ----------------------------------------------------------------------------------------
+
+int main()
+{
+    std::vector<sample_type> samples;
+    std::vector<double> labels;
+
+    // Declare an instance of the kernel we will be using.
+    const kernel_type kern(0.1);
+
+    // Create a dataset with two concentric circles.  There will be 100 points on each circle.
+    generate_concentric_circles(samples, labels, 100);
+
+    empirical_kernel_map<kernel_type> ekm;
+
+    // Here we create an empirical_kernel_map using all of our data samples as basis samples.
+    cout << "\n\nBuilding an empirical_kernel_map with " << samples.size() << " basis samples." << endl;
+    ekm.load(kern, samples);
+    cout << "Test the empirical_kernel_map when loaded with every sample." << endl;
+    test_empirical_kernel_map(samples, labels, ekm);
+
+    // Create a new dataset with two concentric circles.  There will be 1000 points on each circle.
+    generate_concentric_circles(samples, labels, 1000);
+    // Rather than use all 2000 samples as basis samples, we are going to use the
+    // linearly_independent_subset_finder to pick out a good basis set.  The idea behind this
+    // object is to try and find the 40 or so samples that best span the subspace containing
+    // all the data.
+    linearly_independent_subset_finder<kernel_type> lisf(kern, 40);
+    // Populate lisf with samples.  We have configured it to allow at most 40 samples, but this
+    // function may determine that fewer samples are necessary to form a good basis.  In this
+    // example program it will select only 26.
+    fill_lisf(lisf, samples);
+
+    // Now reload the empirical_kernel_map, but this time using only our small basis
+    // selected using the linearly_independent_subset_finder.
+    cout << "\n\nBuilding an empirical_kernel_map with " << lisf.size() << " basis samples." << endl;
+    ekm.load(lisf);
+    cout << "Test the empirical_kernel_map when loaded with samples from the lisf object." << endl;
+    test_empirical_kernel_map(samples, labels, ekm);
+
+    cout << endl;
+}
+
+// ----------------------------------------------------------------------------------------
+
+void test_empirical_kernel_map (
+    const std::vector<sample_type>& samples,
+    const std::vector<double>& labels,
+    const empirical_kernel_map<kernel_type>& ekm
+)
+{
+    std::vector<sample_type> projected_samples;
+
+    // The first thing we do is compute the nonlinearly projected vectors using the
+    // empirical_kernel_map.
+    for (unsigned long i = 0; i < samples.size(); ++i)
+    {
+        projected_samples.push_back(ekm.project(samples[i]));
+    }
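+
+    // As an aside, if you want to know how faithfully a given sample is represented
+    // by the basis set you can use the two-argument overload of project() (see the
+    // empirical_kernel_map documentation), e.g.:
+    //     double err;
+    //     sample_type p = ekm.project(samples[0], err);
+    // Here err is the projection error, which is essentially zero for any sample
+    // lying in the span of the basis.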
+
+    // Note that a kernel matrix is just a matrix M such that M(i,j) == kernel(samples[i],samples[j]).
+    // So below we are computing the normal kernel matrix as given by the radial_basis_kernel and the
+    // input samples.  We also compute the kernel matrix for all the projected_samples as given by the
+    // linear_kernel.  Note that the linear_kernel just computes normal dot products.  So what we want
+    // to see is that the dot products between all the projected_samples are the same as the outputs
+    // of the kernel function for their respective untransformed input samples.  If they match, then
+    // we know that the empirical_kernel_map is working properly.
+    const matrix<double> normal_kernel_matrix = kernel_matrix(ekm.get_kernel(), samples);
+    const matrix<double> new_kernel_matrix = kernel_matrix(linear_kernel<sample_type>(), projected_samples);
+
+    cout << "Max kernel matrix error: " << max(abs(normal_kernel_matrix - new_kernel_matrix)) << endl;
+    cout << "Mean kernel matrix error: " << mean(abs(normal_kernel_matrix - new_kernel_matrix)) << endl;
+    /*
+        Example outputs from these cout statements.
+        For the case where we use all samples as basis samples:
+            Max kernel matrix error: 7.32747e-15
+            Mean kernel matrix error: 7.47789e-16
+
+        For the case where we use only 26 samples as basis samples:
+            Max kernel matrix error: 0.000953573
+            Mean kernel matrix error: 2.26008e-05
+
+        Note that if we use enough basis samples we can perfectly span the space of input samples.
+        In that case we get errors that are essentially just rounding noise (moreover, using all the
+        samples is always enough since they are always within their own span).  Once we start
+        to use fewer basis samples we may begin to get approximation error.  In the second case we
+        used 26, and we can see that the data doesn't really lie exactly in a 26-dimensional subspace.
+        But it is pretty close.
+    */
+
+    // Now let's do something more interesting.  The following loop finds the centroids
+    // of the two classes of data.
+    sample_type class1_center;
+    sample_type class2_center;
+    for (unsigned long i = 0; i < projected_samples.size(); ++i)
+    {
+        if (labels[i] == 1)
+            class1_center += projected_samples[i];
+        else
+            class2_center += projected_samples[i];
+    }
+
+    const int points_per_class = samples.size()/2;
+    class1_center /= points_per_class;
+    class2_center /= points_per_class;
+
+    // Now classify points by which center they are nearest to.  Recall that the data
+    // is made up of two concentric circles.  Normally you can't separate two concentric
+    // circles by checking which center each point is nearest to, since they have the same
+    // center.  However, the kernel trick makes the data separable, and the loop below will
+    // perfectly classify each data point.
+    for (unsigned long i = 0; i < projected_samples.size(); ++i)
+    {
+        double distance_to_class1 = length(projected_samples[i] - class1_center);
+        double distance_to_class2 = length(projected_samples[i] - class2_center);
+
+        bool predicted_as_class_1 = (distance_to_class1 < distance_to_class2);
+
+        // Print a message for any misclassified points.
+        if (predicted_as_class_1 == true && labels[i] != 1)
+            cout << "A point was misclassified" << endl;
+
+        if (predicted_as_class_1 == false && labels[i] != 2)
+            cout << "A point was misclassified" << endl;
+    }
+
+    // Next, note that classifying a point based on its distances to two other
+    // points is the same thing as using the plane that lies halfway between those two
+    // points as a decision boundary.  So let's compute that decision plane and use it
+    // to classify all the points.
+
+    sample_type plane_normal_vector = class1_center - class2_center;
+    // The point right in the center of our two classes should be on the deciding plane, not
+    // on one side or the other.  This consideration brings us to the formula for the bias.
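+    // In symbols: the decision rule is f(x) = dot(w,x) - b with w = class1_center - class2_center.
+    // Requiring the midpoint m = (class1_center + class2_center)/2 to satisfy f(m) = 0
+    // forces b = dot(m, w), which is exactly the expression used next.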
+ double bias = dot((class1_center+class2_center)/2, plane_normal_vector); + + // Now classify points by which side of the plane they are on. + for (unsigned long i = 0; i < projected_samples.size(); ++i) + { + double side = dot(plane_normal_vector, projected_samples[i]) - bias; + + bool predicted_as_class_1 = (side > 0); + + // Now print a message for any misclassified points. + if (predicted_as_class_1 == true && labels[i] != 1) + cout << "A point was misclassified" << endl; + + if (predicted_as_class_1 == false && labels[i] != 2) + cout << "A point was misclassified" << endl; + } + + + // It would be nice to convert this decision rule into a normal decision_function object and + // dispense with the empirical_kernel_map. Happily, it is possible to do so. Consider the + // following example code: + decision_function<kernel_type> dec_funct = ekm.convert_to_decision_function(plane_normal_vector); + // The dec_funct now computes dot products between plane_normal_vector and the projection + // of any sample point given to it. All that remains is to account for the bias. + dec_funct.b = bias; + + // now classify points by which side of the plane they are on. + for (unsigned long i = 0; i < samples.size(); ++i) + { + double side = dec_funct(samples[i]); + + // And let's just check that the dec_funct really does compute the same thing as the previous equation. + double side_alternate_equation = dot(plane_normal_vector, projected_samples[i]) - bias; + if (abs(side-side_alternate_equation) > 1e-14) + cout << "dec_funct error: " << abs(side-side_alternate_equation) << endl; + + bool predicted_as_class_1 = (side > 0); + + // Now print a message for any misclassified points. + if (predicted_as_class_1 == true && labels[i] != 1) + cout << "A point was misclassified" << endl; + + if (predicted_as_class_1 == false && labels[i] != 2) + cout << "A point was misclassified" << endl; + } + +} + +// ---------------------------------------------------------------------------------------- + +void generate_concentric_circles ( + std::vector<sample_type>& samples, + std::vector<double>& labels, + const int num +) +{ + sample_type m(2,1); + samples.clear(); + labels.clear(); + + dlib::rand rnd; + + // make some samples near the origin + double radius = 1.0; + for (long i = 0; i < num; ++i) + { + double sign = 1; + if (rnd.get_random_double() < 0.5) + sign = -1; + m(0) = 2*radius*rnd.get_random_double()-radius; + m(1) = sign*sqrt(radius*radius - m(0)*m(0)); + + samples.push_back(m); + labels.push_back(1); + } + + // make some samples in a circle around the origin but far away + radius = 5.0; + for (long i = 0; i < num; ++i) + { + double sign = 1; + if (rnd.get_random_double() < 0.5) + sign = -1; + m(0) = 2*radius*rnd.get_random_double()-radius; + m(1) = sign*sqrt(radius*radius - m(0)*m(0)); + + samples.push_back(m); + labels.push_back(2); + } +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/face_detection_ex.cpp b/ml/dlib/examples/face_detection_ex.cpp new file mode 100644 index 00000000..9569d44e --- /dev/null +++ b/ml/dlib/examples/face_detection_ex.cpp @@ -0,0 +1,103 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This example program shows how to find frontal human faces in an image. In + particular, this program shows how you can take a list of images from the + command line and display each on the screen with red boxes overlaid on each + human face. 
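+
+    Stripped of the display code, the detection pipeline is just a handful of calls
+    (a minimal sketch; the image name is one of the files shipped in examples/faces):
+        frontal_face_detector detector = get_frontal_face_detector();
+        array2d<unsigned char> img;
+        load_image(img, "faces/2008_001009.jpg");
+        pyramid_up(img);
+        std::vector<rectangle> dets = detector(img);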
+ + The examples/faces folder contains some jpg images of people. You can run + this program on them and see the detections by executing the following command: + ./face_detection_ex faces/*.jpg + + + This face detector is made using the now classic Histogram of Oriented + Gradients (HOG) feature combined with a linear classifier, an image pyramid, + and sliding window detection scheme. This type of object detector is fairly + general and capable of detecting many types of semi-rigid objects in + addition to human faces. Therefore, if you are interested in making your + own object detectors then read the fhog_object_detector_ex.cpp example + program. It shows how to use the machine learning tools which were used to + create dlib's face detector. + + + Finally, note that the face detector is fastest when compiled with at least + SSE2 instructions enabled. So if you are using a PC with an Intel or AMD + chip then you should enable at least SSE2 instructions. If you are using + cmake to compile this program you can enable them by using one of the + following commands when you create the build project: + cmake path_to_dlib_root/examples -DUSE_SSE2_INSTRUCTIONS=ON + cmake path_to_dlib_root/examples -DUSE_SSE4_INSTRUCTIONS=ON + cmake path_to_dlib_root/examples -DUSE_AVX_INSTRUCTIONS=ON + This will set the appropriate compiler options for GCC, clang, Visual + Studio, or the Intel compiler. If you are using another compiler then you + need to consult your compiler's manual to determine how to enable these + instructions. Note that AVX is the fastest but requires a CPU from at least + 2011. SSE4 is the next fastest and is supported by most current machines. +*/ + + +#include <dlib/image_processing/frontal_face_detector.h> +#include <dlib/gui_widgets.h> +#include <dlib/image_io.h> +#include <iostream> + +using namespace dlib; +using namespace std; + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) +{ + try + { + if (argc == 1) + { + cout << "Give some image files as arguments to this program." << endl; + return 0; + } + + frontal_face_detector detector = get_frontal_face_detector(); + image_window win; + + // Loop over all the images provided on the command line. + for (int i = 1; i < argc; ++i) + { + cout << "processing image " << argv[i] << endl; + array2d<unsigned char> img; + load_image(img, argv[i]); + // Make the image bigger by a factor of two. This is useful since + // the face detector looks for faces that are about 80 by 80 pixels + // or larger. Therefore, if you want to find faces that are smaller + // than that then you need to upsample the image as we do here by + // calling pyramid_up(). So this will allow it to detect faces that + // are at least 40 by 40 pixels in size. We could call pyramid_up() + // again to find even smaller faces, but note that every time we + // upsample the image we make the detector run slower since it must + // process a larger image. + pyramid_up(img); + + // Now tell the face detector to give us a list of bounding boxes + // around all the faces it can find in the image. + std::vector<rectangle> dets = detector(img); + + cout << "Number of faces detected: " << dets.size() << endl; + // Now we show the image on the screen and the face detections as + // red overlay boxes. + win.clear_overlay(); + win.set_image(img); + win.add_overlay(dets, rgb_pixel(255,0,0)); + + cout << "Hit enter to process the next image..." 
<< endl; + cin.get(); + } + } + catch (exception& e) + { + cout << "\nexception thrown!" << endl; + cout << e.what() << endl; + } +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/face_landmark_detection_ex.cpp b/ml/dlib/examples/face_landmark_detection_ex.cpp new file mode 100644 index 00000000..6ab7fdf9 --- /dev/null +++ b/ml/dlib/examples/face_landmark_detection_ex.cpp @@ -0,0 +1,144 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This example program shows how to find frontal human faces in an image and + estimate their pose. The pose takes the form of 68 landmarks. These are + points on the face such as the corners of the mouth, along the eyebrows, on + the eyes, and so forth. + + + + The face detector we use is made using the classic Histogram of Oriented + Gradients (HOG) feature combined with a linear classifier, an image pyramid, + and sliding window detection scheme. The pose estimator was created by + using dlib's implementation of the paper: + One Millisecond Face Alignment with an Ensemble of Regression Trees by + Vahid Kazemi and Josephine Sullivan, CVPR 2014 + and was trained on the iBUG 300-W face landmark dataset (see + https://ibug.doc.ic.ac.uk/resources/facial-point-annotations/): + C. Sagonas, E. Antonakos, G, Tzimiropoulos, S. Zafeiriou, M. Pantic. + 300 faces In-the-wild challenge: Database and results. + Image and Vision Computing (IMAVIS), Special Issue on Facial Landmark Localisation "In-The-Wild". 2016. + You can get the trained model file from: + http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2. + Note that the license for the iBUG 300-W dataset excludes commercial use. + So you should contact Imperial College London to find out if it's OK for + you to use this model file in a commercial product. + + + Also, note that you can train your own models using dlib's machine learning + tools. See train_shape_predictor_ex.cpp to see an example. + + + + + Finally, note that the face detector is fastest when compiled with at least + SSE2 instructions enabled. So if you are using a PC with an Intel or AMD + chip then you should enable at least SSE2 instructions. If you are using + cmake to compile this program you can enable them by using one of the + following commands when you create the build project: + cmake path_to_dlib_root/examples -DUSE_SSE2_INSTRUCTIONS=ON + cmake path_to_dlib_root/examples -DUSE_SSE4_INSTRUCTIONS=ON + cmake path_to_dlib_root/examples -DUSE_AVX_INSTRUCTIONS=ON + This will set the appropriate compiler options for GCC, clang, Visual + Studio, or the Intel compiler. If you are using another compiler then you + need to consult your compiler's manual to determine how to enable these + instructions. Note that AVX is the fastest but requires a CPU from at least + 2011. SSE4 is the next fastest and is supported by most current machines. +*/ + + +#include <dlib/image_processing/frontal_face_detector.h> +#include <dlib/image_processing/render_face_detections.h> +#include <dlib/image_processing.h> +#include <dlib/gui_widgets.h> +#include <dlib/image_io.h> +#include <iostream> + +using namespace dlib; +using namespace std; + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) +{ + try + { + // This example takes in a shape model file and then a list of images to + // process. We will take these filenames in as command line arguments. 
+ // Dlib comes with example images in the examples/faces folder so give + // those as arguments to this program. + if (argc == 1) + { + cout << "Call this program like this:" << endl; + cout << "./face_landmark_detection_ex shape_predictor_68_face_landmarks.dat faces/*.jpg" << endl; + cout << "\nYou can get the shape_predictor_68_face_landmarks.dat file from:\n"; + cout << "http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2" << endl; + return 0; + } + + // We need a face detector. We will use this to get bounding boxes for + // each face in an image. + frontal_face_detector detector = get_frontal_face_detector(); + // And we also need a shape_predictor. This is the tool that will predict face + // landmark positions given an image and face bounding box. Here we are just + // loading the model from the shape_predictor_68_face_landmarks.dat file you gave + // as a command line argument. + shape_predictor sp; + deserialize(argv[1]) >> sp; + + + image_window win, win_faces; + // Loop over all the images provided on the command line. + for (int i = 2; i < argc; ++i) + { + cout << "processing image " << argv[i] << endl; + array2d<rgb_pixel> img; + load_image(img, argv[i]); + // Make the image larger so we can detect small faces. + pyramid_up(img); + + // Now tell the face detector to give us a list of bounding boxes + // around all the faces in the image. + std::vector<rectangle> dets = detector(img); + cout << "Number of faces detected: " << dets.size() << endl; + + // Now we will go ask the shape_predictor to tell us the pose of + // each face we detected. + std::vector<full_object_detection> shapes; + for (unsigned long j = 0; j < dets.size(); ++j) + { + full_object_detection shape = sp(img, dets[j]); + cout << "number of parts: "<< shape.num_parts() << endl; + cout << "pixel position of first part: " << shape.part(0) << endl; + cout << "pixel position of second part: " << shape.part(1) << endl; + // You get the idea, you can get all the face part locations if + // you want them. Here we just store them in shapes so we can + // put them on the screen. + shapes.push_back(shape); + } + + // Now let's view our face poses on the screen. + win.clear_overlay(); + win.set_image(img); + win.add_overlay(render_face_detections(shapes)); + + // We can also extract copies of each face that are cropped, rotated upright, + // and scaled to a standard size as shown here: + dlib::array<array2d<rgb_pixel> > face_chips; + extract_image_chips(img, get_face_chip_details(shapes), face_chips); + win_faces.set_image(tile_images(face_chips)); + + cout << "Hit enter to process the next image..." << endl; + cin.get(); + } + } + catch (exception& e) + { + cout << "\nexception thrown!" 
<< endl; + cout << e.what() << endl; + } +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/faces/2007_007763.jpg b/ml/dlib/examples/faces/2007_007763.jpg Binary files differnew file mode 100755 index 00000000..6f19d2d6 --- /dev/null +++ b/ml/dlib/examples/faces/2007_007763.jpg diff --git a/ml/dlib/examples/faces/2008_001009.jpg b/ml/dlib/examples/faces/2008_001009.jpg Binary files differnew file mode 100755 index 00000000..411aeb3c --- /dev/null +++ b/ml/dlib/examples/faces/2008_001009.jpg diff --git a/ml/dlib/examples/faces/2008_001322.jpg b/ml/dlib/examples/faces/2008_001322.jpg Binary files differnew file mode 100755 index 00000000..354db0b6 --- /dev/null +++ b/ml/dlib/examples/faces/2008_001322.jpg diff --git a/ml/dlib/examples/faces/2008_002079.jpg b/ml/dlib/examples/faces/2008_002079.jpg Binary files differnew file mode 100755 index 00000000..8d19673e --- /dev/null +++ b/ml/dlib/examples/faces/2008_002079.jpg diff --git a/ml/dlib/examples/faces/2008_002470.jpg b/ml/dlib/examples/faces/2008_002470.jpg Binary files differnew file mode 100755 index 00000000..fb0e44cb --- /dev/null +++ b/ml/dlib/examples/faces/2008_002470.jpg diff --git a/ml/dlib/examples/faces/2008_002506.jpg b/ml/dlib/examples/faces/2008_002506.jpg Binary files differnew file mode 100755 index 00000000..7508cb95 --- /dev/null +++ b/ml/dlib/examples/faces/2008_002506.jpg diff --git a/ml/dlib/examples/faces/2008_004176.jpg b/ml/dlib/examples/faces/2008_004176.jpg Binary files differnew file mode 100755 index 00000000..f018b743 --- /dev/null +++ b/ml/dlib/examples/faces/2008_004176.jpg diff --git a/ml/dlib/examples/faces/2008_007676.jpg b/ml/dlib/examples/faces/2008_007676.jpg Binary files differnew file mode 100755 index 00000000..646196f3 --- /dev/null +++ b/ml/dlib/examples/faces/2008_007676.jpg diff --git a/ml/dlib/examples/faces/2009_004587.jpg b/ml/dlib/examples/faces/2009_004587.jpg Binary files differnew file mode 100755 index 00000000..e10c42d9 --- /dev/null +++ b/ml/dlib/examples/faces/2009_004587.jpg diff --git a/ml/dlib/examples/faces/Tom_Cruise_avp_2014_4.jpg b/ml/dlib/examples/faces/Tom_Cruise_avp_2014_4.jpg Binary files differnew file mode 100644 index 00000000..bb2d7332 --- /dev/null +++ b/ml/dlib/examples/faces/Tom_Cruise_avp_2014_4.jpg diff --git a/ml/dlib/examples/faces/bald_guys.jpg b/ml/dlib/examples/faces/bald_guys.jpg Binary files differnew file mode 100644 index 00000000..dbd431f8 --- /dev/null +++ b/ml/dlib/examples/faces/bald_guys.jpg diff --git a/ml/dlib/examples/faces/dogs.jpg b/ml/dlib/examples/faces/dogs.jpg Binary files differnew file mode 100644 index 00000000..15667141 --- /dev/null +++ b/ml/dlib/examples/faces/dogs.jpg diff --git a/ml/dlib/examples/faces/image_metadata_stylesheet.xsl b/ml/dlib/examples/faces/image_metadata_stylesheet.xsl new file mode 100644 index 00000000..5d4a2953 --- /dev/null +++ b/ml/dlib/examples/faces/image_metadata_stylesheet.xsl @@ -0,0 +1,109 @@ +<?xml version="1.0" encoding="ISO-8859-1" ?> + +<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:output method='html' version='1.0' encoding='UTF-8' indent='yes' /> + +<!-- ************************************************************************* --> + + <xsl:variable name="max_images_displayed">30</xsl:variable> + +<!-- ************************************************************************* --> + + <xsl:template match="/dataset"> + <html> + <head> + + <style type="text/css"> + div#box{ + position: 
absolute; + border-style:solid; + border-width:1px; + border-color:red; + } + + div#circle{ + position: absolute; + border-style:solid; + border-width:1px; + border-color:red; + border-radius:7px; + width:1px; + height: 1px; + } + + div#label{ + position: absolute; + color: red; + } + + div#img{ + position: relative; + margin-bottom:2em; + } + + + pre { + color: black; + margin: 1em 0.25in; + padding: 0.5em; + background: rgb(240,240,240); + border-top: black dotted 1px; + border-left: black dotted 1px; + border-right: black solid 2px; + border-bottom: black solid 2px; + } + + </style> + + </head> + + <body> + Dataset name: <b><xsl:value-of select='/dataset/name'/></b> <br/> + Dataset comment: <pre><xsl:value-of select='/dataset/comment'/></pre> <br/> + Number of images: <xsl:value-of select="count(images/image)"/> <br/> + Number of boxes: <xsl:value-of select="count(images/image/box)"/> <br/> + <br/> + <hr/> + + <!-- Show a warning if we aren't going to show all the images --> + <xsl:if test="count(images/image) > $max_images_displayed"> + <h2>Only displaying the first <xsl:value-of select="$max_images_displayed"/> images.</h2> + <hr/> + </xsl:if> + + + <xsl:for-each select="images/image"> + <!-- Don't try to display too many images. It makes your browser hang --> + <xsl:if test="position() <= $max_images_displayed"> + <b><xsl:value-of select="@file"/></b> (Number of boxes: <xsl:value-of select="count(box)"/>) + <div id="img"> + <img src="{@file}"/> + <xsl:for-each select="box"> + <div id="box" style="top: {@top}px; left: {@left}px; width: {@width}px; height: {@height}px;"></div> + + <!-- If there is a label then display it in the lower right corner. --> + <xsl:if test="label"> + <div id="label" style="top: {@top+@height}px; left: {@left+@width}px;"> + <xsl:value-of select="label"/> + </div> + </xsl:if> + + <xsl:for-each select="part"> + <!-- + <div id="label" style="top: {@y+7}px; left: {@x}px;"> + <xsl:value-of select="@name"/> + </div> + --> + <div id="circle" style="top: {(@y)}px; left: {(@x)}px; "></div> + </xsl:for-each> + </xsl:for-each> + </div> + </xsl:if> + </xsl:for-each> + </body> + </html> + </xsl:template> + + <!-- ************************************************************************* --> + +</xsl:stylesheet> diff --git a/ml/dlib/examples/faces/testing.xml b/ml/dlib/examples/faces/testing.xml new file mode 100644 index 00000000..f7ef446c --- /dev/null +++ b/ml/dlib/examples/faces/testing.xml @@ -0,0 +1,43 @@ +<?xml version='1.0' encoding='ISO-8859-1'?> +<?xml-stylesheet type='text/xsl' href='image_metadata_stylesheet.xsl'?> +<dataset> +<name>Testing faces</name> +<comment>These are images from the PASCAL VOC 2011 dataset.</comment> +<images> + <image file='2008_002470.jpg'> + <box top='181' left='274' width='52' height='53'/> + <box top='156' left='55' width='44' height='44'/> + <box top='166' left='146' width='37' height='37'/> + <box top='55' left='329' width='44' height='44'/> + <box top='74' left='233' width='44' height='44'/> + <box top='86' left='178' width='37' height='37'/> + </image> + <image file='2008_002506.jpg'> + <box top='78' left='329' width='109' height='109'/> + <box top='95' left='224' width='91' height='91'/> + <box top='65' left='125' width='90' height='91'/> + </image> + <image file='2008_004176.jpg'> + <box top='230' left='206' width='37' height='37'/> + <box top='118' left='162' width='37' height='37'/> + <box top='82' left='190' width='37' height='37'/> + <box top='78' left='326' width='37' height='37'/> + <box top='98' left='222' width='37' 
height='37'/> + <box top='86' left='110' width='37' height='37'/> + <box top='102' left='282' width='37' height='37'/> + </image> + <image file='2008_007676.jpg'> + <box top='62' left='226' width='37' height='37'/> + <box top='113' left='194' width='44' height='44'/> + <box top='130' left='262' width='37' height='37'/> + <box top='134' left='366' width='37' height='37'/> + <box top='122' left='314' width='37' height='37'/> + <box top='141' left='107' width='52' height='53'/> + <box top='84' left='137' width='44' height='44'/> + </image> + <image file='2009_004587.jpg'> + <box top='46' left='154' width='75' height='76'/> + <box top='280' left='266' width='63' height='63'/> + </image> +</images> +</dataset> diff --git a/ml/dlib/examples/faces/testing_with_face_landmarks.xml b/ml/dlib/examples/faces/testing_with_face_landmarks.xml new file mode 100644 index 00000000..7589561b --- /dev/null +++ b/ml/dlib/examples/faces/testing_with_face_landmarks.xml @@ -0,0 +1,1772 @@ +<?xml version='1.0' encoding='ISO-8859-1'?> +<?xml-stylesheet type='text/xsl' href='image_metadata_stylesheet.xsl'?> +<dataset> +<name>Testing faces</name> +<comment>These are images from the PASCAL VOC 2011 dataset. + The face landmarks are from dlib's shape_predictor_68_face_landmarks.dat + landmarking model. The model uses the 68 landmark scheme used by the iBUG + 300-W dataset. +</comment> +<images> + <image file='2008_002470.jpg'> + <box top='181' left='274' width='52' height='53'> + <part name='00' x='277' y='194'/> + <part name='01' x='278' y='200'/> + <part name='02' x='278' y='206'/> + <part name='03' x='279' y='212'/> + <part name='04' x='281' y='218'/> + <part name='05' x='285' y='223'/> + <part name='06' x='289' y='227'/> + <part name='07' x='294' y='231'/> + <part name='08' x='300' y='232'/> + <part name='09' x='306' y='231'/> + <part name='10' x='312' y='228'/> + <part name='11' x='317' y='223'/> + <part name='12' x='321' y='218'/> + <part name='13' x='323' y='212'/> + <part name='14' x='324' y='205'/> + <part name='15' x='325' y='198'/> + <part name='16' x='325' y='192'/> + <part name='17' x='280' y='190'/> + <part name='18' x='282' y='188'/> + <part name='19' x='286' y='187'/> + <part name='20' x='290' y='187'/> + <part name='21' x='294' y='189'/> + <part name='22' x='303' y='189'/> + <part name='23' x='306' y='187'/> + <part name='24' x='311' y='187'/> + <part name='25' x='315' y='188'/> + <part name='26' x='318' y='190'/> + <part name='27' x='298' y='194'/> + <part name='28' x='298' y='199'/> + <part name='29' x='298' y='204'/> + <part name='30' x='298' y='209'/> + <part name='31' x='293' y='210'/> + <part name='32' x='296' y='211'/> + <part name='33' x='298' y='212'/> + <part name='34' x='301' y='211'/> + <part name='35' x='304' y='210'/> + <part name='36' x='285' y='195'/> + <part name='37' x='287' y='194'/> + <part name='38' x='290' y='194'/> + <part name='39' x='292' y='195'/> + <part name='40' x='290' y='196'/> + <part name='41' x='287' y='196'/> + <part name='42' x='306' y='195'/> + <part name='43' x='308' y='194'/> + <part name='44' x='311' y='194'/> + <part name='45' x='313' y='195'/> + <part name='46' x='311' y='195'/> + <part name='47' x='308' y='196'/> + <part name='48' x='291' y='216'/> + <part name='49' x='294' y='216'/> + <part name='50' x='297' y='216'/> + <part name='51' x='299' y='216'/> + <part name='52' x='302' y='216'/> + <part name='53' x='306' y='216'/> + <part name='54' x='310' y='216'/> + <part name='55' x='306' y='220'/> + <part name='56' x='302' y='221'/> + <part name='57' x='299' 
y='221'/> + <part name='58' x='297' y='221'/> + <part name='59' x='293' y='220'/> + <part name='60' x='292' y='217'/> + <part name='61' x='297' y='218'/> + <part name='62' x='299' y='218'/> + <part name='63' x='302' y='217'/> + <part name='64' x='308' y='217'/> + <part name='65' x='302' y='218'/> + <part name='66' x='299' y='218'/> + <part name='67' x='297' y='218'/> + </box> + <box top='156' left='55' width='44' height='44'> + <part name='00' x='54' y='170'/> + <part name='01' x='55' y='176'/> + <part name='02' x='55' y='182'/> + <part name='03' x='57' y='188'/> + <part name='04' x='59' y='193'/> + <part name='05' x='63' y='197'/> + <part name='06' x='68' y='201'/> + <part name='07' x='73' y='203'/> + <part name='08' x='79' y='204'/> + <part name='09' x='84' y='202'/> + <part name='10' x='88' y='199'/> + <part name='11' x='91' y='195'/> + <part name='12' x='94' y='191'/> + <part name='13' x='95' y='186'/> + <part name='14' x='96' y='181'/> + <part name='15' x='97' y='176'/> + <part name='16' x='97' y='171'/> + <part name='17' x='61' y='166'/> + <part name='18' x='64' y='164'/> + <part name='19' x='68' y='163'/> + <part name='20' x='72' y='163'/> + <part name='21' x='76' y='165'/> + <part name='22' x='85' y='166'/> + <part name='23' x='88' y='164'/> + <part name='24' x='91' y='164'/> + <part name='25' x='94' y='165'/> + <part name='26' x='96' y='167'/> + <part name='27' x='80' y='170'/> + <part name='28' x='81' y='173'/> + <part name='29' x='81' y='176'/> + <part name='30' x='81' y='180'/> + <part name='31' x='76' y='182'/> + <part name='32' x='78' y='183'/> + <part name='33' x='80' y='184'/> + <part name='34' x='82' y='183'/> + <part name='35' x='84' y='183'/> + <part name='36' x='65' y='170'/> + <part name='37' x='68' y='169'/> + <part name='38' x='71' y='169'/> + <part name='39' x='73' y='170'/> + <part name='40' x='71' y='171'/> + <part name='41' x='68' y='171'/> + <part name='42' x='85' y='171'/> + <part name='43' x='88' y='169'/> + <part name='44' x='90' y='169'/> + <part name='45' x='92' y='171'/> + <part name='46' x='90' y='171'/> + <part name='47' x='88' y='171'/> + <part name='48' x='71' y='190'/> + <part name='49' x='75' y='188'/> + <part name='50' x='78' y='188'/> + <part name='51' x='80' y='188'/> + <part name='52' x='82' y='188'/> + <part name='53' x='85' y='188'/> + <part name='54' x='87' y='189'/> + <part name='55' x='85' y='191'/> + <part name='56' x='82' y='192'/> + <part name='57' x='80' y='192'/> + <part name='58' x='78' y='192'/> + <part name='59' x='74' y='192'/> + <part name='60' x='72' y='190'/> + <part name='61' x='78' y='189'/> + <part name='62' x='80' y='189'/> + <part name='63' x='82' y='189'/> + <part name='64' x='86' y='189'/> + <part name='65' x='82' y='189'/> + <part name='66' x='80' y='190'/> + <part name='67' x='78' y='190'/> + </box> + <box top='166' left='146' width='37' height='37'> + <part name='00' x='152' y='181'/> + <part name='01' x='152' y='185'/> + <part name='02' x='153' y='188'/> + <part name='03' x='154' y='192'/> + <part name='04' x='156' y='195'/> + <part name='05' x='159' y='198'/> + <part name='06' x='162' y='201'/> + <part name='07' x='165' y='203'/> + <part name='08' x='168' y='203'/> + <part name='09' x='172' y='202'/> + <part name='10' x='176' y='200'/> + <part name='11' x='179' y='198'/> + <part name='12' x='181' y='195'/> + <part name='13' x='183' y='191'/> + <part name='14' x='184' y='187'/> + <part name='15' x='184' y='183'/> + <part name='16' x='184' y='178'/> + <part name='17' x='154' y='176'/> + <part name='18' x='155' y='174'/> 
+ <part name='19' x='157' y='174'/> + <part name='20' x='160' y='174'/> + <part name='21' x='162' y='175'/> + <part name='22' x='170' y='174'/> + <part name='23' x='172' y='172'/> + <part name='24' x='175' y='172'/> + <part name='25' x='177' y='173'/> + <part name='26' x='179' y='174'/> + <part name='27' x='166' y='178'/> + <part name='28' x='166' y='180'/> + <part name='29' x='166' y='183'/> + <part name='30' x='166' y='185'/> + <part name='31' x='163' y='187'/> + <part name='32' x='165' y='188'/> + <part name='33' x='166' y='188'/> + <part name='34' x='168' y='188'/> + <part name='35' x='170' y='187'/> + <part name='36' x='157' y='179'/> + <part name='37' x='158' y='178'/> + <part name='38' x='160' y='178'/> + <part name='39' x='162' y='179'/> + <part name='40' x='160' y='179'/> + <part name='41' x='158' y='180'/> + <part name='42' x='171' y='178'/> + <part name='43' x='172' y='177'/> + <part name='44' x='174' y='177'/> + <part name='45' x='176' y='178'/> + <part name='46' x='175' y='178'/> + <part name='47' x='173' y='178'/> + <part name='48' x='161' y='193'/> + <part name='49' x='163' y='191'/> + <part name='50' x='165' y='191'/> + <part name='51' x='167' y='191'/> + <part name='52' x='168' y='190'/> + <part name='53' x='171' y='191'/> + <part name='54' x='173' y='191'/> + <part name='55' x='171' y='194'/> + <part name='56' x='169' y='195'/> + <part name='57' x='167' y='195'/> + <part name='58' x='165' y='195'/> + <part name='59' x='163' y='195'/> + <part name='60' x='162' y='193'/> + <part name='61' x='165' y='192'/> + <part name='62' x='167' y='192'/> + <part name='63' x='168' y='191'/> + <part name='64' x='172' y='192'/> + <part name='65' x='169' y='193'/> + <part name='66' x='167' y='193'/> + <part name='67' x='165' y='193'/> + </box> + <box top='55' left='329' width='44' height='44'> + <part name='00' x='327' y='73'/> + <part name='01' x='328' y='78'/> + <part name='02' x='330' y='83'/> + <part name='03' x='332' y='88'/> + <part name='04' x='334' y='93'/> + <part name='05' x='338' y='97'/> + <part name='06' x='342' y='100'/> + <part name='07' x='347' y='102'/> + <part name='08' x='352' y='102'/> + <part name='09' x='358' y='100'/> + <part name='10' x='363' y='97'/> + <part name='11' x='366' y='93'/> + <part name='12' x='369' y='89'/> + <part name='13' x='371' y='83'/> + <part name='14' x='371' y='77'/> + <part name='15' x='371' y='71'/> + <part name='16' x='371' y='65'/> + <part name='17' x='329' y='66'/> + <part name='18' x='330' y='63'/> + <part name='19' x='334' y='60'/> + <part name='20' x='338' y='60'/> + <part name='21' x='342' y='60'/> + <part name='22' x='349' y='59'/> + <part name='23' x='353' y='57'/> + <part name='24' x='357' y='56'/> + <part name='25' x='362' y='57'/> + <part name='26' x='365' y='60'/> + <part name='27' x='346' y='65'/> + <part name='28' x='346' y='69'/> + <part name='29' x='347' y='72'/> + <part name='30' x='347' y='76'/> + <part name='31' x='344' y='80'/> + <part name='32' x='346' y='80'/> + <part name='33' x='348' y='80'/> + <part name='34' x='350' y='79'/> + <part name='35' x='352' y='78'/> + <part name='36' x='334' y='69'/> + <part name='37' x='336' y='67'/> + <part name='38' x='338' y='67'/> + <part name='39' x='341' y='67'/> + <part name='40' x='339' y='68'/> + <part name='41' x='336' y='69'/> + <part name='42' x='353' y='65'/> + <part name='43' x='355' y='64'/> + <part name='44' x='357' y='63'/> + <part name='45' x='360' y='64'/> + <part name='46' x='358' y='65'/> + <part name='47' x='355' y='65'/> + <part name='48' x='342' y='88'/> + <part 
name='49' x='344' y='86'/> + <part name='50' x='347' y='84'/> + <part name='51' x='349' y='84'/> + <part name='52' x='351' y='83'/> + <part name='53' x='355' y='84'/> + <part name='54' x='358' y='85'/> + <part name='55' x='356' y='88'/> + <part name='56' x='353' y='90'/> + <part name='57' x='350' y='90'/> + <part name='58' x='348' y='91'/> + <part name='59' x='345' y='90'/> + <part name='60' x='344' y='88'/> + <part name='61' x='347' y='87'/> + <part name='62' x='350' y='86'/> + <part name='63' x='352' y='86'/> + <part name='64' x='356' y='86'/> + <part name='65' x='352' y='87'/> + <part name='66' x='350' y='87'/> + <part name='67' x='348' y='87'/> + </box> + <box top='74' left='233' width='44' height='44'> + <part name='00' x='239' y='92'/> + <part name='01' x='240' y='97'/> + <part name='02' x='241' y='101'/> + <part name='03' x='243' y='104'/> + <part name='04' x='245' y='108'/> + <part name='05' x='248' y='111'/> + <part name='06' x='252' y='112'/> + <part name='07' x='256' y='114'/> + <part name='08' x='260' y='113'/> + <part name='09' x='265' y='112'/> + <part name='10' x='268' y='109'/> + <part name='11' x='272' y='106'/> + <part name='12' x='274' y='102'/> + <part name='13' x='276' y='98'/> + <part name='14' x='276' y='93'/> + <part name='15' x='275' y='88'/> + <part name='16' x='274' y='83'/> + <part name='17' x='239' y='89'/> + <part name='18' x='240' y='86'/> + <part name='19' x='242' y='84'/> + <part name='20' x='246' y='84'/> + <part name='21' x='249' y='84'/> + <part name='22' x='253' y='82'/> + <part name='23' x='256' y='80'/> + <part name='24' x='259' y='79'/> + <part name='25' x='263' y='78'/> + <part name='26' x='267' y='80'/> + <part name='27' x='252' y='86'/> + <part name='28' x='252' y='88'/> + <part name='29' x='253' y='91'/> + <part name='30' x='253' y='93'/> + <part name='31' x='251' y='97'/> + <part name='32' x='253' y='97'/> + <part name='33' x='254' y='97'/> + <part name='34' x='256' y='96'/> + <part name='35' x='258' y='95'/> + <part name='36' x='243' y='90'/> + <part name='37' x='244' y='88'/> + <part name='38' x='246' y='88'/> + <part name='39' x='248' y='88'/> + <part name='40' x='247' y='89'/> + <part name='41' x='245' y='90'/> + <part name='42' x='258' y='85'/> + <part name='43' x='259' y='84'/> + <part name='44' x='261' y='83'/> + <part name='45' x='263' y='84'/> + <part name='46' x='262' y='85'/> + <part name='47' x='260' y='85'/> + <part name='48' x='251' y='104'/> + <part name='49' x='252' y='102'/> + <part name='50' x='254' y='100'/> + <part name='51' x='256' y='100'/> + <part name='52' x='258' y='100'/> + <part name='53' x='261' y='100'/> + <part name='54' x='264' y='100'/> + <part name='55' x='262' y='102'/> + <part name='56' x='259' y='103'/> + <part name='57' x='257' y='104'/> + <part name='58' x='255' y='104'/> + <part name='59' x='253' y='105'/> + <part name='60' x='252' y='104'/> + <part name='61' x='254' y='102'/> + <part name='62' x='256' y='102'/> + <part name='63' x='258' y='101'/> + <part name='64' x='263' y='101'/> + <part name='65' x='258' y='101'/> + <part name='66' x='256' y='102'/> + <part name='67' x='254' y='102'/> + </box> + <box top='86' left='178' width='37' height='37'> + <part name='00' x='184' y='101'/> + <part name='01' x='184' y='104'/> + <part name='02' x='185' y='108'/> + <part name='03' x='187' y='111'/> + <part name='04' x='188' y='115'/> + <part name='05' x='190' y='117'/> + <part name='06' x='193' y='120'/> + <part name='07' x='196' y='122'/> + <part name='08' x='199' y='122'/> + <part name='09' x='203' y='121'/> + 
<part name='10' x='207' y='118'/> + <part name='11' x='210' y='116'/> + <part name='12' x='213' y='113'/> + <part name='13' x='215' y='109'/> + <part name='14' x='216' y='105'/> + <part name='15' x='216' y='100'/> + <part name='16' x='216' y='96'/> + <part name='17' x='185' y='96'/> + <part name='18' x='186' y='94'/> + <part name='19' x='188' y='93'/> + <part name='20' x='190' y='93'/> + <part name='21' x='192' y='93'/> + <part name='22' x='197' y='92'/> + <part name='23' x='200' y='90'/> + <part name='24' x='203' y='89'/> + <part name='25' x='206' y='90'/> + <part name='26' x='208' y='92'/> + <part name='27' x='195' y='96'/> + <part name='28' x='195' y='99'/> + <part name='29' x='195' y='102'/> + <part name='30' x='195' y='104'/> + <part name='31' x='194' y='107'/> + <part name='32' x='195' y='107'/> + <part name='33' x='196' y='107'/> + <part name='34' x='198' y='107'/> + <part name='35' x='200' y='106'/> + <part name='36' x='188' y='99'/> + <part name='37' x='189' y='97'/> + <part name='38' x='191' y='97'/> + <part name='39' x='192' y='98'/> + <part name='40' x='191' y='99'/> + <part name='41' x='189' y='99'/> + <part name='42' x='200' y='97'/> + <part name='43' x='202' y='95'/> + <part name='44' x='203' y='95'/> + <part name='45' x='205' y='96'/> + <part name='46' x='204' y='96'/> + <part name='47' x='202' y='97'/> + <part name='48' x='193' y='113'/> + <part name='49' x='194' y='111'/> + <part name='50' x='196' y='111'/> + <part name='51' x='197' y='111'/> + <part name='52' x='199' y='110'/> + <part name='53' x='201' y='110'/> + <part name='54' x='204' y='111'/> + <part name='55' x='202' y='113'/> + <part name='56' x='199' y='114'/> + <part name='57' x='198' y='114'/> + <part name='58' x='196' y='114'/> + <part name='59' x='195' y='114'/> + <part name='60' x='194' y='113'/> + <part name='61' x='196' y='112'/> + <part name='62' x='197' y='112'/> + <part name='63' x='199' y='112'/> + <part name='64' x='203' y='111'/> + <part name='65' x='199' y='112'/> + <part name='66' x='197' y='112'/> + <part name='67' x='196' y='112'/> + </box> + </image> + <image file='2008_002506.jpg'> + <box top='78' left='329' width='109' height='109'> + <part name='00' x='342' y='134'/> + <part name='01' x='345' y='145'/> + <part name='02' x='347' y='155'/> + <part name='03' x='351' y='165'/> + <part name='04' x='357' y='175'/> + <part name='05' x='365' y='183'/> + <part name='06' x='375' y='190'/> + <part name='07' x='386' y='194'/> + <part name='08' x='399' y='193'/> + <part name='09' x='411' y='188'/> + <part name='10' x='423' y='180'/> + <part name='11' x='434' y='172'/> + <part name='12' x='443' y='161'/> + <part name='13' x='447' y='149'/> + <part name='14' x='446' y='134'/> + <part name='15' x='443' y='120'/> + <part name='16' x='441' y='106'/> + <part name='17' x='342' y='125'/> + <part name='18' x='343' y='116'/> + <part name='19' x='350' y='109'/> + <part name='20' x='359' y='105'/> + <part name='21' x='368' y='105'/> + <part name='22' x='380' y='99'/> + <part name='23' x='390' y='93'/> + <part name='24' x='402' y='91'/> + <part name='25' x='413' y='92'/> + <part name='26' x='422' y='99'/> + <part name='27' x='375' y='112'/> + <part name='28' x='376' y='119'/> + <part name='29' x='376' y='126'/> + <part name='30' x='377' y='133'/> + <part name='31' x='372' y='142'/> + <part name='32' x='377' y='142'/> + <part name='33' x='382' y='142'/> + <part name='34' x='387' y='139'/> + <part name='35' x='393' y='136'/> + <part name='36' x='353' y='127'/> + <part name='37' x='356' y='122'/> + <part name='38' 
x='362' y='120'/> + <part name='39' x='368' y='120'/> + <part name='40' x='363' y='123'/> + <part name='41' x='358' y='125'/> + <part name='42' x='393' y='112'/> + <part name='43' x='397' y='107'/> + <part name='44' x='403' y='106'/> + <part name='45' x='409' y='107'/> + <part name='46' x='404' y='108'/> + <part name='47' x='399' y='110'/> + <part name='48' x='369' y='159'/> + <part name='49' x='373' y='154'/> + <part name='50' x='380' y='151'/> + <part name='51' x='386' y='150'/> + <part name='52' x='392' y='147'/> + <part name='53' x='403' y='146'/> + <part name='54' x='416' y='146'/> + <part name='55' x='407' y='156'/> + <part name='56' x='398' y='161'/> + <part name='57' x='391' y='163'/> + <part name='58' x='385' y='164'/> + <part name='59' x='377' y='165'/> + <part name='60' x='371' y='158'/> + <part name='61' x='381' y='152'/> + <part name='62' x='387' y='151'/> + <part name='63' x='394' y='149'/> + <part name='64' x='413' y='148'/> + <part name='65' x='395' y='157'/> + <part name='66' x='389' y='160'/> + <part name='67' x='383' y='161'/> + </box> + <box top='95' left='224' width='91' height='91'> + <part name='00' x='227' y='134'/> + <part name='01' x='229' y='145'/> + <part name='02' x='231' y='155'/> + <part name='03' x='233' y='166'/> + <part name='04' x='238' y='175'/> + <part name='05' x='246' y='182'/> + <part name='06' x='255' y='188'/> + <part name='07' x='264' y='192'/> + <part name='08' x='274' y='193'/> + <part name='09' x='283' y='190'/> + <part name='10' x='291' y='184'/> + <part name='11' x='298' y='177'/> + <part name='12' x='304' y='169'/> + <part name='13' x='308' y='159'/> + <part name='14' x='308' y='148'/> + <part name='15' x='308' y='137'/> + <part name='16' x='308' y='125'/> + <part name='17' x='232' y='123'/> + <part name='18' x='236' y='118'/> + <part name='19' x='242' y='116'/> + <part name='20' x='249' y='116'/> + <part name='21' x='257' y='118'/> + <part name='22' x='273' y='117'/> + <part name='23' x='280' y='113'/> + <part name='24' x='287' y='111'/> + <part name='25' x='295' y='111'/> + <part name='26' x='301' y='116'/> + <part name='27' x='265' y='125'/> + <part name='28' x='266' y='130'/> + <part name='29' x='267' y='136'/> + <part name='30' x='268' y='142'/> + <part name='31' x='260' y='147'/> + <part name='32' x='264' y='148'/> + <part name='33' x='269' y='149'/> + <part name='34' x='273' y='147'/> + <part name='35' x='277' y='146'/> + <part name='36' x='241' y='129'/> + <part name='37' x='245' y='127'/> + <part name='38' x='250' y='126'/> + <part name='39' x='255' y='128'/> + <part name='40' x='250' y='129'/> + <part name='41' x='246' y='129'/> + <part name='42' x='278' y='126'/> + <part name='43' x='282' y='123'/> + <part name='44' x='287' y='122'/> + <part name='45' x='291' y='124'/> + <part name='46' x='287' y='125'/> + <part name='47' x='282' y='125'/> + <part name='48' x='253' y='164'/> + <part name='49' x='258' y='158'/> + <part name='50' x='265' y='156'/> + <part name='51' x='270' y='156'/> + <part name='52' x='275' y='155'/> + <part name='53' x='283' y='156'/> + <part name='54' x='290' y='160'/> + <part name='55' x='285' y='170'/> + <part name='56' x='278' y='174'/> + <part name='57' x='272' y='175'/> + <part name='58' x='266' y='175'/> + <part name='59' x='259' y='172'/> + <part name='60' x='255' y='163'/> + <part name='61' x='265' y='158'/> + <part name='62' x='270' y='158'/> + <part name='63' x='276' y='157'/> + <part name='64' x='288' y='160'/> + <part name='65' x='277' y='170'/> + <part name='66' x='271' y='170'/> + <part name='67' 
x='266' y='170'/> + </box> + <box top='65' left='125' width='90' height='91'> + <part name='00' x='117' y='92'/> + <part name='01' x='116' y='105'/> + <part name='02' x='115' y='117'/> + <part name='03' x='115' y='130'/> + <part name='04' x='120' y='140'/> + <part name='05' x='129' y='147'/> + <part name='06' x='140' y='152'/> + <part name='07' x='151' y='157'/> + <part name='08' x='161' y='160'/> + <part name='09' x='170' y='161'/> + <part name='10' x='179' y='158'/> + <part name='11' x='187' y='154'/> + <part name='12' x='193' y='147'/> + <part name='13' x='198' y='139'/> + <part name='14' x='200' y='130'/> + <part name='15' x='202' y='121'/> + <part name='16' x='202' y='113'/> + <part name='17' x='137' y='76'/> + <part name='18' x='145' y='70'/> + <part name='19' x='155' y='68'/> + <part name='20' x='164' y='70'/> + <part name='21' x='173' y='75'/> + <part name='22' x='186' y='81'/> + <part name='23' x='193' y='81'/> + <part name='24' x='199' y='84'/> + <part name='25' x='204' y='90'/> + <part name='26' x='204' y='98'/> + <part name='27' x='179' y='90'/> + <part name='28' x='179' y='96'/> + <part name='29' x='180' y='102'/> + <part name='30' x='180' y='108'/> + <part name='31' x='165' y='112'/> + <part name='32' x='170' y='115'/> + <part name='33' x='174' y='117'/> + <part name='34' x='179' y='118'/> + <part name='35' x='183' y='117'/> + <part name='36' x='148' y='85'/> + <part name='37' x='153' y='84'/> + <part name='38' x='159' y='85'/> + <part name='39' x='163' y='90'/> + <part name='40' x='157' y='89'/> + <part name='41' x='152' y='87'/> + <part name='42' x='183' y='98'/> + <part name='43' x='188' y='96'/> + <part name='44' x='193' y='98'/> + <part name='45' x='196' y='103'/> + <part name='46' x='192' y='102'/> + <part name='47' x='187' y='100'/> + <part name='48' x='146' y='123'/> + <part name='49' x='157' y='121'/> + <part name='50' x='167' y='123'/> + <part name='51' x='171' y='125'/> + <part name='52' x='176' y='125'/> + <part name='53' x='181' y='129'/> + <part name='54' x='184' y='134'/> + <part name='55' x='178' y='139'/> + <part name='56' x='171' y='139'/> + <part name='57' x='167' y='138'/> + <part name='58' x='161' y='136'/> + <part name='59' x='153' y='132'/> + <part name='60' x='148' y='124'/> + <part name='61' x='165' y='125'/> + <part name='62' x='170' y='127'/> + <part name='63' x='174' y='128'/> + <part name='64' x='182' y='133'/> + <part name='65' x='173' y='135'/> + <part name='66' x='168' y='134'/> + <part name='67' x='163' y='133'/> + </box> + </image> + <image file='2008_004176.jpg'> + <box top='230' left='206' width='37' height='37'> + <part name='00' x='206' y='241'/> + <part name='01' x='206' y='245'/> + <part name='02' x='206' y='250'/> + <part name='03' x='206' y='254'/> + <part name='04' x='207' y='259'/> + <part name='05' x='209' y='262'/> + <part name='06' x='212' y='265'/> + <part name='07' x='216' y='267'/> + <part name='08' x='219' y='269'/> + <part name='09' x='224' y='269'/> + <part name='10' x='228' y='267'/> + <part name='11' x='232' y='264'/> + <part name='12' x='235' y='261'/> + <part name='13' x='237' y='257'/> + <part name='14' x='238' y='253'/> + <part name='15' x='239' y='249'/> + <part name='16' x='240' y='244'/> + <part name='17' x='209' y='237'/> + <part name='18' x='212' y='236'/> + <part name='19' x='214' y='236'/> + <part name='20' x='217' y='236'/> + <part name='21' x='219' y='238'/> + <part name='22' x='227' y='238'/> + <part name='23' x='229' y='237'/> + <part name='24' x='232' y='237'/> + <part name='25' x='235' y='238'/> + <part 
name='26' x='237' y='240'/> + <part name='27' x='222' y='241'/> + <part name='28' x='222' y='244'/> + <part name='29' x='222' y='247'/> + <part name='30' x='221' y='250'/> + <part name='31' x='218' y='251'/> + <part name='32' x='220' y='252'/> + <part name='33' x='221' y='252'/> + <part name='34' x='223' y='252'/> + <part name='35' x='225' y='252'/> + <part name='36' x='212' y='241'/> + <part name='37' x='214' y='240'/> + <part name='38' x='216' y='240'/> + <part name='39' x='218' y='242'/> + <part name='40' x='216' y='242'/> + <part name='41' x='214' y='242'/> + <part name='42' x='227' y='243'/> + <part name='43' x='229' y='242'/> + <part name='44' x='231' y='242'/> + <part name='45' x='233' y='243'/> + <part name='46' x='231' y='243'/> + <part name='47' x='229' y='243'/> + <part name='48' x='213' y='256'/> + <part name='49' x='216' y='254'/> + <part name='50' x='219' y='254'/> + <part name='51' x='221' y='255'/> + <part name='52' x='223' y='254'/> + <part name='53' x='226' y='255'/> + <part name='54' x='229' y='257'/> + <part name='55' x='226' y='260'/> + <part name='56' x='223' y='261'/> + <part name='57' x='221' y='261'/> + <part name='58' x='218' y='261'/> + <part name='59' x='215' y='259'/> + <part name='60' x='214' y='256'/> + <part name='61' x='219' y='255'/> + <part name='62' x='221' y='256'/> + <part name='63' x='223' y='256'/> + <part name='64' x='228' y='257'/> + <part name='65' x='223' y='260'/> + <part name='66' x='221' y='260'/> + <part name='67' x='218' y='259'/> + </box> + <box top='118' left='162' width='37' height='37'> + <part name='00' x='164' y='132'/> + <part name='01' x='164' y='136'/> + <part name='02' x='165' y='139'/> + <part name='03' x='165' y='143'/> + <part name='04' x='167' y='147'/> + <part name='05' x='170' y='150'/> + <part name='06' x='173' y='152'/> + <part name='07' x='177' y='154'/> + <part name='08' x='181' y='155'/> + <part name='09' x='186' y='154'/> + <part name='10' x='190' y='152'/> + <part name='11' x='193' y='150'/> + <part name='12' x='196' y='147'/> + <part name='13' x='197' y='143'/> + <part name='14' x='198' y='139'/> + <part name='15' x='198' y='135'/> + <part name='16' x='197' y='131'/> + <part name='17' x='167' y='126'/> + <part name='18' x='169' y='124'/> + <part name='19' x='172' y='123'/> + <part name='20' x='175' y='123'/> + <part name='21' x='177' y='124'/> + <part name='22' x='183' y='125'/> + <part name='23' x='186' y='124'/> + <part name='24' x='189' y='123'/> + <part name='25' x='192' y='124'/> + <part name='26' x='194' y='126'/> + <part name='27' x='181' y='128'/> + <part name='28' x='181' y='131'/> + <part name='29' x='181' y='133'/> + <part name='30' x='181' y='136'/> + <part name='31' x='177' y='138'/> + <part name='32' x='179' y='139'/> + <part name='33' x='181' y='139'/> + <part name='34' x='182' y='138'/> + <part name='35' x='184' y='138'/> + <part name='36' x='171' y='129'/> + <part name='37' x='172' y='129'/> + <part name='38' x='174' y='129'/> + <part name='39' x='176' y='129'/> + <part name='40' x='174' y='130'/> + <part name='41' x='172' y='130'/> + <part name='42' x='185' y='129'/> + <part name='43' x='187' y='129'/> + <part name='44' x='189' y='129'/> + <part name='45' x='191' y='129'/> + <part name='46' x='189' y='130'/> + <part name='47' x='187' y='130'/> + <part name='48' x='174' y='143'/> + <part name='49' x='177' y='141'/> + <part name='50' x='179' y='141'/> + <part name='51' x='181' y='141'/> + <part name='52' x='183' y='141'/> + <part name='53' x='185' y='141'/> + <part name='54' x='188' y='142'/> + <part 
name='55' x='186' y='144'/> + <part name='56' x='183' y='144'/> + <part name='57' x='181' y='144'/> + <part name='58' x='179' y='144'/> + <part name='59' x='177' y='144'/> + <part name='60' x='175' y='143'/> + <part name='61' x='179' y='142'/> + <part name='62' x='181' y='142'/> + <part name='63' x='183' y='142'/> + <part name='64' x='187' y='143'/> + <part name='65' x='183' y='143'/> + <part name='66' x='181' y='143'/> + <part name='67' x='179' y='143'/> + </box> + <box top='82' left='190' width='37' height='37'> + <part name='00' x='194' y='91'/> + <part name='01' x='195' y='95'/> + <part name='02' x='195' y='99'/> + <part name='03' x='196' y='104'/> + <part name='04' x='197' y='108'/> + <part name='05' x='199' y='111'/> + <part name='06' x='202' y='114'/> + <part name='07' x='206' y='116'/> + <part name='08' x='210' y='117'/> + <part name='09' x='213' y='116'/> + <part name='10' x='217' y='114'/> + <part name='11' x='220' y='111'/> + <part name='12' x='222' y='107'/> + <part name='13' x='223' y='103'/> + <part name='14' x='224' y='99'/> + <part name='15' x='224' y='96'/> + <part name='16' x='225' y='91'/> + <part name='17' x='197' y='88'/> + <part name='18' x='199' y='87'/> + <part name='19' x='202' y='86'/> + <part name='20' x='204' y='86'/> + <part name='21' x='207' y='87'/> + <part name='22' x='213' y='87'/> + <part name='23' x='215' y='86'/> + <part name='24' x='217' y='86'/> + <part name='25' x='220' y='87'/> + <part name='26' x='222' y='88'/> + <part name='27' x='210' y='90'/> + <part name='28' x='210' y='93'/> + <part name='29' x='210' y='96'/> + <part name='30' x='210' y='100'/> + <part name='31' x='206' y='100'/> + <part name='32' x='208' y='101'/> + <part name='33' x='209' y='102'/> + <part name='34' x='211' y='101'/> + <part name='35' x='212' y='101'/> + <part name='36' x='200' y='91'/> + <part name='37' x='202' y='90'/> + <part name='38' x='204' y='90'/> + <part name='39' x='205' y='91'/> + <part name='40' x='204' y='92'/> + <part name='41' x='202' y='92'/> + <part name='42' x='214' y='91'/> + <part name='43' x='215' y='90'/> + <part name='44' x='217' y='90'/> + <part name='45' x='219' y='91'/> + <part name='46' x='217' y='92'/> + <part name='47' x='215' y='92'/> + <part name='48' x='202' y='105'/> + <part name='49' x='205' y='104'/> + <part name='50' x='207' y='104'/> + <part name='51' x='209' y='104'/> + <part name='52' x='211' y='104'/> + <part name='53' x='214' y='104'/> + <part name='54' x='216' y='106'/> + <part name='55' x='214' y='109'/> + <part name='56' x='211' y='110'/> + <part name='57' x='209' y='111'/> + <part name='58' x='207' y='110'/> + <part name='59' x='204' y='109'/> + <part name='60' x='203' y='105'/> + <part name='61' x='207' y='105'/> + <part name='62' x='209' y='105'/> + <part name='63' x='211' y='105'/> + <part name='64' x='215' y='106'/> + <part name='65' x='211' y='109'/> + <part name='66' x='209' y='109'/> + <part name='67' x='207' y='109'/> + </box> + <box top='78' left='326' width='37' height='37'> + <part name='00' x='330' y='94'/> + <part name='01' x='331' y='98'/> + <part name='02' x='332' y='101'/> + <part name='03' x='333' y='105'/> + <part name='04' x='334' y='109'/> + <part name='05' x='336' y='112'/> + <part name='06' x='338' y='115'/> + <part name='07' x='341' y='117'/> + <part name='08' x='345' y='118'/> + <part name='09' x='349' y='117'/> + <part name='10' x='353' y='115'/> + <part name='11' x='357' y='112'/> + <part name='12' x='360' y='108'/> + <part name='13' x='362' y='104'/> + <part name='14' x='363' y='99'/> + <part name='15' 
x='363' y='94'/> + <part name='16' x='363' y='89'/> + <part name='17' x='330' y='90'/> + <part name='18' x='330' y='88'/> + <part name='19' x='332' y='87'/> + <part name='20' x='335' y='87'/> + <part name='21' x='337' y='87'/> + <part name='22' x='342' y='86'/> + <part name='23' x='345' y='85'/> + <part name='24' x='348' y='85'/> + <part name='25' x='351' y='85'/> + <part name='26' x='354' y='86'/> + <part name='27' x='340' y='90'/> + <part name='28' x='340' y='92'/> + <part name='29' x='340' y='95'/> + <part name='30' x='340' y='98'/> + <part name='31' x='338' y='100'/> + <part name='32' x='339' y='101'/> + <part name='33' x='341' y='101'/> + <part name='34' x='343' y='100'/> + <part name='35' x='344' y='99'/> + <part name='36' x='332' y='92'/> + <part name='37' x='334' y='91'/> + <part name='38' x='335' y='90'/> + <part name='39' x='337' y='91'/> + <part name='40' x='336' y='92'/> + <part name='41' x='334' y='92'/> + <part name='42' x='346' y='90'/> + <part name='43' x='347' y='89'/> + <part name='44' x='349' y='89'/> + <part name='45' x='351' y='89'/> + <part name='46' x='349' y='90'/> + <part name='47' x='347' y='90'/> + <part name='48' x='338' y='107'/> + <part name='49' x='339' y='105'/> + <part name='50' x='340' y='104'/> + <part name='51' x='342' y='104'/> + <part name='52' x='343' y='104'/> + <part name='53' x='346' y='104'/> + <part name='54' x='349' y='105'/> + <part name='55' x='347' y='107'/> + <part name='56' x='345' y='109'/> + <part name='57' x='343' y='109'/> + <part name='58' x='341' y='109'/> + <part name='59' x='339' y='109'/> + <part name='60' x='339' y='107'/> + <part name='61' x='341' y='105'/> + <part name='62' x='342' y='105'/> + <part name='63' x='344' y='105'/> + <part name='64' x='348' y='105'/> + <part name='65' x='344' y='107'/> + <part name='66' x='343' y='107'/> + <part name='67' x='341' y='107'/> + </box> + <box top='98' left='222' width='37' height='37'> + <part name='00' x='226' y='108'/> + <part name='01' x='226' y='112'/> + <part name='02' x='227' y='116'/> + <part name='03' x='228' y='119'/> + <part name='04' x='229' y='123'/> + <part name='05' x='230' y='126'/> + <part name='06' x='233' y='129'/> + <part name='07' x='236' y='131'/> + <part name='08' x='239' y='131'/> + <part name='09' x='243' y='131'/> + <part name='10' x='247' y='129'/> + <part name='11' x='251' y='127'/> + <part name='12' x='254' y='124'/> + <part name='13' x='256' y='120'/> + <part name='14' x='257' y='116'/> + <part name='15' x='258' y='112'/> + <part name='16' x='258' y='107'/> + <part name='17' x='227' y='104'/> + <part name='18' x='228' y='103'/> + <part name='19' x='230' y='102'/> + <part name='20' x='233' y='102'/> + <part name='21' x='235' y='103'/> + <part name='22' x='241' y='103'/> + <part name='23' x='244' y='102'/> + <part name='24' x='247' y='101'/> + <part name='25' x='250' y='102'/> + <part name='26' x='252' y='104'/> + <part name='27' x='238' y='106'/> + <part name='28' x='237' y='108'/> + <part name='29' x='237' y='111'/> + <part name='30' x='237' y='113'/> + <part name='31' x='235' y='115'/> + <part name='32' x='236' y='116'/> + <part name='33' x='237' y='116'/> + <part name='34' x='239' y='116'/> + <part name='35' x='241' y='115'/> + <part name='36' x='229' y='107'/> + <part name='37' x='231' y='106'/> + <part name='38' x='233' y='106'/> + <part name='39' x='234' y='107'/> + <part name='40' x='233' y='107'/> + <part name='41' x='231' y='107'/> + <part name='42' x='243' y='107'/> + <part name='43' x='245' y='106'/> + <part name='44' x='247' y='106'/> + <part 
name='45' x='249' y='107'/> + <part name='46' x='247' y='107'/> + <part name='47' x='245' y='107'/> + <part name='48' x='233' y='121'/> + <part name='49' x='234' y='119'/> + <part name='50' x='236' y='118'/> + <part name='51' x='238' y='118'/> + <part name='52' x='240' y='118'/> + <part name='53' x='243' y='119'/> + <part name='54' x='246' y='121'/> + <part name='55' x='243' y='123'/> + <part name='56' x='240' y='124'/> + <part name='57' x='238' y='124'/> + <part name='58' x='236' y='124'/> + <part name='59' x='234' y='123'/> + <part name='60' x='234' y='121'/> + <part name='61' x='236' y='119'/> + <part name='62' x='238' y='119'/> + <part name='63' x='240' y='119'/> + <part name='64' x='245' y='121'/> + <part name='65' x='240' y='122'/> + <part name='66' x='238' y='122'/> + <part name='67' x='236' y='122'/> + </box> + <box top='86' left='110' width='37' height='37'> + <part name='00' x='107' y='92'/> + <part name='01' x='107' y='98'/> + <part name='02' x='108' y='103'/> + <part name='03' x='109' y='108'/> + <part name='04' x='111' y='112'/> + <part name='05' x='115' y='116'/> + <part name='06' x='119' y='119'/> + <part name='07' x='123' y='121'/> + <part name='08' x='128' y='122'/> + <part name='09' x='131' y='121'/> + <part name='10' x='133' y='118'/> + <part name='11' x='135' y='114'/> + <part name='12' x='137' y='111'/> + <part name='13' x='139' y='107'/> + <part name='14' x='140' y='104'/> + <part name='15' x='140' y='100'/> + <part name='16' x='141' y='97'/> + <part name='17' x='115' y='91'/> + <part name='18' x='119' y='89'/> + <part name='19' x='122' y='89'/> + <part name='20' x='126' y='90'/> + <part name='21' x='130' y='91'/> + <part name='22' x='135' y='92'/> + <part name='23' x='137' y='91'/> + <part name='24' x='139' y='91'/> + <part name='25' x='141' y='91'/> + <part name='26' x='141' y='93'/> + <part name='27' x='132' y='95'/> + <part name='28' x='132' y='98'/> + <part name='29' x='133' y='102'/> + <part name='30' x='133' y='105'/> + <part name='31' x='127' y='106'/> + <part name='32' x='129' y='106'/> + <part name='33' x='131' y='107'/> + <part name='34' x='133' y='107'/> + <part name='35' x='134' y='106'/> + <part name='36' x='119' y='95'/> + <part name='37' x='122' y='94'/> + <part name='38' x='124' y='94'/> + <part name='39' x='125' y='95'/> + <part name='40' x='123' y='96'/> + <part name='41' x='121' y='95'/> + <part name='42' x='134' y='96'/> + <part name='43' x='136' y='95'/> + <part name='44' x='138' y='95'/> + <part name='45' x='139' y='96'/> + <part name='46' x='138' y='97'/> + <part name='47' x='136' y='97'/> + <part name='48' x='122' y='110'/> + <part name='49' x='125' y='110'/> + <part name='50' x='128' y='110'/> + <part name='51' x='130' y='110'/> + <part name='52' x='131' y='109'/> + <part name='53' x='133' y='110'/> + <part name='54' x='133' y='110'/> + <part name='55' x='132' y='112'/> + <part name='56' x='131' y='113'/> + <part name='57' x='129' y='114'/> + <part name='58' x='128' y='113'/> + <part name='59' x='125' y='112'/> + <part name='60' x='123' y='110'/> + <part name='61' x='128' y='111'/> + <part name='62' x='130' y='111'/> + <part name='63' x='131' y='111'/> + <part name='64' x='133' y='111'/> + <part name='65' x='131' y='111'/> + <part name='66' x='130' y='112'/> + <part name='67' x='128' y='112'/> + </box> + <box top='102' left='282' width='37' height='37'> + <part name='00' x='284' y='110'/> + <part name='01' x='284' y='113'/> + <part name='02' x='284' y='117'/> + <part name='03' x='284' y='121'/> + <part name='04' x='285' y='125'/> + <part 
name='05' x='287' y='129'/> + <part name='06' x='289' y='132'/> + <part name='07' x='291' y='135'/> + <part name='08' x='295' y='136'/> + <part name='09' x='299' y='136'/> + <part name='10' x='303' y='134'/> + <part name='11' x='307' y='132'/> + <part name='12' x='310' y='129'/> + <part name='13' x='313' y='126'/> + <part name='14' x='314' y='122'/> + <part name='15' x='315' y='117'/> + <part name='16' x='316' y='113'/> + <part name='17' x='286' y='107'/> + <part name='18' x='287' y='105'/> + <part name='19' x='290' y='105'/> + <part name='20' x='292' y='106'/> + <part name='21' x='294' y='107'/> + <part name='22' x='301' y='107'/> + <part name='23' x='304' y='107'/> + <part name='24' x='307' y='107'/> + <part name='25' x='310' y='108'/> + <part name='26' x='312' y='110'/> + <part name='27' x='297' y='110'/> + <part name='28' x='297' y='113'/> + <part name='29' x='296' y='115'/> + <part name='30' x='296' y='118'/> + <part name='31' x='293' y='119'/> + <part name='32' x='295' y='120'/> + <part name='33' x='296' y='121'/> + <part name='34' x='298' y='120'/> + <part name='35' x='299' y='120'/> + <part name='36' x='288' y='110'/> + <part name='37' x='290' y='109'/> + <part name='38' x='292' y='109'/> + <part name='39' x='294' y='111'/> + <part name='40' x='292' y='111'/> + <part name='41' x='290' y='111'/> + <part name='42' x='302' y='112'/> + <part name='43' x='304' y='110'/> + <part name='44' x='306' y='111'/> + <part name='45' x='308' y='112'/> + <part name='46' x='306' y='112'/> + <part name='47' x='304' y='112'/> + <part name='48' x='290' y='123'/> + <part name='49' x='292' y='122'/> + <part name='50' x='294' y='123'/> + <part name='51' x='296' y='123'/> + <part name='52' x='298' y='123'/> + <part name='53' x='301' y='123'/> + <part name='54' x='304' y='124'/> + <part name='55' x='301' y='127'/> + <part name='56' x='298' y='128'/> + <part name='57' x='296' y='128'/> + <part name='58' x='294' y='128'/> + <part name='59' x='292' y='126'/> + <part name='60' x='291' y='123'/> + <part name='61' x='294' y='123'/> + <part name='62' x='296' y='124'/> + <part name='63' x='298' y='124'/> + <part name='64' x='303' y='124'/> + <part name='65' x='298' y='126'/> + <part name='66' x='296' y='126'/> + <part name='67' x='294' y='126'/> + </box> + </image> + <image file='2008_007676.jpg'> + <box top='62' left='226' width='37' height='37'> + <part name='00' x='223' y='72'/> + <part name='01' x='224' y='77'/> + <part name='02' x='224' y='82'/> + <part name='03' x='225' y='87'/> + <part name='04' x='227' y='91'/> + <part name='05' x='230' y='95'/> + <part name='06' x='234' y='98'/> + <part name='07' x='239' y='100'/> + <part name='08' x='243' y='100'/> + <part name='09' x='248' y='99'/> + <part name='10' x='252' y='97'/> + <part name='11' x='255' y='94'/> + <part name='12' x='258' y='90'/> + <part name='13' x='259' y='85'/> + <part name='14' x='260' y='81'/> + <part name='15' x='260' y='76'/> + <part name='16' x='260' y='71'/> + <part name='17' x='230' y='67'/> + <part name='18' x='232' y='65'/> + <part name='19' x='235' y='64'/> + <part name='20' x='238' y='64'/> + <part name='21' x='241' y='64'/> + <part name='22' x='248' y='65'/> + <part name='23' x='251' y='64'/> + <part name='24' x='254' y='64'/> + <part name='25' x='256' y='65'/> + <part name='26' x='258' y='67'/> + <part name='27' x='245' y='70'/> + <part name='28' x='245' y='73'/> + <part name='29' x='246' y='76'/> + <part name='30' x='246' y='79'/> + <part name='31' x='242' y='81'/> + <part name='32' x='244' y='82'/> + <part name='33' x='245' 
y='82'/> + <part name='34' x='247' y='82'/> + <part name='35' x='248' y='81'/> + <part name='36' x='234' y='71'/> + <part name='37' x='236' y='71'/> + <part name='38' x='238' y='71'/> + <part name='39' x='240' y='71'/> + <part name='40' x='238' y='72'/> + <part name='41' x='236' y='72'/> + <part name='42' x='249' y='71'/> + <part name='43' x='251' y='71'/> + <part name='44' x='253' y='71'/> + <part name='45' x='255' y='71'/> + <part name='46' x='253' y='71'/> + <part name='47' x='251' y='71'/> + <part name='48' x='236' y='87'/> + <part name='49' x='239' y='85'/> + <part name='50' x='243' y='85'/> + <part name='51' x='245' y='85'/> + <part name='52' x='247' y='85'/> + <part name='53' x='249' y='85'/> + <part name='54' x='251' y='87'/> + <part name='55' x='249' y='90'/> + <part name='56' x='247' y='92'/> + <part name='57' x='245' y='92'/> + <part name='58' x='242' y='92'/> + <part name='59' x='239' y='91'/> + <part name='60' x='237' y='87'/> + <part name='61' x='243' y='86'/> + <part name='62' x='245' y='86'/> + <part name='63' x='247' y='86'/> + <part name='64' x='250' y='87'/> + <part name='65' x='247' y='89'/> + <part name='66' x='245' y='90'/> + <part name='67' x='242' y='90'/> + </box> + <box top='113' left='194' width='44' height='44'> + <part name='00' x='191' y='124'/> + <part name='01' x='191' y='131'/> + <part name='02' x='191' y='137'/> + <part name='03' x='191' y='143'/> + <part name='04' x='193' y='149'/> + <part name='05' x='197' y='154'/> + <part name='06' x='202' y='157'/> + <part name='07' x='207' y='160'/> + <part name='08' x='213' y='161'/> + <part name='09' x='218' y='161'/> + <part name='10' x='223' y='158'/> + <part name='11' x='227' y='155'/> + <part name='12' x='230' y='150'/> + <part name='13' x='232' y='145'/> + <part name='14' x='233' y='140'/> + <part name='15' x='234' y='134'/> + <part name='16' x='235' y='128'/> + <part name='17' x='199' y='120'/> + <part name='18' x='203' y='118'/> + <part name='19' x='206' y='118'/> + <part name='20' x='210' y='118'/> + <part name='21' x='214' y='120'/> + <part name='22' x='222' y='122'/> + <part name='23' x='226' y='121'/> + <part name='24' x='229' y='121'/> + <part name='25' x='232' y='123'/> + <part name='26' x='234' y='125'/> + <part name='27' x='217' y='126'/> + <part name='28' x='217' y='130'/> + <part name='29' x='217' y='133'/> + <part name='30' x='217' y='137'/> + <part name='31' x='212' y='139'/> + <part name='32' x='214' y='139'/> + <part name='33' x='216' y='140'/> + <part name='34' x='218' y='140'/> + <part name='35' x='220' y='139'/> + <part name='36' x='204' y='126'/> + <part name='37' x='207' y='126'/> + <part name='38' x='209' y='126'/> + <part name='39' x='211' y='127'/> + <part name='40' x='209' y='127'/> + <part name='41' x='206' y='127'/> + <part name='42' x='222' y='128'/> + <part name='43' x='225' y='128'/> + <part name='44' x='227' y='128'/> + <part name='45' x='229' y='129'/> + <part name='46' x='227' y='129'/> + <part name='47' x='224' y='129'/> + <part name='48' x='203' y='143'/> + <part name='49' x='208' y='142'/> + <part name='50' x='212' y='142'/> + <part name='51' x='215' y='143'/> + <part name='52' x='218' y='143'/> + <part name='53' x='221' y='143'/> + <part name='54' x='224' y='145'/> + <part name='55' x='221' y='149'/> + <part name='56' x='217' y='150'/> + <part name='57' x='214' y='151'/> + <part name='58' x='211' y='150'/> + <part name='59' x='207' y='148'/> + <part name='60' x='204' y='143'/> + <part name='61' x='212' y='143'/> + <part name='62' x='215' y='144'/> + <part name='63' x='218' 
y='144'/> + <part name='64' x='223' y='145'/> + <part name='65' x='217' y='148'/> + <part name='66' x='214' y='148'/> + <part name='67' x='211' y='148'/> + </box> + <box top='130' left='262' width='37' height='37'> + <part name='00' x='265' y='138'/> + <part name='01' x='265' y='143'/> + <part name='02' x='265' y='147'/> + <part name='03' x='266' y='152'/> + <part name='04' x='268' y='156'/> + <part name='05' x='271' y='160'/> + <part name='06' x='274' y='164'/> + <part name='07' x='277' y='168'/> + <part name='08' x='281' y='169'/> + <part name='09' x='286' y='168'/> + <part name='10' x='291' y='165'/> + <part name='11' x='296' y='162'/> + <part name='12' x='299' y='157'/> + <part name='13' x='301' y='153'/> + <part name='14' x='302' y='147'/> + <part name='15' x='303' y='142'/> + <part name='16' x='303' y='137'/> + <part name='17' x='265' y='135'/> + <part name='18' x='267' y='133'/> + <part name='19' x='270' y='133'/> + <part name='20' x='273' y='134'/> + <part name='21' x='275' y='136'/> + <part name='22' x='282' y='136'/> + <part name='23' x='286' y='134'/> + <part name='24' x='289' y='134'/> + <part name='25' x='293' y='134'/> + <part name='26' x='296' y='135'/> + <part name='27' x='279' y='140'/> + <part name='28' x='279' y='143'/> + <part name='29' x='278' y='147'/> + <part name='30' x='278' y='150'/> + <part name='31' x='276' y='151'/> + <part name='32' x='277' y='152'/> + <part name='33' x='279' y='153'/> + <part name='34' x='281' y='152'/> + <part name='35' x='283' y='151'/> + <part name='36' x='269' y='140'/> + <part name='37' x='271' y='140'/> + <part name='38' x='273' y='140'/> + <part name='39' x='275' y='141'/> + <part name='40' x='273' y='141'/> + <part name='41' x='271' y='140'/> + <part name='42' x='285' y='141'/> + <part name='43' x='287' y='140'/> + <part name='44' x='290' y='140'/> + <part name='45' x='292' y='140'/> + <part name='46' x='290' y='141'/> + <part name='47' x='288' y='141'/> + <part name='48' x='273' y='156'/> + <part name='49' x='275' y='155'/> + <part name='50' x='278' y='154'/> + <part name='51' x='280' y='155'/> + <part name='52' x='282' y='154'/> + <part name='53' x='286' y='155'/> + <part name='54' x='290' y='156'/> + <part name='55' x='287' y='161'/> + <part name='56' x='283' y='163'/> + <part name='57' x='280' y='163'/> + <part name='58' x='278' y='163'/> + <part name='59' x='275' y='161'/> + <part name='60' x='274' y='156'/> + <part name='61' x='278' y='156'/> + <part name='62' x='280' y='156'/> + <part name='63' x='282' y='156'/> + <part name='64' x='289' y='156'/> + <part name='65' x='283' y='160'/> + <part name='66' x='280' y='160'/> + <part name='67' x='278' y='160'/> + </box> + <box top='134' left='366' width='37' height='37'> + <part name='00' x='367' y='140'/> + <part name='01' x='367' y='145'/> + <part name='02' x='367' y='149'/> + <part name='03' x='366' y='154'/> + <part name='04' x='367' y='159'/> + <part name='05' x='368' y='163'/> + <part name='06' x='371' y='167'/> + <part name='07' x='374' y='170'/> + <part name='08' x='378' y='172'/> + <part name='09' x='383' y='172'/> + <part name='10' x='388' y='170'/> + <part name='11' x='393' y='168'/> + <part name='12' x='397' y='165'/> + <part name='13' x='400' y='161'/> + <part name='14' x='401' y='156'/> + <part name='15' x='403' y='152'/> + <part name='16' x='405' y='147'/> + <part name='17' x='369' y='138'/> + <part name='18' x='371' y='136'/> + <part name='19' x='375' y='136'/> + <part name='20' x='378' y='136'/> + <part name='21' x='381' y='138'/> + <part name='22' x='389' y='139'/> + 
<part name='23' x='393' y='139'/> + <part name='24' x='397' y='139'/> + <part name='25' x='400' y='141'/> + <part name='26' x='402' y='144'/> + <part name='27' x='384' y='142'/> + <part name='28' x='383' y='145'/> + <part name='29' x='382' y='147'/> + <part name='30' x='381' y='150'/> + <part name='31' x='378' y='151'/> + <part name='32' x='379' y='152'/> + <part name='33' x='381' y='153'/> + <part name='34' x='383' y='153'/> + <part name='35' x='386' y='152'/> + <part name='36' x='373' y='141'/> + <part name='37' x='375' y='140'/> + <part name='38' x='377' y='140'/> + <part name='39' x='379' y='142'/> + <part name='40' x='377' y='142'/> + <part name='41' x='375' y='141'/> + <part name='42' x='390' y='143'/> + <part name='43' x='392' y='142'/> + <part name='44' x='395' y='143'/> + <part name='45' x='396' y='144'/> + <part name='46' x='394' y='144'/> + <part name='47' x='392' y='144'/> + <part name='48' x='372' y='155'/> + <part name='49' x='375' y='154'/> + <part name='50' x='379' y='155'/> + <part name='51' x='381' y='155'/> + <part name='52' x='384' y='155'/> + <part name='53' x='387' y='156'/> + <part name='54' x='391' y='158'/> + <part name='55' x='387' y='162'/> + <part name='56' x='383' y='163'/> + <part name='57' x='380' y='163'/> + <part name='58' x='377' y='162'/> + <part name='59' x='374' y='160'/> + <part name='60' x='373' y='156'/> + <part name='61' x='378' y='156'/> + <part name='62' x='381' y='156'/> + <part name='63' x='384' y='156'/> + <part name='64' x='390' y='158'/> + <part name='65' x='383' y='161'/> + <part name='66' x='380' y='161'/> + <part name='67' x='378' y='160'/> + </box> + <box top='122' left='314' width='37' height='37'> + <part name='00' x='319' y='132'/> + <part name='01' x='319' y='137'/> + <part name='02' x='320' y='141'/> + <part name='03' x='321' y='145'/> + <part name='04' x='322' y='149'/> + <part name='05' x='324' y='153'/> + <part name='06' x='327' y='156'/> + <part name='07' x='330' y='159'/> + <part name='08' x='334' y='159'/> + <part name='09' x='338' y='158'/> + <part name='10' x='343' y='155'/> + <part name='11' x='347' y='152'/> + <part name='12' x='350' y='148'/> + <part name='13' x='352' y='143'/> + <part name='14' x='352' y='138'/> + <part name='15' x='353' y='133'/> + <part name='16' x='353' y='128'/> + <part name='17' x='319' y='129'/> + <part name='18' x='320' y='127'/> + <part name='19' x='322' y='126'/> + <part name='20' x='324' y='126'/> + <part name='21' x='327' y='127'/> + <part name='22' x='334' y='125'/> + <part name='23' x='337' y='124'/> + <part name='24' x='340' y='124'/> + <part name='25' x='343' y='124'/> + <part name='26' x='346' y='126'/> + <part name='27' x='330' y='130'/> + <part name='28' x='330' y='133'/> + <part name='29' x='330' y='135'/> + <part name='30' x='330' y='138'/> + <part name='31' x='328' y='140'/> + <part name='32' x='329' y='141'/> + <part name='33' x='331' y='141'/> + <part name='34' x='333' y='140'/> + <part name='35' x='335' y='140'/> + <part name='36' x='322' y='132'/> + <part name='37' x='324' y='130'/> + <part name='38' x='326' y='130'/> + <part name='39' x='328' y='131'/> + <part name='40' x='326' y='132'/> + <part name='41' x='324' y='132'/> + <part name='42' x='336' y='130'/> + <part name='43' x='338' y='128'/> + <part name='44' x='340' y='128'/> + <part name='45' x='342' y='129'/> + <part name='46' x='341' y='130'/> + <part name='47' x='338' y='130'/> + <part name='48' x='326' y='146'/> + <part name='49' x='327' y='144'/> + <part name='50' x='330' y='144'/> + <part name='51' x='332' y='144'/> + 
<part name='52' x='334' y='143'/> + <part name='53' x='338' y='143'/> + <part name='54' x='341' y='144'/> + <part name='55' x='338' y='148'/> + <part name='56' x='335' y='150'/> + <part name='57' x='333' y='151'/> + <part name='58' x='330' y='151'/> + <part name='59' x='328' y='150'/> + <part name='60' x='327' y='146'/> + <part name='61' x='330' y='144'/> + <part name='62' x='332' y='145'/> + <part name='63' x='334' y='144'/> + <part name='64' x='340' y='145'/> + <part name='65' x='335' y='148'/> + <part name='66' x='332' y='149'/> + <part name='67' x='330' y='149'/> + </box> + <box top='141' left='107' width='52' height='53'> + <part name='00' x='107' y='151'/> + <part name='01' x='106' y='158'/> + <part name='02' x='106' y='165'/> + <part name='03' x='106' y='171'/> + <part name='04' x='108' y='178'/> + <part name='05' x='112' y='184'/> + <part name='06' x='117' y='188'/> + <part name='07' x='122' y='192'/> + <part name='08' x='128' y='194'/> + <part name='09' x='133' y='193'/> + <part name='10' x='138' y='191'/> + <part name='11' x='143' y='187'/> + <part name='12' x='147' y='183'/> + <part name='13' x='150' y='177'/> + <part name='14' x='151' y='172'/> + <part name='15' x='153' y='166'/> + <part name='16' x='154' y='161'/> + <part name='17' x='115' y='148'/> + <part name='18' x='119' y='145'/> + <part name='19' x='124' y='144'/> + <part name='20' x='129' y='146'/> + <part name='21' x='132' y='148'/> + <part name='22' x='141' y='150'/> + <part name='23' x='144' y='149'/> + <part name='24' x='149' y='149'/> + <part name='25' x='152' y='152'/> + <part name='26' x='153' y='155'/> + <part name='27' x='136' y='153'/> + <part name='28' x='136' y='157'/> + <part name='29' x='135' y='161'/> + <part name='30' x='135' y='165'/> + <part name='31' x='128' y='166'/> + <part name='32' x='131' y='167'/> + <part name='33' x='133' y='169'/> + <part name='34' x='136' y='168'/> + <part name='35' x='138' y='168'/> + <part name='36' x='120' y='152'/> + <part name='37' x='123' y='151'/> + <part name='38' x='126' y='152'/> + <part name='39' x='128' y='154'/> + <part name='40' x='125' y='154'/> + <part name='41' x='122' y='153'/> + <part name='42' x='140' y='157'/> + <part name='43' x='143' y='155'/> + <part name='44' x='146' y='155'/> + <part name='45' x='148' y='158'/> + <part name='46' x='146' y='158'/> + <part name='47' x='143' y='158'/> + <part name='48' x='119' y='172'/> + <part name='49' x='124' y='170'/> + <part name='50' x='129' y='170'/> + <part name='51' x='131' y='171'/> + <part name='52' x='135' y='171'/> + <part name='53' x='138' y='173'/> + <part name='54' x='141' y='176'/> + <part name='55' x='137' y='180'/> + <part name='56' x='133' y='181'/> + <part name='57' x='130' y='180'/> + <part name='58' x='127' y='179'/> + <part name='59' x='122' y='177'/> + <part name='60' x='120' y='172'/> + <part name='61' x='128' y='171'/> + <part name='62' x='131' y='172'/> + <part name='63' x='134' y='173'/> + <part name='64' x='139' y='175'/> + <part name='65' x='134' y='178'/> + <part name='66' x='130' y='178'/> + <part name='67' x='127' y='177'/> + </box> + <box top='84' left='137' width='44' height='44'> + <part name='00' x='131' y='97'/> + <part name='01' x='133' y='103'/> + <part name='02' x='134' y='109'/> + <part name='03' x='137' y='115'/> + <part name='04' x='141' y='119'/> + <part name='05' x='147' y='123'/> + <part name='06' x='153' y='126'/> + <part name='07' x='159' y='128'/> + <part name='08' x='165' y='128'/> + <part name='09' x='169' y='126'/> + <part name='10' x='172' y='122'/> + <part 
name='11' x='174' y='117'/> + <part name='12' x='176' y='112'/> + <part name='13' x='177' y='107'/> + <part name='14' x='177' y='102'/> + <part name='15' x='176' y='97'/> + <part name='16' x='175' y='92'/> + <part name='17' x='140' y='92'/> + <part name='18' x='143' y='89'/> + <part name='19' x='147' y='88'/> + <part name='20' x='151' y='88'/> + <part name='21' x='156' y='88'/> + <part name='22' x='163' y='88'/> + <part name='23' x='165' y='86'/> + <part name='24' x='168' y='85'/> + <part name='25' x='171' y='85'/> + <part name='26' x='173' y='87'/> + <part name='27' x='160' y='92'/> + <part name='28' x='161' y='95'/> + <part name='29' x='162' y='99'/> + <part name='30' x='163' y='102'/> + <part name='31' x='158' y='105'/> + <part name='32' x='160' y='106'/> + <part name='33' x='163' y='106'/> + <part name='34' x='164' y='105'/> + <part name='35' x='166' y='104'/> + <part name='36' x='146' y='94'/> + <part name='37' x='148' y='93'/> + <part name='38' x='150' y='92'/> + <part name='39' x='153' y='94'/> + <part name='40' x='150' y='94'/> + <part name='41' x='148' y='94'/> + <part name='42' x='164' y='92'/> + <part name='43' x='166' y='90'/> + <part name='44' x='168' y='90'/> + <part name='45' x='170' y='91'/> + <part name='46' x='168' y='92'/> + <part name='47' x='166' y='92'/> + <part name='48' x='152' y='113'/> + <part name='49' x='156' y='110'/> + <part name='50' x='160' y='109'/> + <part name='51' x='163' y='109'/> + <part name='52' x='165' y='108'/> + <part name='53' x='168' y='108'/> + <part name='54' x='170' y='110'/> + <part name='55' x='169' y='114'/> + <part name='56' x='166' y='116'/> + <part name='57' x='164' y='117'/> + <part name='58' x='161' y='117'/> + <part name='59' x='156' y='116'/> + <part name='60' x='153' y='113'/> + <part name='61' x='160' y='111'/> + <part name='62' x='163' y='111'/> + <part name='63' x='165' y='110'/> + <part name='64' x='169' y='110'/> + <part name='65' x='166' y='114'/> + <part name='66' x='163' y='115'/> + <part name='67' x='160' y='115'/> + </box> + </image> + <image file='2009_004587.jpg'> + <box top='46' left='154' width='75' height='76'> + <part name='00' x='147' y='74'/> + <part name='01' x='147' y='84'/> + <part name='02' x='148' y='94'/> + <part name='03' x='150' y='104'/> + <part name='04' x='154' y='113'/> + <part name='05' x='162' y='121'/> + <part name='06' x='171' y='127'/> + <part name='07' x='180' y='132'/> + <part name='08' x='191' y='133'/> + <part name='09' x='201' y='132'/> + <part name='10' x='208' y='126'/> + <part name='11' x='214' y='119'/> + <part name='12' x='218' y='111'/> + <part name='13' x='221' y='102'/> + <part name='14' x='222' y='93'/> + <part name='15' x='222' y='85'/> + <part name='16' x='222' y='77'/> + <part name='17' x='160' y='65'/> + <part name='18' x='165' y='60'/> + <part name='19' x='172' y='57'/> + <part name='20' x='180' y='57'/> + <part name='21' x='187' y='59'/> + <part name='22' x='200' y='60'/> + <part name='23' x='206' y='59'/> + <part name='24' x='213' y='60'/> + <part name='25' x='217' y='64'/> + <part name='26' x='219' y='69'/> + <part name='27' x='194' y='67'/> + <part name='28' x='194' y='72'/> + <part name='29' x='195' y='77'/> + <part name='30' x='196' y='83'/> + <part name='31' x='186' y='89'/> + <part name='32' x='190' y='91'/> + <part name='33' x='194' y='92'/> + <part name='34' x='198' y='91'/> + <part name='35' x='201' y='90'/> + <part name='36' x='169' y='69'/> + <part name='37' x='173' y='67'/> + <part name='38' x='178' y='67'/> + <part name='39' x='182' y='70'/> + <part name='40' 
x='178' y='70'/> + <part name='41' x='173' y='70'/> + <part name='42' x='201' y='71'/> + <part name='43' x='205' y='70'/> + <part name='44' x='209' y='70'/> + <part name='45' x='213' y='73'/> + <part name='46' x='209' y='73'/> + <part name='47' x='205' y='73'/> + <part name='48' x='174' y='100'/> + <part name='49' x='182' y='99'/> + <part name='50' x='189' y='99'/> + <part name='51' x='194' y='100'/> + <part name='52' x='198' y='99'/> + <part name='53' x='203' y='101'/> + <part name='54' x='207' y='104'/> + <part name='55' x='202' y='107'/> + <part name='56' x='197' y='109'/> + <part name='57' x='192' y='108'/> + <part name='58' x='188' y='108'/> + <part name='59' x='181' y='105'/> + <part name='60' x='176' y='101'/> + <part name='61' x='189' y='101'/> + <part name='62' x='193' y='102'/> + <part name='63' x='198' y='102'/> + <part name='64' x='205' y='104'/> + <part name='65' x='197' y='105'/> + <part name='66' x='193' y='105'/> + <part name='67' x='188' y='104'/> + </box> + <box top='280' left='266' width='63' height='63'> + <part name='00' x='267' y='303'/> + <part name='01' x='267' y='311'/> + <part name='02' x='269' y='319'/> + <part name='03' x='271' y='327'/> + <part name='04' x='275' y='334'/> + <part name='05' x='281' y='340'/> + <part name='06' x='288' y='345'/> + <part name='07' x='296' y='348'/> + <part name='08' x='304' y='348'/> + <part name='09' x='311' y='345'/> + <part name='10' x='316' y='339'/> + <part name='11' x='320' y='332'/> + <part name='12' x='323' y='325'/> + <part name='13' x='325' y='318'/> + <part name='14' x='325' y='311'/> + <part name='15' x='325' y='304'/> + <part name='16' x='324' y='297'/> + <part name='17' x='272' y='294'/> + <part name='18' x='277' y='291'/> + <part name='19' x='282' y='290'/> + <part name='20' x='287' y='290'/> + <part name='21' x='293' y='292'/> + <part name='22' x='305' y='291'/> + <part name='23' x='309' y='288'/> + <part name='24' x='313' y='287'/> + <part name='25' x='318' y='287'/> + <part name='26' x='321' y='290'/> + <part name='27' x='300' y='297'/> + <part name='28' x='301' y='302'/> + <part name='29' x='302' y='307'/> + <part name='30' x='303' y='313'/> + <part name='31' x='296' y='316'/> + <part name='32' x='299' y='317'/> + <part name='33' x='303' y='318'/> + <part name='34' x='306' y='317'/> + <part name='35' x='308' y='315'/> + <part name='36' x='279' y='298'/> + <part name='37' x='283' y='296'/> + <part name='38' x='287' y='296'/> + <part name='39' x='291' y='299'/> + <part name='40' x='287' y='300'/> + <part name='41' x='283' y='300'/> + <part name='42' x='306' y='298'/> + <part name='43' x='310' y='294'/> + <part name='44' x='314' y='293'/> + <part name='45' x='318' y='296'/> + <part name='46' x='315' y='297'/> + <part name='47' x='311' y='298'/> + <part name='48' x='286' y='324'/> + <part name='49' x='292' y='322'/> + <part name='50' x='299' y='322'/> + <part name='51' x='303' y='322'/> + <part name='52' x='307' y='321'/> + <part name='53' x='311' y='321'/> + <part name='54' x='315' y='322'/> + <part name='55' x='312' y='329'/> + <part name='56' x='308' y='333'/> + <part name='57' x='303' y='334'/> + <part name='58' x='299' y='333'/> + <part name='59' x='292' y='331'/> + <part name='60' x='288' y='324'/> + <part name='61' x='299' y='324'/> + <part name='62' x='303' y='324'/> + <part name='63' x='307' y='324'/> + <part name='64' x='313' y='323'/> + <part name='65' x='307' y='329'/> + <part name='66' x='303' y='330'/> + <part name='67' x='299' y='329'/> + </box> + </image> +</images> +</dataset> diff --git 
a/ml/dlib/examples/faces/training.xml b/ml/dlib/examples/faces/training.xml new file mode 100644 index 00000000..b7e1d007 --- /dev/null +++ b/ml/dlib/examples/faces/training.xml @@ -0,0 +1,34 @@ +<?xml version='1.0' encoding='ISO-8859-1'?> +<?xml-stylesheet type='text/xsl' href='image_metadata_stylesheet.xsl'?> +<dataset> +<name>Training faces</name> +<comment>These are images from the PASCAL VOC 2011 dataset.</comment> +<images> + <image file='2007_007763.jpg'> + <box top='90' left='194' width='37' height='37'/> + <box top='114' left='158' width='37' height='37'/> + <box top='89' left='381' width='45' height='44'/> + <box top='198' left='94' width='37' height='37'/> + <box top='214' left='178' width='37' height='37'/> + <box top='86' left='294' width='37' height='37'/> + <box top='233' left='309' width='45' height='44'/> + </image> + <image file='2008_002079.jpg'> + <box top='166' left='407' width='37' height='37'/> + <box top='134' left='122' width='37' height='37'/> + <box top='138' left='346' width='37' height='37'/> + <box top='171' left='433' width='53' height='52'/> + <box top='134' left='62' width='37' height='37'/> + <box top='194' left='41' width='44' height='44'/> + </image> + <image file='2008_001009.jpg'> + <box top='79' left='145' width='76' height='76'/> + <box top='214' left='125' width='90' height='91'/> + </image> + <image file='2008_001322.jpg'> + <box top='162' left='104' width='76' height='76'/> + <box top='218' left='232' width='63' height='63'/> + <box top='155' left='344' width='90' height='90'/> + </image> +</images> +</dataset>
\ No newline at end of file diff --git a/ml/dlib/examples/faces/training_with_face_landmarks.xml b/ml/dlib/examples/faces/training_with_face_landmarks.xml new file mode 100644 index 00000000..b87e7535 --- /dev/null +++ b/ml/dlib/examples/faces/training_with_face_landmarks.xml @@ -0,0 +1,1280 @@ +<?xml version='1.0' encoding='ISO-8859-1'?> +<?xml-stylesheet type='text/xsl' href='image_metadata_stylesheet.xsl'?> +<dataset> +<name>Training faces</name> +<comment>These are images from the PASCAL VOC 2011 dataset. + The face landmarks are from dlib's shape_predictor_68_face_landmarks.dat + landmarking model. The model uses the 68 landmark scheme used by the iBUG + 300-W dataset. +</comment> +<images> + <image file='2007_007763.jpg'> + <box top='90' left='194' width='37' height='37'> + <part name='00' x='201' y='107'/> + <part name='01' x='201' y='110'/> + <part name='02' x='201' y='113'/> + <part name='03' x='202' y='117'/> + <part name='04' x='204' y='120'/> + <part name='05' x='206' y='123'/> + <part name='06' x='208' y='126'/> + <part name='07' x='210' y='129'/> + <part name='08' x='213' y='129'/> + <part name='09' x='217' y='129'/> + <part name='10' x='220' y='127'/> + <part name='11' x='224' y='124'/> + <part name='12' x='228' y='121'/> + <part name='13' x='230' y='118'/> + <part name='14' x='231' y='114'/> + <part name='15' x='231' y='109'/> + <part name='16' x='231' y='105'/> + <part name='17' x='201' y='102'/> + <part name='18' x='202' y='101'/> + <part name='19' x='204' y='100'/> + <part name='20' x='206' y='100'/> + <part name='21' x='207' y='101'/> + <part name='22' x='211' y='101'/> + <part name='23' x='214' y='100'/> + <part name='24' x='217' y='99'/> + <part name='25' x='220' y='100'/> + <part name='26' x='223' y='102'/> + <part name='27' x='209' y='105'/> + <part name='28' x='209' y='108'/> + <part name='29' x='208' y='111'/> + <part name='30' x='208' y='114'/> + <part name='31' x='206' y='115'/> + <part name='32' x='208' y='116'/> + <part name='33' x='209' y='116'/> + <part name='34' x='211' y='116'/> + <part name='35' x='213' y='115'/> + <part name='36' x='203' y='106'/> + <part name='37' x='204' y='105'/> + <part name='38' x='206' y='105'/> + <part name='39' x='207' y='106'/> + <part name='40' x='206' y='106'/> + <part name='41' x='204' y='106'/> + <part name='42' x='215' y='106'/> + <part name='43' x='216' y='105'/> + <part name='44' x='218' y='105'/> + <part name='45' x='220' y='106'/> + <part name='46' x='218' y='106'/> + <part name='47' x='216' y='106'/> + <part name='48' x='207' y='120'/> + <part name='49' x='207' y='119'/> + <part name='50' x='209' y='119'/> + <part name='51' x='210' y='119'/> + <part name='52' x='212' y='118'/> + <part name='53' x='215' y='119'/> + <part name='54' x='218' y='119'/> + <part name='55' x='216' y='122'/> + <part name='56' x='213' y='123'/> + <part name='57' x='211' y='123'/> + <part name='58' x='210' y='123'/> + <part name='59' x='208' y='122'/> + <part name='60' x='208' y='120'/> + <part name='61' x='209' y='120'/> + <part name='62' x='211' y='120'/> + <part name='63' x='212' y='119'/> + <part name='64' x='217' y='119'/> + <part name='65' x='213' y='121'/> + <part name='66' x='211' y='122'/> + <part name='67' x='209' y='121'/> + </box> + <box top='114' left='158' width='37' height='37'> + <part name='00' x='165' y='131'/> + <part name='01' x='164' y='134'/> + <part name='02' x='165' y='137'/> + <part name='03' x='166' y='141'/> + <part name='04' x='167' y='144'/> + <part name='05' x='169' y='147'/> + <part name='06' x='172' y='149'/> + 
<part name='07' x='174' y='151'/> + <part name='08' x='177' y='152'/> + <part name='09' x='181' y='152'/> + <part name='10' x='184' y='150'/> + <part name='11' x='188' y='147'/> + <part name='12' x='191' y='145'/> + <part name='13' x='193' y='142'/> + <part name='14' x='194' y='138'/> + <part name='15' x='194' y='134'/> + <part name='16' x='194' y='130'/> + <part name='17' x='165' y='127'/> + <part name='18' x='166' y='125'/> + <part name='19' x='167' y='125'/> + <part name='20' x='169' y='125'/> + <part name='21' x='171' y='126'/> + <part name='22' x='175' y='125'/> + <part name='23' x='178' y='124'/> + <part name='24' x='181' y='124'/> + <part name='25' x='184' y='125'/> + <part name='26' x='186' y='126'/> + <part name='27' x='173' y='130'/> + <part name='28' x='173' y='132'/> + <part name='29' x='173' y='134'/> + <part name='30' x='173' y='137'/> + <part name='31' x='171' y='139'/> + <part name='32' x='172' y='139'/> + <part name='33' x='174' y='140'/> + <part name='34' x='175' y='139'/> + <part name='35' x='177' y='139'/> + <part name='36' x='167' y='131'/> + <part name='37' x='168' y='130'/> + <part name='38' x='170' y='130'/> + <part name='39' x='171' y='131'/> + <part name='40' x='170' y='131'/> + <part name='41' x='168' y='131'/> + <part name='42' x='178' y='130'/> + <part name='43' x='179' y='129'/> + <part name='44' x='181' y='129'/> + <part name='45' x='183' y='130'/> + <part name='46' x='181' y='130'/> + <part name='47' x='179' y='130'/> + <part name='48' x='171' y='143'/> + <part name='49' x='172' y='143'/> + <part name='50' x='173' y='143'/> + <part name='51' x='174' y='143'/> + <part name='52' x='176' y='142'/> + <part name='53' x='178' y='142'/> + <part name='54' x='182' y='142'/> + <part name='55' x='179' y='145'/> + <part name='56' x='176' y='146'/> + <part name='57' x='175' y='146'/> + <part name='58' x='174' y='146'/> + <part name='59' x='172' y='145'/> + <part name='60' x='172' y='143'/> + <part name='61' x='174' y='143'/> + <part name='62' x='175' y='144'/> + <part name='63' x='176' y='143'/> + <part name='64' x='181' y='143'/> + <part name='65' x='176' y='144'/> + <part name='66' x='175' y='144'/> + <part name='67' x='174' y='144'/> + </box> + <box top='89' left='381' width='45' height='44'> + <part name='00' x='393' y='107'/> + <part name='01' x='393' y='111'/> + <part name='02' x='393' y='116'/> + <part name='03' x='395' y='120'/> + <part name='04' x='398' y='123'/> + <part name='05' x='400' y='127'/> + <part name='06' x='401' y='132'/> + <part name='07' x='403' y='136'/> + <part name='08' x='406' y='137'/> + <part name='09' x='411' y='137'/> + <part name='10' x='416' y='134'/> + <part name='11' x='422' y='130'/> + <part name='12' x='427' y='126'/> + <part name='13' x='430' y='120'/> + <part name='14' x='432' y='114'/> + <part name='15' x='431' y='107'/> + <part name='16' x='431' y='100'/> + <part name='17' x='391' y='98'/> + <part name='18' x='392' y='97'/> + <part name='19' x='393' y='97'/> + <part name='20' x='394' y='97'/> + <part name='21' x='396' y='99'/> + <part name='22' x='401' y='98'/> + <part name='23' x='404' y='96'/> + <part name='24' x='408' y='95'/> + <part name='25' x='412' y='96'/> + <part name='26' x='415' y='97'/> + <part name='27' x='399' y='104'/> + <part name='28' x='398' y='107'/> + <part name='29' x='397' y='111'/> + <part name='30' x='396' y='114'/> + <part name='31' x='396' y='116'/> + <part name='32' x='397' y='117'/> + <part name='33' x='399' y='118'/> + <part name='34' x='401' y='117'/> + <part name='35' x='403' y='116'/> + <part 
name='36' x='394' y='104'/> + <part name='37' x='395' y='103'/> + <part name='38' x='396' y='103'/> + <part name='39' x='398' y='104'/> + <part name='40' x='396' y='104'/> + <part name='41' x='395' y='104'/> + <part name='42' x='405' y='103'/> + <part name='43' x='406' y='102'/> + <part name='44' x='408' y='102'/> + <part name='45' x='411' y='102'/> + <part name='46' x='409' y='103'/> + <part name='47' x='407' y='103'/> + <part name='48' x='399' y='123'/> + <part name='49' x='397' y='121'/> + <part name='50' x='399' y='121'/> + <part name='51' x='400' y='121'/> + <part name='52' x='402' y='121'/> + <part name='53' x='407' y='121'/> + <part name='54' x='411' y='122'/> + <part name='55' x='408' y='126'/> + <part name='56' x='404' y='129'/> + <part name='57' x='402' y='129'/> + <part name='58' x='400' y='129'/> + <part name='59' x='399' y='127'/> + <part name='60' x='399' y='123'/> + <part name='61' x='399' y='122'/> + <part name='62' x='401' y='122'/> + <part name='63' x='403' y='122'/> + <part name='64' x='410' y='122'/> + <part name='65' x='403' y='126'/> + <part name='66' x='402' y='126'/> + <part name='67' x='400' y='126'/> + </box> + <box top='198' left='94' width='37' height='37'> + <part name='00' x='100' y='208'/> + <part name='01' x='100' y='211'/> + <part name='02' x='100' y='215'/> + <part name='03' x='100' y='218'/> + <part name='04' x='101' y='222'/> + <part name='05' x='103' y='225'/> + <part name='06' x='105' y='227'/> + <part name='07' x='107' y='229'/> + <part name='08' x='110' y='231'/> + <part name='09' x='114' y='230'/> + <part name='10' x='118' y='229'/> + <part name='11' x='122' y='227'/> + <part name='12' x='126' y='224'/> + <part name='13' x='128' y='221'/> + <part name='14' x='129' y='217'/> + <part name='15' x='130' y='213'/> + <part name='16' x='130' y='209'/> + <part name='17' x='101' y='204'/> + <part name='18' x='102' y='203'/> + <part name='19' x='104' y='203'/> + <part name='20' x='106' y='203'/> + <part name='21' x='108' y='204'/> + <part name='22' x='112' y='204'/> + <part name='23' x='115' y='203'/> + <part name='24' x='118' y='203'/> + <part name='25' x='121' y='204'/> + <part name='26' x='123' y='205'/> + <part name='27' x='110' y='207'/> + <part name='28' x='110' y='210'/> + <part name='29' x='109' y='212'/> + <part name='30' x='109' y='215'/> + <part name='31' x='107' y='216'/> + <part name='32' x='108' y='217'/> + <part name='33' x='109' y='218'/> + <part name='34' x='111' y='217'/> + <part name='35' x='113' y='217'/> + <part name='36' x='103' y='207'/> + <part name='37' x='104' y='207'/> + <part name='38' x='106' y='207'/> + <part name='39' x='107' y='208'/> + <part name='40' x='106' y='208'/> + <part name='41' x='104' y='208'/> + <part name='42' x='115' y='208'/> + <part name='43' x='116' y='207'/> + <part name='44' x='118' y='207'/> + <part name='45' x='119' y='208'/> + <part name='46' x='118' y='209'/> + <part name='47' x='116' y='208'/> + <part name='48' x='106' y='221'/> + <part name='49' x='107' y='220'/> + <part name='50' x='108' y='220'/> + <part name='51' x='110' y='220'/> + <part name='52' x='111' y='220'/> + <part name='53' x='114' y='221'/> + <part name='54' x='117' y='221'/> + <part name='55' x='114' y='223'/> + <part name='56' x='111' y='223'/> + <part name='57' x='110' y='223'/> + <part name='58' x='108' y='223'/> + <part name='59' x='107' y='222'/> + <part name='60' x='107' y='221'/> + <part name='61' x='108' y='221'/> + <part name='62' x='110' y='221'/> + <part name='63' x='111' y='221'/> + <part name='64' x='115' y='221'/> + <part 
name='65' x='111' y='221'/> + <part name='66' x='110' y='221'/> + <part name='67' x='108' y='221'/> + </box> + <box top='214' left='178' width='37' height='37'> + <part name='00' x='186' y='225'/> + <part name='01' x='185' y='228'/> + <part name='02' x='184' y='231'/> + <part name='03' x='184' y='235'/> + <part name='04' x='184' y='238'/> + <part name='05' x='185' y='241'/> + <part name='06' x='186' y='245'/> + <part name='07' x='187' y='248'/> + <part name='08' x='190' y='249'/> + <part name='09' x='194' y='250'/> + <part name='10' x='198' y='249'/> + <part name='11' x='203' y='247'/> + <part name='12' x='207' y='245'/> + <part name='13' x='210' y='242'/> + <part name='14' x='212' y='238'/> + <part name='15' x='213' y='234'/> + <part name='16' x='214' y='229'/> + <part name='17' x='186' y='221'/> + <part name='18' x='188' y='220'/> + <part name='19' x='190' y='220'/> + <part name='20' x='191' y='221'/> + <part name='21' x='193' y='223'/> + <part name='22' x='197' y='224'/> + <part name='23' x='200' y='224'/> + <part name='24' x='203' y='224'/> + <part name='25' x='205' y='225'/> + <part name='26' x='207' y='227'/> + <part name='27' x='194' y='226'/> + <part name='28' x='193' y='229'/> + <part name='29' x='192' y='231'/> + <part name='30' x='191' y='234'/> + <part name='31' x='189' y='234'/> + <part name='32' x='190' y='236'/> + <part name='33' x='192' y='237'/> + <part name='34' x='194' y='236'/> + <part name='35' x='195' y='236'/> + <part name='36' x='188' y='224'/> + <part name='37' x='189' y='224'/> + <part name='38' x='191' y='225'/> + <part name='39' x='192' y='226'/> + <part name='40' x='190' y='225'/> + <part name='41' x='189' y='225'/> + <part name='42' x='199' y='227'/> + <part name='43' x='201' y='227'/> + <part name='44' x='202' y='228'/> + <part name='45' x='204' y='229'/> + <part name='46' x='202' y='229'/> + <part name='47' x='200' y='228'/> + <part name='48' x='188' y='238'/> + <part name='49' x='189' y='238'/> + <part name='50' x='190' y='239'/> + <part name='51' x='192' y='239'/> + <part name='52' x='194' y='239'/> + <part name='53' x='196' y='240'/> + <part name='54' x='199' y='241'/> + <part name='55' x='196' y='243'/> + <part name='56' x='193' y='243'/> + <part name='57' x='191' y='243'/> + <part name='58' x='189' y='242'/> + <part name='59' x='188' y='241'/> + <part name='60' x='188' y='239'/> + <part name='61' x='190' y='240'/> + <part name='62' x='192' y='240'/> + <part name='63' x='193' y='240'/> + <part name='64' x='198' y='241'/> + <part name='65' x='193' y='241'/> + <part name='66' x='191' y='241'/> + <part name='67' x='190' y='240'/> + </box> + <box top='86' left='294' width='37' height='37'> + <part name='00' x='299' y='101'/> + <part name='01' x='299' y='105'/> + <part name='02' x='299' y='109'/> + <part name='03' x='300' y='113'/> + <part name='04' x='302' y='116'/> + <part name='05' x='304' y='120'/> + <part name='06' x='306' y='124'/> + <part name='07' x='308' y='127'/> + <part name='08' x='312' y='128'/> + <part name='09' x='316' y='127'/> + <part name='10' x='321' y='125'/> + <part name='11' x='325' y='121'/> + <part name='12' x='329' y='118'/> + <part name='13' x='332' y='113'/> + <part name='14' x='333' y='108'/> + <part name='15' x='334' y='102'/> + <part name='16' x='334' y='96'/> + <part name='17' x='298' y='97'/> + <part name='18' x='298' y='95'/> + <part name='19' x='300' y='94'/> + <part name='20' x='302' y='94'/> + <part name='21' x='305' y='95'/> + <part name='22' x='309' y='94'/> + <part name='23' x='313' y='92'/> + <part name='24' x='316' 
y='91'/> + <part name='25' x='320' y='92'/> + <part name='26' x='324' y='94'/> + <part name='27' x='307' y='98'/> + <part name='28' x='307' y='101'/> + <part name='29' x='306' y='104'/> + <part name='30' x='306' y='107'/> + <part name='31' x='304' y='109'/> + <part name='32' x='306' y='110'/> + <part name='33' x='308' y='110'/> + <part name='34' x='310' y='109'/> + <part name='35' x='312' y='108'/> + <part name='36' x='301' y='100'/> + <part name='37' x='302' y='99'/> + <part name='38' x='303' y='99'/> + <part name='39' x='305' y='99'/> + <part name='40' x='304' y='100'/> + <part name='41' x='302' y='100'/> + <part name='42' x='314' y='98'/> + <part name='43' x='315' y='97'/> + <part name='44' x='317' y='97'/> + <part name='45' x='319' y='97'/> + <part name='46' x='317' y='98'/> + <part name='47' x='315' y='98'/> + <part name='48' x='305' y='115'/> + <part name='49' x='306' y='114'/> + <part name='50' x='307' y='113'/> + <part name='51' x='309' y='113'/> + <part name='52' x='310' y='112'/> + <part name='53' x='314' y='112'/> + <part name='54' x='318' y='113'/> + <part name='55' x='315' y='117'/> + <part name='56' x='312' y='118'/> + <part name='57' x='310' y='119'/> + <part name='58' x='308' y='119'/> + <part name='59' x='307' y='118'/> + <part name='60' x='306' y='115'/> + <part name='61' x='307' y='114'/> + <part name='62' x='309' y='114'/> + <part name='63' x='311' y='113'/> + <part name='64' x='317' y='113'/> + <part name='65' x='311' y='117'/> + <part name='66' x='309' y='117'/> + <part name='67' x='308' y='117'/> + </box> + <box top='233' left='309' width='45' height='44'> + <part name='00' x='322' y='246'/> + <part name='01' x='320' y='249'/> + <part name='02' x='318' y='253'/> + <part name='03' x='318' y='258'/> + <part name='04' x='318' y='262'/> + <part name='05' x='318' y='267'/> + <part name='06' x='319' y='271'/> + <part name='07' x='320' y='276'/> + <part name='08' x='323' y='278'/> + <part name='09' x='328' y='279'/> + <part name='10' x='334' y='279'/> + <part name='11' x='340' y='278'/> + <part name='12' x='346' y='276'/> + <part name='13' x='350' y='273'/> + <part name='14' x='354' y='268'/> + <part name='15' x='356' y='263'/> + <part name='16' x='359' y='257'/> + <part name='17' x='323' y='239'/> + <part name='18' x='325' y='238'/> + <part name='19' x='327' y='238'/> + <part name='20' x='328' y='240'/> + <part name='21' x='329' y='242'/> + <part name='22' x='336' y='244'/> + <part name='23' x='339' y='244'/> + <part name='24' x='343' y='244'/> + <part name='25' x='347' y='246'/> + <part name='26' x='349' y='249'/> + <part name='27' x='331' y='248'/> + <part name='28' x='329' y='251'/> + <part name='29' x='328' y='254'/> + <part name='30' x='326' y='257'/> + <part name='31' x='324' y='259'/> + <part name='32' x='325' y='260'/> + <part name='33' x='326' y='261'/> + <part name='34' x='328' y='261'/> + <part name='35' x='331' y='262'/> + <part name='36' x='324' y='246'/> + <part name='37' x='325' y='246'/> + <part name='38' x='327' y='247'/> + <part name='39' x='328' y='248'/> + <part name='40' x='326' y='248'/> + <part name='41' x='325' y='247'/> + <part name='42' x='337' y='251'/> + <part name='43' x='339' y='251'/> + <part name='44' x='341' y='252'/> + <part name='45' x='343' y='253'/> + <part name='46' x='341' y='253'/> + <part name='47' x='339' y='252'/> + <part name='48' x='322' y='264'/> + <part name='49' x='322' y='264'/> + <part name='50' x='324' y='264'/> + <part name='51' x='325' y='265'/> + <part name='52' x='327' y='265'/> + <part name='53' x='331' y='267'/> + 
<part name='54' x='334' y='270'/> + <part name='55' x='330' y='271'/> + <part name='56' x='326' y='271'/> + <part name='57' x='324' y='270'/> + <part name='58' x='323' y='270'/> + <part name='59' x='322' y='268'/> + <part name='60' x='322' y='265'/> + <part name='61' x='324' y='265'/> + <part name='62' x='325' y='266'/> + <part name='63' x='327' y='267'/> + <part name='64' x='333' y='269'/> + <part name='65' x='327' y='269'/> + <part name='66' x='325' y='269'/> + <part name='67' x='323' y='267'/> + </box> + </image> + <image file='2008_002079.jpg'> + <box top='166' left='406' width='37' height='37'> + <part name='00' x='412' y='179'/> + <part name='01' x='411' y='183'/> + <part name='02' x='412' y='187'/> + <part name='03' x='412' y='190'/> + <part name='04' x='413' y='194'/> + <part name='05' x='415' y='197'/> + <part name='06' x='418' y='200'/> + <part name='07' x='421' y='203'/> + <part name='08' x='424' y='204'/> + <part name='09' x='428' y='203'/> + <part name='10' x='433' y='202'/> + <part name='11' x='437' y='200'/> + <part name='12' x='441' y='197'/> + <part name='13' x='443' y='193'/> + <part name='14' x='445' y='188'/> + <part name='15' x='446' y='184'/> + <part name='16' x='446' y='179'/> + <part name='17' x='413' y='174'/> + <part name='18' x='414' y='172'/> + <part name='19' x='416' y='171'/> + <part name='20' x='419' y='171'/> + <part name='21' x='421' y='172'/> + <part name='22' x='427' y='171'/> + <part name='23' x='431' y='171'/> + <part name='24' x='434' y='170'/> + <part name='25' x='437' y='171'/> + <part name='26' x='440' y='173'/> + <part name='27' x='424' y='176'/> + <part name='28' x='423' y='179'/> + <part name='29' x='422' y='182'/> + <part name='30' x='422' y='185'/> + <part name='31' x='421' y='186'/> + <part name='32' x='422' y='187'/> + <part name='33' x='423' y='188'/> + <part name='34' x='425' y='187'/> + <part name='35' x='427' y='186'/> + <part name='36' x='415' y='177'/> + <part name='37' x='417' y='176'/> + <part name='38' x='419' y='176'/> + <part name='39' x='421' y='177'/> + <part name='40' x='419' y='178'/> + <part name='41' x='417' y='178'/> + <part name='42' x='430' y='177'/> + <part name='43' x='432' y='175'/> + <part name='44' x='434' y='175'/> + <part name='45' x='436' y='176'/> + <part name='46' x='434' y='177'/> + <part name='47' x='432' y='177'/> + <part name='48' x='418' y='192'/> + <part name='49' x='420' y='190'/> + <part name='50' x='422' y='190'/> + <part name='51' x='424' y='190'/> + <part name='52' x='426' y='189'/> + <part name='53' x='430' y='190'/> + <part name='54' x='433' y='191'/> + <part name='55' x='430' y='194'/> + <part name='56' x='426' y='195'/> + <part name='57' x='424' y='195'/> + <part name='58' x='422' y='195'/> + <part name='59' x='420' y='194'/> + <part name='60' x='419' y='192'/> + <part name='61' x='422' y='191'/> + <part name='62' x='424' y='191'/> + <part name='63' x='426' y='191'/> + <part name='64' x='432' y='191'/> + <part name='65' x='426' y='193'/> + <part name='66' x='424' y='194'/> + <part name='67' x='422' y='193'/> + </box> + <box top='134' left='122' width='37' height='37'> + <part name='00' x='123' y='143'/> + <part name='01' x='123' y='148'/> + <part name='02' x='123' y='152'/> + <part name='03' x='124' y='156'/> + <part name='04' x='125' y='160'/> + <part name='05' x='128' y='163'/> + <part name='06' x='131' y='167'/> + <part name='07' x='134' y='169'/> + <part name='08' x='137' y='170'/> + <part name='09' x='140' y='170'/> + <part name='10' x='143' y='167'/> + <part name='11' x='145' y='164'/> + 
<part name='12' x='147' y='161'/> + <part name='13' x='148' y='158'/> + <part name='14' x='150' y='155'/> + <part name='15' x='150' y='151'/> + <part name='16' x='151' y='148'/> + <part name='17' x='131' y='141'/> + <part name='18' x='133' y='140'/> + <part name='19' x='136' y='140'/> + <part name='20' x='139' y='141'/> + <part name='21' x='141' y='142'/> + <part name='22' x='145' y='143'/> + <part name='23' x='147' y='143'/> + <part name='24' x='149' y='143'/> + <part name='25' x='150' y='143'/> + <part name='26' x='151' y='145'/> + <part name='27' x='143' y='146'/> + <part name='28' x='143' y='148'/> + <part name='29' x='143' y='150'/> + <part name='30' x='143' y='152'/> + <part name='31' x='139' y='154'/> + <part name='32' x='140' y='154'/> + <part name='33' x='142' y='155'/> + <part name='34' x='143' y='155'/> + <part name='35' x='144' y='154'/> + <part name='36' x='134' y='144'/> + <part name='37' x='135' y='144'/> + <part name='38' x='137' y='144'/> + <part name='39' x='138' y='145'/> + <part name='40' x='137' y='145'/> + <part name='41' x='135' y='145'/> + <part name='42' x='144' y='147'/> + <part name='43' x='146' y='146'/> + <part name='44' x='148' y='146'/> + <part name='45' x='149' y='147'/> + <part name='46' x='147' y='148'/> + <part name='47' x='146' y='147'/> + <part name='48' x='134' y='158'/> + <part name='49' x='137' y='157'/> + <part name='50' x='140' y='158'/> + <part name='51' x='141' y='158'/> + <part name='52' x='142' y='158'/> + <part name='53' x='144' y='159'/> + <part name='54' x='144' y='160'/> + <part name='55' x='143' y='162'/> + <part name='56' x='141' y='163'/> + <part name='57' x='140' y='163'/> + <part name='58' x='138' y='162'/> + <part name='59' x='136' y='161'/> + <part name='60' x='135' y='158'/> + <part name='61' x='139' y='158'/> + <part name='62' x='141' y='159'/> + <part name='63' x='142' y='159'/> + <part name='64' x='143' y='160'/> + <part name='65' x='142' y='161'/> + <part name='66' x='140' y='161'/> + <part name='67' x='139' y='161'/> + </box> + <box top='138' left='346' width='37' height='37'> + <part name='00' x='351' y='147'/> + <part name='01' x='351' y='151'/> + <part name='02' x='351' y='154'/> + <part name='03' x='351' y='158'/> + <part name='04' x='352' y='162'/> + <part name='05' x='353' y='165'/> + <part name='06' x='355' y='168'/> + <part name='07' x='358' y='170'/> + <part name='08' x='361' y='171'/> + <part name='09' x='365' y='171'/> + <part name='10' x='369' y='169'/> + <part name='11' x='373' y='168'/> + <part name='12' x='375' y='165'/> + <part name='13' x='377' y='161'/> + <part name='14' x='378' y='158'/> + <part name='15' x='379' y='154'/> + <part name='16' x='380' y='150'/> + <part name='17' x='352' y='144'/> + <part name='18' x='354' y='142'/> + <part name='19' x='356' y='142'/> + <part name='20' x='358' y='142'/> + <part name='21' x='360' y='144'/> + <part name='22' x='368' y='144'/> + <part name='23' x='371' y='143'/> + <part name='24' x='373' y='143'/> + <part name='25' x='375' y='144'/> + <part name='26' x='377' y='146'/> + <part name='27' x='364' y='147'/> + <part name='28' x='363' y='150'/> + <part name='29' x='363' y='152'/> + <part name='30' x='363' y='155'/> + <part name='31' x='360' y='156'/> + <part name='32' x='361' y='156'/> + <part name='33' x='363' y='157'/> + <part name='34' x='364' y='157'/> + <part name='35' x='365' y='156'/> + <part name='36' x='354' y='148'/> + <part name='37' x='356' y='147'/> + <part name='38' x='358' y='147'/> + <part name='39' x='360' y='148'/> + <part name='40' x='358' y='148'/> + 
<part name='41' x='356' y='148'/> + <part name='42' x='368' y='149'/> + <part name='43' x='370' y='148'/> + <part name='44' x='372' y='148'/> + <part name='45' x='374' y='149'/> + <part name='46' x='372' y='149'/> + <part name='47' x='370' y='149'/> + <part name='48' x='357' y='161'/> + <part name='49' x='359' y='159'/> + <part name='50' x='361' y='159'/> + <part name='51' x='363' y='159'/> + <part name='52' x='365' y='159'/> + <part name='53' x='367' y='160'/> + <part name='54' x='369' y='161'/> + <part name='55' x='367' y='163'/> + <part name='56' x='364' y='164'/> + <part name='57' x='362' y='164'/> + <part name='58' x='360' y='164'/> + <part name='59' x='358' y='163'/> + <part name='60' x='358' y='161'/> + <part name='61' x='361' y='160'/> + <part name='62' x='363' y='160'/> + <part name='63' x='364' y='160'/> + <part name='64' x='368' y='161'/> + <part name='65' x='364' y='163'/> + <part name='66' x='362' y='163'/> + <part name='67' x='361' y='162'/> + </box> + <box top='175' left='439' width='44' height='44'> + <part name='00' x='444' y='189'/> + <part name='01' x='443' y='193'/> + <part name='02' x='442' y='198'/> + <part name='03' x='442' y='203'/> + <part name='04' x='444' y='209'/> + <part name='05' x='446' y='214'/> + <part name='06' x='449' y='218'/> + <part name='07' x='452' y='222'/> + <part name='08' x='457' y='224'/> + <part name='09' x='463' y='224'/> + <part name='10' x='470' y='222'/> + <part name='11' x='477' y='219'/> + <part name='12' x='483' y='215'/> + <part name='13' x='487' y='209'/> + <part name='14' x='489' y='202'/> + <part name='15' x='490' y='195'/> + <part name='16' x='491' y='188'/> + <part name='17' x='444' y='184'/> + <part name='18' x='445' y='181'/> + <part name='19' x='448' y='181'/> + <part name='20' x='451' y='181'/> + <part name='21' x='454' y='183'/> + <part name='22' x='461' y='182'/> + <part name='23' x='465' y='181'/> + <part name='24' x='470' y='180'/> + <part name='25' x='475' y='181'/> + <part name='26' x='479' y='184'/> + <part name='27' x='456' y='187'/> + <part name='28' x='455' y='190'/> + <part name='29' x='454' y='193'/> + <part name='30' x='453' y='197'/> + <part name='31' x='451' y='199'/> + <part name='32' x='452' y='200'/> + <part name='33' x='455' y='201'/> + <part name='34' x='458' y='200'/> + <part name='35' x='461' y='200'/> + <part name='36' x='447' y='187'/> + <part name='37' x='449' y='186'/> + <part name='38' x='451' y='186'/> + <part name='39' x='454' y='188'/> + <part name='40' x='451' y='188'/> + <part name='41' x='449' y='188'/> + <part name='42' x='465' y='188'/> + <part name='43' x='467' y='186'/> + <part name='44' x='470' y='186'/> + <part name='45' x='473' y='187'/> + <part name='46' x='470' y='188'/> + <part name='47' x='467' y='188'/> + <part name='48' x='449' y='206'/> + <part name='49' x='451' y='205'/> + <part name='50' x='453' y='204'/> + <part name='51' x='456' y='205'/> + <part name='52' x='458' y='204'/> + <part name='53' x='463' y='205'/> + <part name='54' x='470' y='206'/> + <part name='55' x='464' y='210'/> + <part name='56' x='459' y='211'/> + <part name='57' x='456' y='211'/> + <part name='58' x='454' y='211'/> + <part name='59' x='451' y='209'/> + <part name='60' x='450' y='206'/> + <part name='61' x='453' y='206'/> + <part name='62' x='456' y='206'/> + <part name='63' x='458' y='205'/> + <part name='64' x='468' y='206'/> + <part name='65' x='459' y='209'/> + <part name='66' x='456' y='209'/> + <part name='67' x='454' y='209'/> + </box> + <box top='134' left='62' width='37' height='37'> + <part 
name='00' x='59' y='147'/> + <part name='01' x='60' y='153'/> + <part name='02' x='62' y='158'/> + <part name='03' x='64' y='163'/> + <part name='04' x='67' y='167'/> + <part name='05' x='72' y='170'/> + <part name='06' x='77' y='171'/> + <part name='07' x='83' y='173'/> + <part name='08' x='87' y='172'/> + <part name='09' x='90' y='171'/> + <part name='10' x='92' y='167'/> + <part name='11' x='92' y='163'/> + <part name='12' x='93' y='159'/> + <part name='13' x='94' y='155'/> + <part name='14' x='94' y='151'/> + <part name='15' x='94' y='148'/> + <part name='16' x='93' y='144'/> + <part name='17' x='67' y='141'/> + <part name='18' x='70' y='138'/> + <part name='19' x='73' y='137'/> + <part name='20' x='77' y='136'/> + <part name='21' x='81' y='137'/> + <part name='22' x='87' y='137'/> + <part name='23' x='89' y='136'/> + <part name='24' x='91' y='136'/> + <part name='25' x='92' y='136'/> + <part name='26' x='93' y='138'/> + <part name='27' x='85' y='142'/> + <part name='28' x='86' y='144'/> + <part name='29' x='87' y='147'/> + <part name='30' x='89' y='150'/> + <part name='31' x='83' y='153'/> + <part name='32' x='85' y='153'/> + <part name='33' x='87' y='154'/> + <part name='34' x='89' y='153'/> + <part name='35' x='90' y='152'/> + <part name='36' x='72' y='144'/> + <part name='37' x='74' y='142'/> + <part name='38' x='76' y='142'/> + <part name='39' x='78' y='143'/> + <part name='40' x='76' y='144'/> + <part name='41' x='74' y='144'/> + <part name='42' x='86' y='143'/> + <part name='43' x='88' y='142'/> + <part name='44' x='90' y='142'/> + <part name='45' x='92' y='143'/> + <part name='46' x='90' y='144'/> + <part name='47' x='89' y='144'/> + <part name='48' x='78' y='159'/> + <part name='49' x='82' y='157'/> + <part name='50' x='85' y='157'/> + <part name='51' x='87' y='157'/> + <part name='52' x='88' y='157'/> + <part name='53' x='90' y='157'/> + <part name='54' x='90' y='159'/> + <part name='55' x='90' y='161'/> + <part name='56' x='88' y='163'/> + <part name='57' x='87' y='163'/> + <part name='58' x='85' y='163'/> + <part name='59' x='82' y='162'/> + <part name='60' x='79' y='160'/> + <part name='61' x='85' y='158'/> + <part name='62' x='87' y='158'/> + <part name='63' x='88' y='158'/> + <part name='64' x='90' y='159'/> + <part name='65' x='88' y='160'/> + <part name='66' x='87' y='161'/> + <part name='67' x='85' y='161'/> + </box> + <box top='194' left='41' width='44' height='44'> + <part name='00' x='34' y='207'/> + <part name='01' x='35' y='214'/> + <part name='02' x='36' y='221'/> + <part name='03' x='39' y='227'/> + <part name='04' x='43' y='232'/> + <part name='05' x='48' y='236'/> + <part name='06' x='55' y='240'/> + <part name='07' x='62' y='242'/> + <part name='08' x='68' y='242'/> + <part name='09' x='72' y='240'/> + <part name='10' x='74' y='235'/> + <part name='11' x='75' y='230'/> + <part name='12' x='77' y='225'/> + <part name='13' x='78' y='220'/> + <part name='14' x='78' y='215'/> + <part name='15' x='78' y='210'/> + <part name='16' x='77' y='205'/> + <part name='17' x='46' y='204'/> + <part name='18' x='50' y='201'/> + <part name='19' x='55' y='200'/> + <part name='20' x='60' y='199'/> + <part name='21' x='65' y='200'/> + <part name='22' x='70' y='199'/> + <part name='23' x='72' y='197'/> + <part name='24' x='75' y='196'/> + <part name='25' x='77' y='196'/> + <part name='26' x='78' y='197'/> + <part name='27' x='69' y='205'/> + <part name='28' x='70' y='209'/> + <part name='29' x='72' y='212'/> + <part name='30' x='73' y='216'/> + <part name='31' x='67' y='219'/> + 
<part name='32' x='69' y='220'/> + <part name='33' x='71' y='220'/> + <part name='34' x='73' y='220'/> + <part name='35' x='74' y='218'/> + <part name='36' x='53' y='207'/> + <part name='37' x='56' y='206'/> + <part name='38' x='59' y='205'/> + <part name='39' x='61' y='206'/> + <part name='40' x='59' y='207'/> + <part name='41' x='56' y='208'/> + <part name='42' x='70' y='205'/> + <part name='43' x='72' y='203'/> + <part name='44' x='74' y='202'/> + <part name='45' x='75' y='203'/> + <part name='46' x='75' y='204'/> + <part name='47' x='73' y='205'/> + <part name='48' x='61' y='228'/> + <part name='49' x='65' y='226'/> + <part name='50' x='69' y='225'/> + <part name='51' x='70' y='225'/> + <part name='52' x='72' y='224'/> + <part name='53' x='73' y='225'/> + <part name='54' x='73' y='226'/> + <part name='55' x='73' y='228'/> + <part name='56' x='72' y='230'/> + <part name='57' x='70' y='230'/> + <part name='58' x='68' y='230'/> + <part name='59' x='65' y='230'/> + <part name='60' x='63' y='228'/> + <part name='61' x='69' y='227'/> + <part name='62' x='70' y='227'/> + <part name='63' x='72' y='226'/> + <part name='64' x='72' y='226'/> + <part name='65' x='72' y='226'/> + <part name='66' x='70' y='227'/> + <part name='67' x='68' y='227'/> + </box> + </image> + <image file='2008_001009.jpg'> + <box top='79' left='145' width='76' height='76'> + <part name='00' x='145' y='115'/> + <part name='01' x='148' y='124'/> + <part name='02' x='151' y='133'/> + <part name='03' x='154' y='141'/> + <part name='04' x='160' y='148'/> + <part name='05' x='167' y='154'/> + <part name='06' x='174' y='159'/> + <part name='07' x='183' y='163'/> + <part name='08' x='191' y='163'/> + <part name='09' x='199' y='161'/> + <part name='10' x='205' y='154'/> + <part name='11' x='210' y='147'/> + <part name='12' x='214' y='139'/> + <part name='13' x='216' y='130'/> + <part name='14' x='216' y='120'/> + <part name='15' x='215' y='111'/> + <part name='16' x='214' y='102'/> + <part name='17' x='152' y='104'/> + <part name='18' x='155' y='99'/> + <part name='19' x='161' y='95'/> + <part name='20' x='167' y='95'/> + <part name='21' x='173' y='96'/> + <part name='22' x='186' y='94'/> + <part name='23' x='190' y='90'/> + <part name='24' x='196' y='89'/> + <part name='25' x='201' y='90'/> + <part name='26' x='206' y='94'/> + <part name='27' x='181' y='102'/> + <part name='28' x='182' y='107'/> + <part name='29' x='184' y='111'/> + <part name='30' x='185' y='116'/> + <part name='31' x='179' y='122'/> + <part name='32' x='182' y='123'/> + <part name='33' x='186' y='123'/> + <part name='34' x='189' y='121'/> + <part name='35' x='192' y='119'/> + <part name='36' x='161' y='108'/> + <part name='37' x='164' y='105'/> + <part name='38' x='168' y='104'/> + <part name='39' x='172' y='106'/> + <part name='40' x='169' y='107'/> + <part name='41' x='165' y='108'/> + <part name='42' x='190' y='103'/> + <part name='43' x='193' y='100'/> + <part name='44' x='197' y='99'/> + <part name='45' x='201' y='100'/> + <part name='46' x='198' y='102'/> + <part name='47' x='194' y='103'/> + <part name='48' x='171' y='138'/> + <part name='49' x='176' y='132'/> + <part name='50' x='182' y='129'/> + <part name='51' x='187' y='129'/> + <part name='52' x='191' y='127'/> + <part name='53' x='197' y='128'/> + <part name='54' x='202' y='133'/> + <part name='55' x='199' y='144'/> + <part name='56' x='193' y='148'/> + <part name='57' x='189' y='149'/> + <part name='58' x='184' y='150'/> + <part name='59' x='177' y='147'/> + <part name='60' x='174' y='138'/> + 
<part name='61' x='183' y='131'/> + <part name='62' x='187' y='131'/> + <part name='63' x='191' y='130'/> + <part name='64' x='199' y='134'/> + <part name='65' x='192' y='143'/> + <part name='66' x='188' y='144'/> + <part name='67' x='183' y='145'/> + </box> + <box top='214' left='125' width='90' height='91'> + <part name='00' x='133' y='248'/> + <part name='01' x='134' y='257'/> + <part name='02' x='137' y='267'/> + <part name='03' x='140' y='277'/> + <part name='04' x='145' y='286'/> + <part name='05' x='152' y='294'/> + <part name='06' x='159' y='300'/> + <part name='07' x='168' y='305'/> + <part name='08' x='178' y='305'/> + <part name='09' x='189' y='302'/> + <part name='10' x='198' y='296'/> + <part name='11' x='207' y='289'/> + <part name='12' x='213' y='280'/> + <part name='13' x='216' y='269'/> + <part name='14' x='217' y='257'/> + <part name='15' x='216' y='245'/> + <part name='16' x='215' y='233'/> + <part name='17' x='135' y='236'/> + <part name='18' x='138' y='231'/> + <part name='19' x='143' y='228'/> + <part name='20' x='149' y='227'/> + <part name='21' x='155' y='228'/> + <part name='22' x='171' y='224'/> + <part name='23' x='177' y='220'/> + <part name='24' x='185' y='218'/> + <part name='25' x='192' y='219'/> + <part name='26' x='199' y='223'/> + <part name='27' x='164' y='236'/> + <part name='28' x='165' y='241'/> + <part name='29' x='165' y='246'/> + <part name='30' x='166' y='251'/> + <part name='31' x='160' y='260'/> + <part name='32' x='164' y='260'/> + <part name='33' x='169' y='260'/> + <part name='34' x='173' y='258'/> + <part name='35' x='177' y='257'/> + <part name='36' x='144' y='243'/> + <part name='37' x='147' y='239'/> + <part name='38' x='151' y='238'/> + <part name='39' x='156' y='240'/> + <part name='40' x='152' y='241'/> + <part name='41' x='148' y='242'/> + <part name='42' x='178' y='235'/> + <part name='43' x='181' y='232'/> + <part name='44' x='186' y='231'/> + <part name='45' x='191' y='232'/> + <part name='46' x='187' y='233'/> + <part name='47' x='182' y='234'/> + <part name='48' x='157' y='277'/> + <part name='49' x='161' y='271'/> + <part name='50' x='166' y='267'/> + <part name='51' x='171' y='267'/> + <part name='52' x='175' y='266'/> + <part name='53' x='184' y='266'/> + <part name='54' x='192' y='271'/> + <part name='55' x='186' y='280'/> + <part name='56' x='179' y='283'/> + <part name='57' x='174' y='285'/> + <part name='58' x='169' y='285'/> + <part name='59' x='162' y='284'/> + <part name='60' x='159' y='276'/> + <part name='61' x='167' y='270'/> + <part name='62' x='171' y='270'/> + <part name='63' x='176' y='269'/> + <part name='64' x='190' y='271'/> + <part name='65' x='177' y='279'/> + <part name='66' x='173' y='280'/> + <part name='67' x='168' y='280'/> + </box> + </image> + <image file='2008_001322.jpg'> + <box top='162' left='104' width='76' height='76'> + <part name='00' x='106' y='183'/> + <part name='01' x='106' y='193'/> + <part name='02' x='106' y='203'/> + <part name='03' x='106' y='213'/> + <part name='04' x='107' y='222'/> + <part name='05' x='111' y='230'/> + <part name='06' x='117' y='238'/> + <part name='07' x='125' y='243'/> + <part name='08' x='135' y='245'/> + <part name='09' x='144' y='245'/> + <part name='10' x='152' y='241'/> + <part name='11' x='158' y='235'/> + <part name='12' x='163' y='228'/> + <part name='13' x='167' y='220'/> + <part name='14' x='169' y='211'/> + <part name='15' x='172' y='202'/> + <part name='16' x='174' y='192'/> + <part name='17' x='114' y='175'/> + <part name='18' x='118' y='169'/> + 
<part name='19' x='124' y='166'/> + <part name='20' x='132' y='167'/> + <part name='21' x='139' y='169'/> + <part name='22' x='151' y='170'/> + <part name='23' x='158' y='170'/> + <part name='24' x='164' y='172'/> + <part name='25' x='170' y='175'/> + <part name='26' x='171' y='182'/> + <part name='27' x='145' y='178'/> + <part name='28' x='144' y='183'/> + <part name='29' x='144' y='188'/> + <part name='30' x='144' y='193'/> + <part name='31' x='135' y='200'/> + <part name='32' x='138' y='201'/> + <part name='33' x='142' y='202'/> + <part name='34' x='146' y='202'/> + <part name='35' x='149' y='201'/> + <part name='36' x='121' y='180'/> + <part name='37' x='125' y='178'/> + <part name='38' x='129' y='178'/> + <part name='39' x='133' y='181'/> + <part name='40' x='129' y='181'/> + <part name='41' x='125' y='181'/> + <part name='42' x='153' y='183'/> + <part name='43' x='157' y='181'/> + <part name='44' x='161' y='182'/> + <part name='45' x='163' y='185'/> + <part name='46' x='160' y='185'/> + <part name='47' x='156' y='184'/> + <part name='48' x='124' y='217'/> + <part name='49' x='130' y='214'/> + <part name='50' x='136' y='213'/> + <part name='51' x='140' y='214'/> + <part name='52' x='144' y='214'/> + <part name='53' x='148' y='216'/> + <part name='54' x='152' y='221'/> + <part name='55' x='147' y='223'/> + <part name='56' x='143' y='224'/> + <part name='57' x='139' y='223'/> + <part name='58' x='134' y='223'/> + <part name='59' x='129' y='221'/> + <part name='60' x='126' y='217'/> + <part name='61' x='136' y='217'/> + <part name='62' x='140' y='218'/> + <part name='63' x='144' y='217'/> + <part name='64' x='150' y='220'/> + <part name='65' x='143' y='220'/> + <part name='66' x='139' y='220'/> + <part name='67' x='135' y='219'/> + </box> + <box top='218' left='232' width='63' height='63'> + <part name='00' x='234' y='235'/> + <part name='01' x='233' y='243'/> + <part name='02' x='234' y='251'/> + <part name='03' x='235' y='259'/> + <part name='04' x='237' y='267'/> + <part name='05' x='241' y='274'/> + <part name='06' x='246' y='281'/> + <part name='07' x='253' y='286'/> + <part name='08' x='261' y='287'/> + <part name='09' x='270' y='286'/> + <part name='10' x='278' y='282'/> + <part name='11' x='285' y='276'/> + <part name='12' x='290' y='269'/> + <part name='13' x='293' y='261'/> + <part name='14' x='294' y='252'/> + <part name='15' x='295' y='243'/> + <part name='16' x='296' y='234'/> + <part name='17' x='237' y='228'/> + <part name='18' x='239' y='225'/> + <part name='19' x='244' y='224'/> + <part name='20' x='249' y='225'/> + <part name='21' x='253' y='227'/> + <part name='22' x='265' y='226'/> + <part name='23' x='270' y='223'/> + <part name='24' x='276' y='222'/> + <part name='25' x='282' y='223'/> + <part name='26' x='286' y='227'/> + <part name='27' x='259' y='232'/> + <part name='28' x='258' y='237'/> + <part name='29' x='258' y='243'/> + <part name='30' x='257' y='248'/> + <part name='31' x='253' y='252'/> + <part name='32' x='256' y='253'/> + <part name='33' x='259' y='254'/> + <part name='34' x='262' y='253'/> + <part name='35' x='265' y='252'/> + <part name='36' x='242' y='233'/> + <part name='37' x='245' y='231'/> + <part name='38' x='248' y='231'/> + <part name='39' x='252' y='234'/> + <part name='40' x='248' y='234'/> + <part name='41' x='245' y='234'/> + <part name='42' x='269' y='233'/> + <part name='43' x='272' y='230'/> + <part name='44' x='276' y='230'/> + <part name='45' x='279' y='232'/> + <part name='46' x='276' y='233'/> + <part name='47' x='272' y='233'/> + 
<part name='48' x='247' y='262'/> + <part name='49' x='250' y='260'/> + <part name='50' x='255' y='260'/> + <part name='51' x='259' y='260'/> + <part name='52' x='263' y='260'/> + <part name='53' x='270' y='260'/> + <part name='54' x='276' y='262'/> + <part name='55' x='270' y='268'/> + <part name='56' x='264' y='270'/> + <part name='57' x='260' y='271'/> + <part name='58' x='255' y='271'/> + <part name='59' x='251' y='268'/> + <part name='60' x='249' y='262'/> + <part name='61' x='255' y='262'/> + <part name='62' x='259' y='262'/> + <part name='63' x='263' y='262'/> + <part name='64' x='275' y='262'/> + <part name='65' x='264' y='267'/> + <part name='66' x='260' y='267'/> + <part name='67' x='255' y='267'/> + </box> + <box top='155' left='344' width='90' height='90'> + <part name='00' x='357' y='199'/> + <part name='01' x='359' y='209'/> + <part name='02' x='361' y='219'/> + <part name='03' x='364' y='229'/> + <part name='04' x='370' y='238'/> + <part name='05' x='379' y='245'/> + <part name='06' x='388' y='251'/> + <part name='07' x='397' y='256'/> + <part name='08' x='407' y='256'/> + <part name='09' x='417' y='253'/> + <part name='10' x='425' y='246'/> + <part name='11' x='432' y='238'/> + <part name='12' x='438' y='228'/> + <part name='13' x='441' y='217'/> + <part name='14' x='441' y='205'/> + <part name='15' x='439' y='192'/> + <part name='16' x='437' y='180'/> + <part name='17' x='360' y='188'/> + <part name='18' x='361' y='181'/> + <part name='19' x='367' y='178'/> + <part name='20' x='373' y='177'/> + <part name='21' x='380' y='178'/> + <part name='22' x='396' y='175'/> + <part name='23' x='404' y='171'/> + <part name='24' x='411' y='169'/> + <part name='25' x='418' y='169'/> + <part name='26' x='423' y='173'/> + <part name='27' x='390' y='185'/> + <part name='28' x='391' y='192'/> + <part name='29' x='392' y='198'/> + <part name='30' x='393' y='205'/> + <part name='31' x='387' y='210'/> + <part name='32' x='391' y='212'/> + <part name='33' x='396' y='212'/> + <part name='34' x='401' y='210'/> + <part name='35' x='405' y='207'/> + <part name='36' x='369' y='191'/> + <part name='37' x='373' y='188'/> + <part name='38' x='377' y='187'/> + <part name='39' x='381' y='188'/> + <part name='40' x='378' y='190'/> + <part name='41' x='373' y='191'/> + <part name='42' x='403' y='184'/> + <part name='43' x='406' y='181'/> + <part name='44' x='410' y='180'/> + <part name='45' x='414' y='182'/> + <part name='46' x='411' y='183'/> + <part name='47' x='407' y='184'/> + <part name='48' x='384' y='224'/> + <part name='49' x='388' y='220'/> + <part name='50' x='393' y='219'/> + <part name='51' x='398' y='218'/> + <part name='52' x='403' y='217'/> + <part name='53' x='411' y='216'/> + <part name='54' x='418' y='217'/> + <part name='55' x='413' y='225'/> + <part name='56' x='407' y='229'/> + <part name='57' x='401' y='231'/> + <part name='58' x='396' y='231'/> + <part name='59' x='390' y='230'/> + <part name='60' x='386' y='224'/> + <part name='61' x='394' y='220'/> + <part name='62' x='399' y='220'/> + <part name='63' x='404' y='219'/> + <part name='64' x='416' y='218'/> + <part name='65' x='405' y='225'/> + <part name='66' x='400' y='226'/> + <part name='67' x='395' y='227'/> + </box> + </image> +</images> +</dataset> diff --git a/ml/dlib/examples/fhog_ex.cpp b/ml/dlib/examples/fhog_ex.cpp new file mode 100644 index 00000000..1e8d5a80 --- /dev/null +++ b/ml/dlib/examples/fhog_ex.cpp @@ -0,0 +1,88 @@ +// The contents of this file are in the public domain. 
See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the extract_fhog_features() routine from + the dlib C++ Library. + + + The extract_fhog_features() routine performs the style of HOG feature extraction + described in the paper: + Object Detection with Discriminatively Trained Part Based Models by + P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan + IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010 + This means that it takes an input image and outputs Felzenszwalb's + 31 dimensional version of HOG features. We show its use below. +*/ + + + +#include <dlib/gui_widgets.h> +#include <dlib/image_io.h> +#include <dlib/image_transforms.h> + + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------- + +int main(int argc, char** argv) +{ + try + { + // Make sure the user entered an argument to this program. It should be the + // filename for an image. + if (argc != 2) + { + cout << "error, you have to enter a BMP file as an argument to this program" << endl; + return 1; + } + + // Here we declare an image object that can store color rgb_pixels. + array2d<rgb_pixel> img; + + // Now load the image file into our image. If something is wrong then + // load_image() will throw an exception. Also, if you linked with libpng + // and libjpeg then load_image() can load PNG and JPEG files in addition + // to BMP files. + load_image(img, argv[1]); + + + // Now convert the image into a FHOG feature image. The output, hog, is a 2D array + // of 31 dimensional vectors. + array2d<matrix<float,31,1> > hog; + extract_fhog_features(img, hog); + + cout << "hog image has " << hog.nr() << " rows and " << hog.nc() << " columns." << endl; + + // Let's see what the image and FHOG features look like. + image_window win(img); + image_window winhog(draw_fhog(hog)); + + // Another thing you might want to do is map between the pixels in img and the + // cells in the hog image. dlib provides the image_to_fhog() and fhog_to_image() + // routines for this. Their use is demonstrated in the following loop which + // responds to the user clicking on pixels in the image img. + point p; // A 2D point, used to represent pixel locations. + while (win.get_next_double_click(p)) + { + point hp = image_to_fhog(p); + cout << "The point " << p << " in the input image corresponds to " << hp << " in hog space." << endl; + cout << "FHOG features at this point: " << trans(hog[hp.y()][hp.x()]) << endl; + } + + // Finally, sometimes you want to get a planar representation of the HOG features + // rather than the explicit vector (i.e. interlaced) representation used above. + dlib::array<array2d<float> > planar_hog; + extract_fhog_features(img, planar_hog); + // Now we have an array of 31 float valued image planes, each representing one of + // the dimensions of the HOG feature vector. + } + catch (exception& e) + { + cout << "exception thrown: " << e.what() << endl; + } +} + +// ---------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/fhog_object_detector_ex.cpp b/ml/dlib/examples/fhog_object_detector_ex.cpp new file mode 100644 index 00000000..152f57d0 --- /dev/null +++ b/ml/dlib/examples/fhog_object_detector_ex.cpp @@ -0,0 +1,269 @@ +// The contents of this file are in the public domain. 
See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This example program shows how you can use dlib to make an object detector + for things like faces, pedestrians, and any other semi-rigid object. In + particular, we go through the steps to train the kind of sliding window + object detector first published by Dalal and Triggs in 2005 in the paper + Histograms of Oriented Gradients for Human Detection. + + Note that this program executes fastest when compiled with at least SSE2 + instructions enabled. So if you are using a PC with an Intel or AMD chip + then you should enable at least SSE2 instructions. If you are using cmake + to compile this program you can enable them by using one of the following + commands when you create the build project: + cmake path_to_dlib_root/examples -DUSE_SSE2_INSTRUCTIONS=ON + cmake path_to_dlib_root/examples -DUSE_SSE4_INSTRUCTIONS=ON + cmake path_to_dlib_root/examples -DUSE_AVX_INSTRUCTIONS=ON + This will set the appropriate compiler options for GCC, clang, Visual + Studio, or the Intel compiler. If you are using another compiler then you + need to consult your compiler's manual to determine how to enable these + instructions. Note that AVX is the fastest but requires a CPU from at least + 2011. SSE4 is the next fastest and is supported by most current machines. + +*/ + + +#include <dlib/svm_threaded.h> +#include <dlib/gui_widgets.h> +#include <dlib/image_processing.h> +#include <dlib/data_io.h> + +#include <iostream> +#include <fstream> + + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) +{ + + try + { + // In this example we are going to train a face detector based on the + // small faces dataset in the examples/faces directory. So the first + // thing we do is load that dataset. This means you need to supply the + // path to this faces folder as a command line argument so we will know + // where it is. + if (argc != 2) + { + cout << "Give the path to the examples/faces directory as the argument to this" << endl; + cout << "program. For example, if you are in the examples folder then execute " << endl; + cout << "this program by running: " << endl; + cout << "   ./fhog_object_detector_ex faces" << endl; + cout << endl; + return 0; + } + const std::string faces_directory = argv[1]; + // The faces directory contains a training dataset and a separate + // testing dataset. The training data consists of 4 images, each + // annotated with rectangles that bound each human face. The idea is + // to use this training data to learn to identify human faces in new + // images. + // + // Once you have trained an object detector it is always important to + // test it on data it wasn't trained on. Therefore, we will also load + // a separate testing set of 5 images. Once we have a face detector + // created from the training data we will see how well it works by + // running it on the testing images. + // + // So here we create the variables that will hold our dataset. + // images_train will hold the 4 training images and face_boxes_train + // holds the locations of the faces in the training images. So for + // example, the image images_train[0] has the faces given by the + // rectangles in face_boxes_train[0]. + dlib::array<array2d<unsigned char> > images_train, images_test; + std::vector<std::vector<rectangle> > face_boxes_train, face_boxes_test; + + // Now we load the data.
These XML files list the images in each + // dataset and also contain the positions of the face boxes. Obviously + // you can use any kind of input format you like so long as you store + // the data into images_train and face_boxes_train. But for convenience + // dlib comes with tools for creating and loading XML image dataset + // files. Here you see how to load the data. To create the XML files + // you can use the imglab tool which can be found in the tools/imglab + // folder. It is a simple graphical tool for labeling objects in images + // with boxes. To see how to use it read the tools/imglab/README.txt + // file. + load_image_dataset(images_train, face_boxes_train, faces_directory+"/training.xml"); + load_image_dataset(images_test, face_boxes_test, faces_directory+"/testing.xml"); + + // Now we do a little bit of pre-processing. This is optional but for + // this training data it improves the results. The first thing we do is + // increase the size of the images by a factor of two. We do this + // because it will allow us to detect smaller faces than otherwise would + // be practical (since the faces are all now twice as big). Note that, + // in addition to resizing the images, these functions also make the + // appropriate adjustments to the face boxes so that they still fall on + // top of the faces after the images are resized. + upsample_image_dataset<pyramid_down<2> >(images_train, face_boxes_train); + upsample_image_dataset<pyramid_down<2> >(images_test, face_boxes_test); + // Since human faces are generally left-right symmetric we can increase + // our training dataset by adding mirrored versions of each image back + // into images_train. So this next step doubles the size of our + // training dataset. Again, this is obviously optional but is useful in + // many object detection tasks. + add_image_left_right_flips(images_train, face_boxes_train); + cout << "num training images: " << images_train.size() << endl; + cout << "num testing images:  " << images_test.size() << endl; + + + // Finally we get to the training code. dlib contains a number of + // object detectors. This typedef tells it that you want to use the one + // based on Felzenszwalb's version of the Histogram of Oriented + // Gradients (commonly called HOG) detector. The 6 means that you want + // it to use an image pyramid that downsamples the image at a ratio of + // 5/6. Recall that HOG detectors work by creating an image pyramid and + // then running the detector over each pyramid level in a sliding window + // fashion. + typedef scan_fhog_pyramid<pyramid_down<6> > image_scanner_type; + image_scanner_type scanner; + // The sliding window detector will be 80 pixels wide and 80 pixels tall. + scanner.set_detection_window_size(80, 80); + structural_object_detection_trainer<image_scanner_type> trainer(scanner); + // Set this to the number of processing cores on your machine. + trainer.set_num_threads(4); + // The trainer is a kind of support vector machine and therefore has the usual SVM + // C parameter. In general, a bigger C encourages it to fit the training data + // better but might lead to overfitting. You must find the best C value + // empirically by checking how well the trained detector works on a test set of + // images you haven't trained on. Don't just leave the value set at 1. Try a few + // different C values and see what works best for your data. + trainer.set_c(1); + // We can tell the trainer to print its progress to the console if we want.
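+ // (As an aside, and purely as an illustrative sketch that is not part of the + // original example: one way to pick C is to retrain at a few settings and compare + // the precision/recall/average precision printed for the held-out test set, e.g. + // for (double C : {0.5, 1.0, 5.0, 25.0}) + // { + //     trainer.set_c(C); + //     object_detector<image_scanner_type> det = trainer.train(images_train, face_boxes_train); + //     cout << "C = " << C << ": " << test_object_detection_function(det, images_test, face_boxes_test) << endl; + // } + // here det is just a scratch name for the detector trained at each C value.) + // Anyway, back to the example: turn on verbose output.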
+ trainer.be_verbose(); + // The trainer will run until the "risk gap" is less than 0.01. Smaller values + // make the trainer solve the SVM optimization problem more accurately but will + // take longer to train. For most problems a value in the range of 0.1 to 0.01 is + // plenty accurate. Also, when in verbose mode the risk gap is printed on each + // iteration so you can see how close it is to finishing the training. + trainer.set_epsilon(0.01); + + + // Now we run the trainer. For this example, it should take on the order of 10 + // seconds to train. + object_detector<image_scanner_type> detector = trainer.train(images_train, face_boxes_train); + + // Now that we have a face detector we can test it. The first statement tests it + // on the training data. It will print the precision, recall, and then average precision. + cout << "training results: " << test_object_detection_function(detector, images_train, face_boxes_train) << endl; + // However, to get an idea if it really worked without overfitting we need to run + // it on images it wasn't trained on. The next line does this. Happily, we see + // that the object detector works perfectly on the testing images. + cout << "testing results: " << test_object_detection_function(detector, images_test, face_boxes_test) << endl; + + + // If you have read any papers that use HOG you have probably seen the nice looking + // "sticks" visualization of a learned HOG detector. This next line creates a + // window with such a visualization of our detector. It should look somewhat like + // a face. + image_window hogwin(draw_fhog(detector), "Learned fHOG detector"); + + // Now for the really fun part. Let's display the testing images on the screen and + // show the output of the face detector overlaid on each image. You will see that + // it finds all the faces without false alarming on any non-faces. + image_window win; + for (unsigned long i = 0; i < images_test.size(); ++i) + { + // Run the detector and get the face detections. + std::vector<rectangle> dets = detector(images_test[i]); + win.clear_overlay(); + win.set_image(images_test[i]); + win.add_overlay(dets, rgb_pixel(255,0,0)); + cout << "Hit enter to process the next image..." << endl; + cin.get(); + } + + + // Like everything in dlib, you can save your detector to disk using the + // serialize() function. + serialize("face_detector.svm") << detector; + + // Then you can recall it using the deserialize() function. + object_detector<image_scanner_type> detector2; + deserialize("face_detector.svm") >> detector2; + + + + + // Now let's talk about some optional features of this training tool as well as some + // important points you should understand. + // + // The first thing that should be pointed out is that, since this is a sliding + // window classifier, it can't output an arbitrary rectangle as a detection. In + // this example our sliding window is 80 by 80 pixels and is run over an image + // pyramid. This means that it can only output detections that are at least 80 by + // 80 pixels in size (recall that this is why we upsampled the images after loading + // them). It also means that the aspect ratio of the outputs is 1. So if, + // for example, you had a box in your training data that was 200 pixels by 10 + // pixels then it would simply be impossible for the detector to learn to detect + // it. Similarly, if you had a really small box it would be unable to learn to + // detect it. 
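+ // + // (To make the geometry concrete: with an 80x80 window and a pyramid_down<6> + // pyramid each level shrinks the image to 5/6 of the previous size, so the + // detector's outputs in the original image come out at roughly 80x80, 96x96, + // 115x115, ... pixels, i.e. about 80*(6/5)^k pixels on a side for pyramid level k, + // and always with a square aspect ratio. This arithmetic is an added aside, not + // text from the original example.)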
+ // + // So the training code performs an input validation check on the training data and + // will throw an exception if it detects any boxes that are impossible to detect + // given your setting of scanning window size and image pyramid resolution. You + // can use a statement like: + // remove_unobtainable_rectangles(trainer, images_train, face_boxes_train) + // to automatically discard these impossible boxes from your training dataset + // before running the trainer. This will avoid getting the "impossible box" + // exception. However, I would recommend you be careful that you are not throwing + // away truth boxes you really care about. The remove_unobtainable_rectangles() + // routine will return the set of removed rectangles so you can visually inspect + // them and make sure you are OK that they are being removed. + // + // Next, note that any location in the images not marked with a truth box is + // implicitly treated as a negative example. This means that when creating + // training data it is critical that you label all the objects you want to detect. + // So for example, if you are making a face detector then you must mark all the + // faces in each image. However, sometimes there are objects in images you are + // unsure about or simply don't care whether the detector identifies them or not. + // For these objects you can pass in a set of "ignore boxes" as a third argument to + // the trainer.train() function. The trainer will simply disregard any detections + // that happen to hit these boxes (a short sketch of this appears at the very end + // of this discussion). + // + // Another useful thing you can do is evaluate multiple HOG detectors together. The + // benefit of this is increased testing speed since it avoids recomputing the HOG + // features for each run of the detector. You do this by storing your detectors + // into a std::vector and then invoking evaluate_detectors() like so: + std::vector<object_detector<image_scanner_type> > my_detectors; + my_detectors.push_back(detector); + std::vector<rectangle> dets = evaluate_detectors(my_detectors, images_train[0]); + // + // + // Finally, you can add a nuclear norm regularizer to the SVM trainer. Doing so has + // two benefits. First, it can cause the learned HOG detector to be composed of + // separable filters and therefore makes it execute faster when detecting objects. + // It can also help with generalization since it tends to make the learned HOG + // filters smoother. To enable this option you call the following function before + // you create the trainer object: + // scanner.set_nuclear_norm_regularization_strength(1.0); + // The argument determines how important it is to have a small nuclear norm. A + // bigger regularization strength means it is more important. The smaller the + // nuclear norm the smoother and faster the learned HOG filters will be, but if the + // regularization strength value is too large then the SVM will not fit the data + // well. This is analogous to giving a C value that is too small. + // + // You can see how many separable filters are inside your detector like so: + cout << "num filters: "<< num_separable_filters(detector) << endl; + // You can also control how many filters there are by explicitly thresholding the + // singular values of the filters like this: + detector = threshold_filter_singular_values(detector,0.1); + // That removes filter components with singular values less than 0.1. The bigger + // this number the fewer separable filters you will have and the faster the + // detector will run. However, a large enough threshold will hurt detection + // accuracy.
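+ // + // As promised earlier, here is a minimal sketch of the "ignore boxes" idea. It is + // not part of the original example and ignore_boxes is a hypothetical, deliberately + // empty container holding one list of rectangles per training image: + // std::vector<std::vector<rectangle> > ignore_boxes(images_train.size()); + // detector = trainer.train(images_train, face_boxes_train, ignore_boxes); + // Any detection landing on a rectangle in ignore_boxes[i] would then count neither + // for nor against the detector during training.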
+ + } + catch (exception& e) + { + cout << "\nexception thrown!" << endl; + cout << e.what() << endl; + } +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/file_to_code_ex.cpp b/ml/dlib/examples/file_to_code_ex.cpp new file mode 100644 index 00000000..ce49bde7 --- /dev/null +++ b/ml/dlib/examples/file_to_code_ex.cpp @@ -0,0 +1,111 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is an example illustrating the use of the compress_stream and + base64 components from the dlib C++ Library. + + It reads in a file from the disk and compresses it into an in-memory buffer and + then converts that buffer into base64 text. The final step is to output to + the screen some C++ code that contains this base64 encoded text and can decompress + it back into its original form. +*/ + + +#include <iostream> +#include <fstream> +#include <sstream> +#include <string> +#include <cstdlib> +#include <dlib/compress_stream.h> +#include <dlib/base64.h> + + +using namespace std; +using namespace dlib; + +int main(int argc, char** argv) +{ + if (argc != 2) + { + cout << "You must give a file name as the argument to this program.\n" << endl; + cout << "This program reads in a file from the disk and compresses\n" + << "it into an in-memory buffer and then converts that buffer \n" + << "into base64 text. The final step is to output to the screen\n" + << "some C++ code that contains this base64 encoded text and can\n" + << "decompress it back into its original form.\n" << endl; + + return EXIT_FAILURE; + } + + // open the file the user specified on the command line + ifstream fin(argv[1], ios::binary); + if (!fin) { + cout << "can't open file " << argv[1] << endl; + return EXIT_FAILURE; + } + + ostringstream sout; + istringstream sin; + + // this is the object we will use to do the base64 encoding + base64 base64_coder; + // this is the object we will use to do the data compression + compress_stream::kernel_1ea compressor; + + // compress the contents of the file and store the results in the string stream sout + compressor.compress(fin,sout); + sin.str(sout.str()); + sout.clear(); + sout.str(""); + + // now base64 encode the compressed data + base64_coder.encode(sin,sout); + + sin.clear(); + sin.str(sout.str()); + sout.str(""); + + // the following is a little funny looking but all it does is output some C++ code + // that contains the compressed/base64 data and the C++ code that can decode it back + // into its original form.
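+ // (Once the printed code is pasted into another program, recovering the original + // file contents is just a call to the emitted function, i.e. + // const std::string contents = get_decoded_string(); + // where get_decoded_string() is the function generated below.)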
+ sout << "#include <sstream>\n"; + sout << "#include <dlib/compress_stream.h>\n"; + sout << "#include <dlib/base64.h>\n"; + sout << "\n"; + sout << "// This function returns the contents of the file '" << argv[1] << "'\n"; + sout << "const std::string get_decoded_string()\n"; + sout << "{\n"; + sout << " dlib::base64 base64_coder;\n"; + sout << " dlib::compress_stream::kernel_1ea compressor;\n"; + sout << " std::ostringstream sout;\n"; + sout << " std::istringstream sin;\n\n"; + + + sout << " // The base64 encoded data from the file '" << argv[1] << "' we want to decode and return.\n"; + string temp; + getline(sin,temp); + while (sin && temp.size() > 0) + { + sout << " sout << \"" << temp << "\";\n"; + getline(sin,temp); + } + + sout << "\n"; + sout << " // Put the data into the istream sin\n"; + sout << " sin.str(sout.str());\n"; + sout << " sout.str(\"\");\n\n"; + sout << " // Decode the base64 text into its compressed binary form\n"; + sout << " base64_coder.decode(sin,sout);\n"; + sout << " sin.clear();\n"; + sout << " sin.str(sout.str());\n"; + sout << " sout.str(\"\");\n\n"; + sout << " // Decompress the data into its original form\n"; + sout << " compressor.decompress(sin,sout);\n\n"; + sout << " // Return the decoded and decompressed data\n"; + sout << " return sout.str();\n"; + sout << "}\n"; + + + // finally output our encoded data and its C++ code to the screen + cout << sout.str() << endl; +} + diff --git a/ml/dlib/examples/graph_labeling_ex.cpp b/ml/dlib/examples/graph_labeling_ex.cpp new file mode 100644 index 00000000..984a93bf --- /dev/null +++ b/ml/dlib/examples/graph_labeling_ex.cpp @@ -0,0 +1,259 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the graph_labeler and + structural_graph_labeling_trainer objects. + + Suppose you have a bunch of objects and you need to label each of them as true or + false. Suppose further that knowing the labels of some of these objects tells you + something about the likely label of the others. This is common in a number of domains. + For example, in image segmentation problems you need to label each pixel, and knowing + the labels of neighboring pixels gives you information about the likely label since + neighboring pixels will often have the same label. + + We can generalize this problem by saying that we have a graph and our task is to label + each node in the graph as true or false. Additionally, the edges in the graph connect + nodes which are likely to share the same label. In this example program, each node + will have a feature vector which contains information which helps tell if the node + should be labeled as true or false. The edges also contain feature vectors which give + information indicating how strong the edge's labeling consistency constraint should be. + This is useful since some nodes will have uninformative feature vectors and the only + way to tell how they should be labeled is by looking at their neighbor's labels. + + Therefore, this program will show you how to learn two things using machine learning. + The first is a linear classifier which operates on each node and predicts if it should + be labeled as true or false. The second thing is a linear function of the edge + vectors. This function outputs a penalty for giving two nodes connected by an edge + differing labels. The graph_labeler object puts these two things together and uses + them to compute a labeling which takes both into account. 
In what follows, we will use + a structural SVM method to find the parameters of these linear functions which minimize + the number of mistakes made by a graph_labeler. + + + Finally, you might also consider reading the book Structured Prediction and Learning in + Computer Vision by Sebastian Nowozin and Christoph H. Lampert since it contains a good + introduction to machine learning methods such as the algorithm implemented by the + structural_graph_labeling_trainer. +*/ + +#include <dlib/svm_threaded.h> +#include <iostream> + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------------------- + +// The first thing we do is define the kind of graph object we will be using. +// Here we are saying there will be 2-D vectors at each node and 1-D vectors at +// each edge. (You should read the matrix_ex.cpp example program for an introduction +// to the matrix object.) +typedef matrix<double,2,1> node_vector_type; +typedef matrix<double,1,1> edge_vector_type; +typedef graph<node_vector_type, edge_vector_type>::kernel_1a_c graph_type; + +// ---------------------------------------------------------------------------------------- + +template < + typename graph_type, + typename labels_type + > +void make_training_examples( + dlib::array<graph_type>& samples, + labels_type& labels +) +{ + /* + This function makes 3 graphs we will use for training. All of them + will contain 4 nodes and have the structure shown below: + + (0)-----(1) + | | + | | + | | + (3)-----(2) + + In this example, each node has a 2-D vector. The first element of this vector + is 1 when the node should have a label of false while the second element has + a value of 1 when the node should have a label of true. Additionally, the + edge vectors will contain a value of 1 when the nodes connected by the edge + should share the same label and a value of 0 otherwise. + + We want to see that the machine learning method is able to figure out how + these features relate to the labels. If it is successful it will create a + graph_labeler which can predict the correct labels for these and other + similarly constructed graphs. + + Finally, note that these tools require all values in the edge vectors to be >= 0. + However, the node vectors may contain both positive and negative values. + */ + + samples.clear(); + labels.clear(); + + std::vector<bool> label; + graph_type g; + + // --------------------------- + g.set_number_of_nodes(4); + label.resize(g.number_of_nodes()); + // store the vector [0,1] into node 0. Also label it as true. + g.node(0).data = 0, 1; label[0] = true; + // store the vector [0,0] into node 1. + g.node(1).data = 0, 0; label[1] = true; // Note that this node's vector doesn't tell us how to label it. + // We need to take the edges into account to get it right. + // store the vector [1,0] into node 2. + g.node(2).data = 1, 0; label[2] = false; + // store the vector [0,0] into node 3. + g.node(3).data = 0, 0; label[3] = false; + + // Add the 4 edges as shown in the ASCII art above. + g.add_edge(0,1); + g.add_edge(1,2); + g.add_edge(2,3); + g.add_edge(3,0); + + // set the 1-D vector for the edge between node 0 and 1 to the value of 1. + edge(g,0,1) = 1; + // set the 1-D vector for the edge between node 1 and 2 to the value of 0. + edge(g,1,2) = 0; + edge(g,2,3) = 1; + edge(g,3,0) = 0; + // output the graph and its label. 
+ samples.push_back(g); + labels.push_back(label); + + // --------------------------- + g.set_number_of_nodes(4); + label.resize(g.number_of_nodes()); + g.node(0).data = 0, 1; label[0] = true; + g.node(1).data = 0, 1; label[1] = true; + g.node(2).data = 1, 0; label[2] = false; + g.node(3).data = 1, 0; label[3] = false; + + g.add_edge(0,1); + g.add_edge(1,2); + g.add_edge(2,3); + g.add_edge(3,0); + + // This time, we have strong edges between all the nodes. The machine learning + // tools will have to learn that when the node information conflicts with the + // edge constraints, the node information should dominate. + edge(g,0,1) = 1; + edge(g,1,2) = 1; + edge(g,2,3) = 1; + edge(g,3,0) = 1; + samples.push_back(g); + labels.push_back(label); + // --------------------------- + + g.set_number_of_nodes(4); + label.resize(g.number_of_nodes()); + g.node(0).data = 1, 0; label[0] = false; + g.node(1).data = 1, 0; label[1] = false; + g.node(2).data = 1, 0; label[2] = false; + g.node(3).data = 0, 0; label[3] = false; + + g.add_edge(0,1); + g.add_edge(1,2); + g.add_edge(2,3); + g.add_edge(3,0); + + edge(g,0,1) = 0; + edge(g,1,2) = 0; + edge(g,2,3) = 1; + edge(g,3,0) = 0; + samples.push_back(g); + labels.push_back(label); + // --------------------------- + +} + +// ---------------------------------------------------------------------------------------- + +int main() +{ + try + { + // Get the training samples we defined above. + dlib::array<graph_type> samples; + std::vector<std::vector<bool> > labels; + make_training_examples(samples, labels); + + + // Create a structural SVM trainer for graph labeling problems. The vector_type + // needs to be set to a type capable of holding node or edge vectors. + typedef matrix<double,0,1> vector_type; + structural_graph_labeling_trainer<vector_type> trainer; + // This is the usual SVM C parameter. Larger values make the trainer try + // harder to fit the training data but might result in overfitting. You + // should set this value to whatever gives the best cross-validation results. + trainer.set_c(10); + + // Do 3-fold cross-validation and print the results. In this case it will + // indicate that all nodes were correctly classified. + cout << "3-fold cross-validation: " << cross_validate_graph_labeling_trainer(trainer, samples, labels, 3) << endl; + + // Since the trainer is working well, let's have it make a graph_labeler + // based on the training data. + graph_labeler<vector_type> labeler = trainer.train(samples, labels); + + + /* + Let's try the graph_labeler on a new test graph. In particular, let's + use one with 5 nodes as shown below: + + (0 F)-----(1 T) + | | + | | + | | + (3 T)-----(2 T)------(4 T) + + I have annotated each node with either T or F to indicate the correct + output (true or false). + */ + graph_type g; + g.set_number_of_nodes(5); + g.node(0).data = 1, 0; // Node data indicates a false node. + g.node(1).data = 0, 1; // Node data indicates a true node. + g.node(2).data = 0, 0; // Node data is ambiguous. + g.node(3).data = 0, 0; // Node data is ambiguous. + g.node(4).data = 0.1, 0; // Node data slightly indicates a false node. + + g.add_edge(0,1); + g.add_edge(1,2); + g.add_edge(2,3); + g.add_edge(3,0); + g.add_edge(2,4); + + // Set the edges up so nodes 1, 2, 3, and 4 are all strongly connected. + edge(g,0,1) = 0; + edge(g,1,2) = 1; + edge(g,2,3) = 1; + edge(g,3,0) = 0; + edge(g,2,4) = 1; + + // The output of this shows all the nodes are correctly labeled.
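+ // (Given the T/F annotations in the ASCII art above, "correctly labeled" means + // node 0 prints as 0 (false) while nodes 1 through 4 all print as 1 (true).)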
+ cout << "Predicted labels: " << endl; + std::vector<bool> temp = labeler(g); + for (unsigned long i = 0; i < temp.size(); ++i) + cout << "  " << i << ": " << temp[i] << endl; + + + + // Breaking the strong labeling consistency link between node 1 and 2 causes + // nodes 2, 3, and 4 to flip to false. This is because of their connection + // to node 4 which has a small preference for false. + edge(g,1,2) = 0; + cout << "Predicted labels: " << endl; + temp = labeler(g); + for (unsigned long i = 0; i < temp.size(); ++i) + cout << "  " << i << ": " << temp[i] << endl; + } + catch (std::exception& e) + { + cout << "Error, an exception was thrown!" << endl; + cout << e.what() << endl; + } +} + diff --git a/ml/dlib/examples/gui_api_ex.cpp b/ml/dlib/examples/gui_api_ex.cpp new file mode 100644 index 00000000..4d947b75 --- /dev/null +++ b/ml/dlib/examples/gui_api_ex.cpp @@ -0,0 +1,231 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the gui api from the dlib C++ Library. + + + This is a pretty simple example. It makes a window with a user-defined + widget (a draggable colored box) and a button. You can drag the + box around or click the button which increments a counter. +*/ + + + + +#include <dlib/gui_widgets.h> +#include <sstream> +#include <string> + + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------- + +class color_box : public draggable +{ + /* + Here I am defining a custom drawable widget that is a colored box that + you can drag around on the screen. draggable is a special kind of drawable + object that, as the name implies, is draggable by the user via the mouse. + To make my color_box draggable all I need to do is inherit from draggable. + */ + unsigned char red, green, blue; + +public: + color_box ( + drawable_window& w, + rectangle area, + unsigned char red_, + unsigned char green_, + unsigned char blue_ + ) : + draggable(w), + red(red_), + green(green_), + blue(blue_) + { + rect = area; + set_draggable_area(rectangle(10,10,400,400)); + + // Whenever you make your own drawable widget (or inherit from any drawable widget + // or interface such as draggable) you have to remember to call this function to + // enable the events. The idea here is that you can perform whatever setup you + // need to do to get your object into a valid state without needing to worry about + // event handlers triggering before you are ready. + enable_events(); + } + + ~color_box ( + ) + { + // Disable all further events for this drawable object. We have to do this + // because we don't want any events (like draw()) coming to this object while or + // after it has been destructed. + disable_events(); + + // Tell the parent window to redraw its area that previously contained this + // drawable object. + parent.invalidate_rectangle(rect); + } + +private: + + void draw ( + const canvas& c + ) const + { + // The canvas is an object that represents a part of the parent window + // that needs to be redrawn. + + // The first thing I usually do is check if the draw call is for part + // of the window that overlaps with my widget. We don't have to do this + // but it is usually good to do as a speed hack. Also, the reason + // I don't have it set to only give you draw calls when it does indeed + // overlap is that you might want to do some drawing outside of your + // widget's rectangle.
But usually you don't want to do that :) + rectangle area = c.intersect(rect); + if (area.is_empty() == true) + return; + + // This simple widget is just going to draw a box on the screen. + fill_rect(c,rect,rgb_pixel(red,green,blue)); + } +}; + +// ---------------------------------------------------------------------------- + +class win : public drawable_window +{ + /* + Here I am going to define our window. In general, you can define as + many window types as you like and make as many instances of them as you want. + In this example I am only making one though. + */ +public: + win( + ) : // All widgets take their parent window as an argument to their constructor. + c(*this), + b(*this), + cb(*this,rectangle(100,100,200,200),0,0,255), // the color_box will be blue and 101 pixels wide and tall + mbar(*this) + { + // tell our button to put itself at the position (10,60). + b.set_pos(10,60); + b.set_name("button"); + + // let's put the label 5 pixels below the button + c.set_pos(b.left(),b.bottom()+5); + + + // set which function should get called when the button gets clicked. In this case we want + // the on_button_clicked member to be called on *this. + b.set_click_handler(*this,&win::on_button_clicked); + // Alternatively, if you have a compiler which supports the lambda functions from the + // new C++ standard then you can use a lambda function instead of telling the click + // handler to call one of the member functions. So for example, you could do this + // instead (uncomment the code if you have C++0x support): + /* + b.set_click_handler([&](){ + ++counter; + ostringstream sout; + sout << "Counter: " << counter; + c.set_text(sout.str()); + }); + */ + // In general, all the functions which register events can take either member + // functions or lambda functions. + + + // Let's also make a simple menu bar. + // First we say how many menus we want in our menu bar. In this example we only want 1. + mbar.set_number_of_menus(1); + // Now we set the name of our menu. The 'M' means that the M in Menu will be underlined + // and the user will be able to select it by hitting alt+M + mbar.set_menu_name(0,"Menu",'M'); + + // Now we add some items to the menu. Note that items in a menu are listed in the + // order in which they were added. + + // First let's make a menu item that does the same thing as our button does when it is clicked. + // Again, the 'C' means the C in Click is underlined in the menu. + mbar.menu(0).add_menu_item(menu_item_text("Click Button!",*this,&win::on_button_clicked,'C')); + // let's add a separator (i.e. a horizontal separating line) to the menu + mbar.menu(0).add_menu_item(menu_item_separator()); + // Now let's make a menu item that calls show_about when the user selects it. + mbar.menu(0).add_menu_item(menu_item_text("About",*this,&win::show_about,'A')); + + + // set the size of this window + set_size(430,380); + + counter = 0; + + set_title("dlib gui example"); + show(); + } + + ~win( + ) + { + // You should always call close_window() in the destructor of window + // objects to ensure that no events will be sent to this window while + // it is being destructed. + close_window(); + } + +private: + + void on_button_clicked ( + ) + { + // when someone clicks our button it will increment the counter and + // display it in our label c. 
+        ++counter;
+        ostringstream sout;
+        sout << "counter: " << counter;
+        c.set_text(sout.str());
+    }
+
+    void show_about(
+    )
+    {
+        message_box("About","This is a dlib gui example program");
+    }
+
+    unsigned long counter;
+    label c;
+    button b;
+    color_box cb;
+    menu_bar mbar;
+};
+
+// ----------------------------------------------------------------------------
+
+int main()
+{
+    // create our window
+    win my_window;
+
+
+    // wait until the user closes this window before we let the program
+    // terminate.
+    my_window.wait_until_closed();
+
+    return 0;
+}
+
+// ----------------------------------------------------------------------------
+
+// Normally, if you built this application on MS Windows in Visual Studio you
+// would see a black console window pop up when you ran it. The following
+// #pragma directives tell Visual Studio to not include a console window along
+// with your application. However, if you prefer to have the console pop up as
+// well then simply remove these #pragma statements.
+#ifdef _MSC_VER
+#   pragma comment( linker, "/entry:mainCRTStartup" )
+#   pragma comment( linker, "/SUBSYSTEM:WINDOWS" )
+#endif
+
+// ----------------------------------------------------------------------------
+
diff --git a/ml/dlib/examples/hough_transform_ex.cpp b/ml/dlib/examples/hough_transform_ex.cpp new file mode 100644 index 00000000..1c8b9f7b --- /dev/null +++ b/ml/dlib/examples/hough_transform_ex.cpp @@ -0,0 +1,84 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example illustrating the use of the Hough transform tool in the
+    dlib C++ Library.
+
+
+    In this example we are going to draw a line on an image and then use the
+    Hough transform to detect the location of the line. Moreover, we do this in
+    a loop that changes the line's position slightly each iteration, which gives
+    a pretty animation of the Hough transform in action.
+*/
+
+#include <dlib/gui_widgets.h>
+#include <dlib/image_transforms.h>
+
+using namespace dlib;
+
+int main()
+{
+    // First let's make a 400x400 image. This will form the input to the Hough transform.
+    array2d<unsigned char> img(400,400);
+    // Now we make a hough_transform object. The 300 here means that the Hough transform
+    // will operate on a 300x300 subwindow of its input image.
+    hough_transform ht(300);
+
+    image_window win, win2;
+    double angle1 = 0;
+    double angle2 = 0;
+    while(true)
+    {
+        // Generate a line segment that is rotating around inside the image. The line is
+        // generated based on the values in angle1 and angle2. So each iteration creates a
+        // slightly different line.
+        angle1 += pi/130;
+        angle2 += pi/400;
+        const point cent = center(get_rect(img));
+        // A point 90 pixels away from the center of the image but rotated by angle1.
+        const point arc = rotate_point(cent, cent + point(90,0), angle1);
+        // Now make a line that goes through arc but rotate it by angle2.
+        const point l = rotate_point(arc, arc + point(500,0), angle2);
+        const point r = rotate_point(arc, arc - point(500,0), angle2);
+
+
+        // Next, blank out the input image and then draw our line on it.
+        assign_all_pixels(img, 0);
+        draw_line(img, l, r, 255);
+
+
+        const point offset(50,50);
+        array2d<int> himg;
+        // pick the window inside img on which we will run the Hough transform.
+        const rectangle box = translate_rect(get_rect(ht),offset);
+        // Now let's compute the hough transform for a subwindow in the image.
In + // particular, we run it on the 300x300 subwindow with an upper left corner at the + // pixel point(50,50). The output is stored in himg. + ht(img, box, himg); + // Now that we have the transformed image, the Hough image pixel with the largest + // value should indicate where the line is. So we find the coordinates of the + // largest pixel: + point p = max_point(mat(himg)); + // And then ask the ht object for the line segment in the original image that + // corresponds to this point in Hough transform space. + std::pair<point,point> line = ht.get_line(p); + + // Finally, let's display all these things on the screen. We copy the original + // input image into a color image and then draw the detected line on top in red. + array2d<rgb_pixel> temp; + assign_image(temp, img); + // Note that we must offset the output line to account for our offset subwindow. + // We do this by just adding in the offset to the line endpoints. + draw_line(temp, line.first+offset, line.second+offset, rgb_pixel(255,0,0)); + win.clear_overlay(); + win.set_image(temp); + // Also show the subwindow we ran the Hough transform on as a green box. You will + // see that the detected line is exactly contained within this box and also + // overlaps the original line. + win.add_overlay(box, rgb_pixel(0,255,0)); + + // We can also display the Hough transform itself using the jet color scheme. + win2.set_image(jet(himg)); + } +} + diff --git a/ml/dlib/examples/image_ex.cpp b/ml/dlib/examples/image_ex.cpp new file mode 100644 index 00000000..14868269 --- /dev/null +++ b/ml/dlib/examples/image_ex.cpp @@ -0,0 +1,104 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the GUI API as well as some + aspects of image manipulation from the dlib C++ Library. + + + This is a pretty simple example. It takes a BMP file on the command line + and opens it up, runs a simple edge detection algorithm on it, and + displays the results on the screen. +*/ + + + +#include <dlib/gui_widgets.h> +#include <dlib/image_io.h> +#include <dlib/image_transforms.h> +#include <fstream> + + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------- + +int main(int argc, char** argv) +{ + try + { + // make sure the user entered an argument to this program + if (argc != 2) + { + cout << "error, you have to enter a BMP file as an argument to this program" << endl; + return 1; + } + + // Here we declare an image object that can store rgb_pixels. Note that in + // dlib there is no explicit image object, just a 2D array and + // various pixel types. + array2d<rgb_pixel> img; + + // Now load the image file into our image. If something is wrong then + // load_image() will throw an exception. Also, if you linked with libpng + // and libjpeg then load_image() can load PNG and JPEG files in addition + // to BMP files. + load_image(img, argv[1]); + + + // Now let's use some image functions. First let's blur the image a little. + array2d<unsigned char> blurred_img; + gaussian_blur(img, blurred_img); + + // Now find the horizontal and vertical gradient images. 
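        // (The Sobel detector convolves the image with a pair of small derivative
        // kernels, so horz_gradient and vert_gradient hold signed estimates of the
        // intensity change in each direction; that is why they are declared below
        // as arrays of short rather than unsigned char.)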
+        array2d<short> horz_gradient, vert_gradient;
+        array2d<unsigned char> edge_image;
+        sobel_edge_detector(blurred_img, horz_gradient, vert_gradient);
+
+        // now we do the non-maximum edge suppression step so that our edges are nice and thin
+        suppress_non_maximum_edges(horz_gradient, vert_gradient, edge_image);
+
+        // Now we would like to see what our images look like. So let's use a
+        // window to display them on the screen. (Note that you can zoom into
+        // the window by holding CTRL and scrolling the mouse wheel)
+        image_window my_window(edge_image, "Normal Edge Image");
+
+        // We can also easily display the edge_image as a heatmap or using the jet color
+        // scheme like so.
+        image_window win_hot(heatmap(edge_image));
+        image_window win_jet(jet(edge_image));
+
+        // also make a window to display the original image
+        image_window my_window2(img, "Original Image");
+
+        // Sometimes you want to get input from the user about which pixels are important
+        // for some task. You can do this easily by trapping user clicks as shown below.
+        // This loop executes every time the user double clicks on some image pixel and it
+        // will terminate once the user closes the window.
+        point p;
+        while (my_window.get_next_double_click(p))
+        {
+            cout << "User double clicked on pixel: " << p << endl;
+            cout << "edge pixel value at this location is: " << (int)edge_image[p.y()][p.x()] << endl;
+        }
+
+        // wait until the user closes the windows before we let the program
+        // terminate.
+        win_hot.wait_until_closed();
+        my_window2.wait_until_closed();
+
+
+        // Finally, note that you can access the elements of an image using the normal [row][column]
+        // operator like so:
+        cout << horz_gradient[0][3] << endl;
+        cout << "number of rows in image: " << horz_gradient.nr() << endl;
+        cout << "number of columns in image: " << horz_gradient.nc() << endl;
+    }
+    catch (exception& e)
+    {
+        cout << "exception thrown: " << e.what() << endl;
+    }
+}
+
+// ----------------------------------------------------------------------------
+
diff --git a/ml/dlib/examples/integrate_function_adapt_simp_ex.cpp b/ml/dlib/examples/integrate_function_adapt_simp_ex.cpp new file mode 100644 index 00000000..6d2c8f76 --- /dev/null +++ b/ml/dlib/examples/integrate_function_adapt_simp_ex.cpp @@ -0,0 +1,89 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This example demonstrates the usage of the numerical quadrature function
+    integrate_function_adapt_simp(). This function takes as input a single variable
+    function, the endpoints of a domain over which the function will be integrated, and a
+    tolerance parameter. It outputs an approximation of the integral of this function over
+    the specified domain. The algorithm is based on the adaptive Simpson method outlined in:
+
+        Numerical Integration method based on the adaptive Simpson method in
+        Gander, W. and W. Gautschi, "Adaptive Quadrature – Revisited,"
+        BIT, Vol. 40, 2000, pp. 84-101
+
+*/
+
+#include <iostream>
+#include <dlib/matrix.h>
+#include <dlib/numeric_constants.h>
+#include <dlib/numerical_integration.h>
+
+using namespace std;
+using namespace dlib;
+
+// Here we define the set of functions that we wish to integrate and comment on the
+// domain of integration for each.
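// (For reference, four of these integrals have simple closed forms to check against:
//     exp(x)    over [0,1]:    e - 1, about 1.7182818
//     x^2       over [0,1]:    1/3
//     sin(x)    over [-pi,pi]: 0, since sin is odd on a symmetric domain
//     1/(1+x^2) over [0,2]:    atan(2), about 1.1071487
// gg3 below has no convenient elementary antiderivative, which is exactly the
// kind of integrand adaptive quadrature is for.)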
+
+// x in [0,1]
+double gg1(double x)
+{
+    return pow(e,x);
+}
+
+// x in [0,1]
+double gg2(double x)
+{
+    return x*x;
+}
+
+// x in [0, pi]
+double gg3(double x)
+{
+    return 1/(x*x + cos(x)*cos(x));
+}
+
+// x in [-pi, pi]
+double gg4(double x)
+{
+    return sin(x);
+}
+
+// x in [0,2]
+double gg5(double x)
+{
+    return 1/(1 + x*x);
+}
+
+int main()
+{
+    // We first define a tolerance parameter. Roughly speaking, a lower tolerance will
+    // result in a more accurate approximation of the true integral. However, there are
+    // instances where too small of a tolerance may yield a less accurate approximation
+    // than a larger tolerance. We recommend taking the tolerance to be in the
+    // [1e-10, 1e-8] region.
+
+    double tol = 1e-10;
+
+
+    // Here we compute the integrals of the five functions defined above using the same
+    // tolerance level for each.
+
+    double m1 = integrate_function_adapt_simp(&gg1, 0.0, 1.0, tol);
+    double m2 = integrate_function_adapt_simp(&gg2, 0.0, 1.0, tol);
+    double m3 = integrate_function_adapt_simp(&gg3, 0.0, pi, tol);
+    double m4 = integrate_function_adapt_simp(&gg4, -pi, pi, tol);
+    double m5 = integrate_function_adapt_simp(&gg5, 0.0, 2.0, tol);
+
+    // We finally print out the values of each of the approximated integrals to ten
+    // significant digits.
+
+    cout << "\nThe integral of exp(x) for x in [0,1] is " << std::setprecision(10) << m1 << endl;
+    cout << "The integral of x^2 for x in [0,1] is " << std::setprecision(10) << m2 << endl;
+    cout << "The integral of 1/(x^2 + cos(x)^2) for x in [0,pi] is " << std::setprecision(10) << m3 << endl;
+    cout << "The integral of sin(x) for x in [-pi,pi] is " << std::setprecision(10) << m4 << endl;
+    cout << "The integral of 1/(1+x^2) for x in [0,2] is " << std::setprecision(10) << m5 << endl;
+    cout << endl;
+
+    return 0;
+}
+
diff --git a/ml/dlib/examples/iosockstream_ex.cpp b/ml/dlib/examples/iosockstream_ex.cpp new file mode 100644 index 00000000..8a5dbbb2 --- /dev/null +++ b/ml/dlib/examples/iosockstream_ex.cpp @@ -0,0 +1,47 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example illustrating the use of the iosockstream object from the
+    dlib C++ Library.
+
+    This program simply connects to www.google.com at port 80 and requests the
+    main Google web page. It then prints what it gets back from Google to the
+    screen.
+
+
+    For those of you curious about HTTP check out the excellent introduction at
+    http://www.jmarshall.com/easy/http/
+*/
+
+#include <dlib/iosockstream.h>
+#include <iostream>
+
+using namespace std;
+using namespace dlib;
+
+int main()
+{
+    try
+    {
+        // Connect to Google's web server which listens on port 80. If this
+        // fails it will throw a dlib::socket_error exception.
+        iosockstream stream("www.google.com:80");
+
+        // At this point, we can use stream the same way we would use any other
+        // C++ iostream object. So to test it out, let's make a HTTP GET request
+        // for the main Google page.
+        stream << "GET / HTTP/1.0\r\n\r\n";
+
+        // Here we print each character we get back one at a time.
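        // (What comes back is the raw HTTP response, status line and headers
        // included, since this example makes no attempt to parse the protocol.)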
+        while (stream.peek() != EOF)
+        {
+            cout << (char)stream.get();
+        }
+    }
+    catch (exception& e)
+    {
+        cout << e.what() << endl;
+    }
+}
+
(Binary JPEG images added under ml/dlib/examples/johns/, one git "new file mode 100644"
entry each; binary content not shown. The files are:)

John_Salley:    000179_02159509.jpg, 000183_02159543.jpg, 000186_02159346.jpg,
                000189_02159361.jpg, 000190_02159501.jpg, 000192_02159531.jpg,
                000194_02159572.jpg, 000197_02159322.jpg, 000197_02159525.jpg,
                000198_02159470.jpg, 000200_02159354.jpg
John_Savage:    000264_01099001.jpg, 000274_01099061.jpg, 000277_01099000.jpg,
                000289_01099139.jpg, 000290_01099067.jpg, 000290_01099090.jpg,
                000291_01099023.jpg, 000291_01099214.jpg, 000293_01099081.jpg,
                000296_01099007.jpg, 000299_01099008.jpg
John_Schneider: 000288_00925786.jpg, 000302_00925785.jpg, 000307_00925823.jpg,
                000325_00925954.jpg, 000326_00925765.jpg, 000326_00926089.jpg,
                000326_00926128.jpg, 000326_00926139.jpg, 000329_00925859.jpg,
                000329_00925963.jpg, 000331_00926012.jpg
John_Shimkus:   000373_03228153.jpg, 000375_03227651.jpg, 000376_02340068.jpg,
                000378_02340151.jpg, 000378_03227610.jpg, 000383_03227939.jpg,
                000385_03227766.jpg, 000388_03227773.jpg, 000390_03227666.jpg,
                000394_02340150.jpg, 000396_03227722.jpg
John_Simm:      000288_00470387.jpg, 000297_00470170.jpg, 000300_00470148.jpg,
                000304_00470122.jpg, 000305_00470162.jpg, 000305_00470717.jpg,
                000306_00470222.jpg, 000306_00470223.jpg, 000309_00470287.jpg,
                000310_00470421.jpg, 000310_00470511.jpg

diff --git a/ml/dlib/examples/kcentroid_ex.cpp b/ml/dlib/examples/kcentroid_ex.cpp new file mode 100644 index 00000000..1f9311bc --- /dev/null +++ b/ml/dlib/examples/kcentroid_ex.cpp @@ -0,0 +1,129 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    This is an example illustrating the use of the kcentroid object
+    from the dlib C++ Library.
+
+    The kcentroid object is an implementation of an algorithm that recursively
+    computes the centroid (i.e. average) of a set of points. The interesting
+    thing about dlib::kcentroid is that it does so in a kernel induced feature
+    space. This means that you can use it as a non-linear one-class classifier.
+    So you might use it to perform online novelty detection (although, it has
+    other uses, see the svm_pegasos or kkmeans examples for example).
+
+    This example will train an instance of it on points from the sinc function.
+
+*/
+
+#include <iostream>
+#include <vector>
+
+#include <dlib/svm.h>
+#include <dlib/statistics.h>
+
+using namespace std;
+using namespace dlib;
+
+// Here is the sinc function we will be trying to learn with the kcentroid
+// object.
+double sinc(double x)
+{
+    if (x == 0)
+        return 1;
+    return sin(x)/x;
+}
+
+int main()
+{
+    // Here we declare that our samples will be 2 dimensional column vectors.
+    // (Note that if you don't know the dimensionality of your vectors at compile time
+    // you can change the 2 to a 0 and then set the size at runtime)
+    typedef matrix<double,2,1> sample_type;
+
+    // Now we are making a typedef for the kind of kernel we want to use. I picked the
+    // radial basis kernel because it only has one parameter and generally gives good
+    // results without much fiddling.
+    typedef radial_basis_kernel<sample_type> kernel_type;
+
+    // Here we declare an instance of the kcentroid object. The kcentroid has 3 parameters
+    // you need to set. The first argument to the constructor is the kernel we wish to
+    // use. The second is a parameter that determines the numerical accuracy with which
+    // the object will perform the centroid estimation. Generally, smaller values
+    // give better results but cause the algorithm to attempt to use more dictionary vectors
+    // (and thus run slower and use more memory). The third argument, however, is the
+    // maximum number of dictionary vectors a kcentroid is allowed to use. So you can use
+    // it to control the runtime complexity.
+    kcentroid<kernel_type> test(kernel_type(0.1),0.01, 15);
+
+
+    // now we train our object on a few samples of the sinc function.
+    sample_type m;
+    for (double x = -15; x <= 8; x += 1)
+    {
+        m(0) = x;
+        m(1) = sinc(x);
+        test.train(m);
+    }
+
+    running_stats<double> rs;
+
+    // Now let's output the distance from the centroid to some points that are from the sinc function.
+    // These numbers should all be similar.
+    // We will also calculate the statistics of these numbers by accumulating them into
+    // the running_stats object called rs. This will let us easily find the mean and
+    // standard deviation of the distances for use below.
+    cout << "Points that are on the sinc function:\n";
+    m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));
+    m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));
+    m(0) = -0;   m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));
+    m(0) = -0.5; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));
+    m(0) = -4.1; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));
+    m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));
+    m(0) = -0.5; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));
+
+    cout << endl;
+    // Let's output the distance from the centroid to some points that are NOT from the sinc function.
+    // These numbers should all be significantly bigger than the previous set of numbers. We will also
+    // use the rs.scale() function to find out how many standard deviations they are away from the
+    // mean of the test points from the sinc function. So in this case our criterion for "significantly bigger"
+    // is > 3 or 4 standard deviations away from the above points that actually are on the sinc function.
+    cout << "Points that are NOT on the sinc function:\n";
+    m(0) = -1.5; m(1) = sinc(m(0))+4;   cout << " " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
+    m(0) = -1.5; m(1) = sinc(m(0))+3;   cout << " " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
+    m(0) = -0;   m(1) = -sinc(m(0));    cout << " " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
+    m(0) = -0.5; m(1) = -sinc(m(0));    cout << " " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
+    m(0) = -4.1; m(1) = sinc(m(0))+2;   cout << " " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
+    m(0) = -1.5; m(1) = sinc(m(0))+0.9; cout << " " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
+    m(0) = -0.5; m(1) = sinc(m(0))+1;   cout << " " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
+
+    // And finally print out the mean and standard deviation of points that are actually from sinc().
+    cout << "\nmean: " << rs.mean() << endl;
+    cout << "standard deviation: " << rs.stddev() << endl;
+
+    // The output is as follows:
+    /*
+        Points that are on the sinc function:
+            0.869913
+            0.869913
+            0.873408
+            0.872807
+            0.870432
+            0.869913
+            0.872807
+
+        Points that are NOT on the sinc function:
+            1.06366 is 119.65 standard deviations from sinc.
+            1.02212 is 93.8106 standard deviations from sinc.
+            0.921382 is 31.1458 standard deviations from sinc.
+            0.918439 is 29.3147 standard deviations from sinc.
+            0.931428 is 37.3949 standard deviations from sinc.
+            0.898018 is 16.6121 standard deviations from sinc.
+            0.914425 is 26.8183 standard deviations from sinc.
+
+        mean: 0.871313
+        standard deviation: 0.00160756
+    */
+
+    // So we can see that in this example the kcentroid object correctly indicates that
+    // the non-sinc points are definitely not points from the sinc function.
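    // A minimal extra sketch (not in the original example): to turn the above into a
    // yes/no novelty test you could threshold the distance in standard-deviation
    // units. The cutoff of 4 used here is an assumption you would tune on your own
    // data, and the lambda assumes a C++11 compiler.
    auto is_novel = [&](const sample_type& samp) { return rs.scale(test(samp)) > 4; };
    m(0) = -1.5; m(1) = sinc(m(0)) + 2;
    cout << "novel? " << is_novel(m) << endl;  // expect 1 (true), far off the curve
    m(0) = -1.5; m(1) = sinc(m(0));
    cout << "novel? " << is_novel(m) << endl;  // expect 0 (false), on the curve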
+} + + diff --git a/ml/dlib/examples/kkmeans_ex.cpp b/ml/dlib/examples/kkmeans_ex.cpp new file mode 100644 index 00000000..76ea33cb --- /dev/null +++ b/ml/dlib/examples/kkmeans_ex.cpp @@ -0,0 +1,154 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is an example illustrating the use of the kkmeans object + and spectral_cluster() routine from the dlib C++ Library. + + The kkmeans object is an implementation of a kernelized k-means clustering + algorithm. It is implemented by using the kcentroid object to represent + each center found by the usual k-means clustering algorithm. + + So this object allows you to perform non-linear clustering in the same way + a svm classifier finds non-linear decision surfaces. + + This example will make points from 3 classes and perform kernelized k-means + clustering on those points. It will also do the same thing using spectral + clustering. + + The classes are as follows: + - points very close to the origin + - points on the circle of radius 10 around the origin + - points that are on a circle of radius 4 but not around the origin at all +*/ + +#include <iostream> +#include <vector> + +#include <dlib/clustering.h> +#include <dlib/rand.h> + +using namespace std; +using namespace dlib; + +int main() +{ + // Here we declare that our samples will be 2 dimensional column vectors. + // (Note that if you don't know the dimensionality of your vectors at compile time + // you can change the 2 to a 0 and then set the size at runtime) + typedef matrix<double,2,1> sample_type; + + // Now we are making a typedef for the kind of kernel we want to use. I picked the + // radial basis kernel because it only has one parameter and generally gives good + // results without much fiddling. + typedef radial_basis_kernel<sample_type> kernel_type; + + + // Here we declare an instance of the kcentroid object. It is the object used to + // represent each of the centers used for clustering. The kcentroid has 3 parameters + // you need to set. The first argument to the constructor is the kernel we wish to + // use. The second is a parameter that determines the numerical accuracy with which + // the object will perform part of the learning algorithm. Generally, smaller values + // give better results but cause the algorithm to attempt to use more dictionary vectors + // (and thus run slower and use more memory). The third argument, however, is the + // maximum number of dictionary vectors a kcentroid is allowed to use. So you can use + // it to control the runtime complexity. + kcentroid<kernel_type> kc(kernel_type(0.1),0.01, 8); + + // Now we make an instance of the kkmeans object and tell it to use kcentroid objects + // that are configured with the parameters from the kc object we defined above. 
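    // (As I understand the kkmeans implementation, kc acts as a prototype here: each
    // of the k cluster centers gets its own copy of it, so the accuracy and
    // dictionary-size settings above apply to every center independently.)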
+ kkmeans<kernel_type> test(kc); + + std::vector<sample_type> samples; + std::vector<sample_type> initial_centers; + + sample_type m; + + dlib::rand rnd; + + // we will make 50 points from each class + const long num = 50; + + // make some samples near the origin + double radius = 0.5; + for (long i = 0; i < num; ++i) + { + double sign = 1; + if (rnd.get_random_double() < 0.5) + sign = -1; + m(0) = 2*radius*rnd.get_random_double()-radius; + m(1) = sign*sqrt(radius*radius - m(0)*m(0)); + + // add this sample to our set of samples we will run k-means + samples.push_back(m); + } + + // make some samples in a circle around the origin but far away + radius = 10.0; + for (long i = 0; i < num; ++i) + { + double sign = 1; + if (rnd.get_random_double() < 0.5) + sign = -1; + m(0) = 2*radius*rnd.get_random_double()-radius; + m(1) = sign*sqrt(radius*radius - m(0)*m(0)); + + // add this sample to our set of samples we will run k-means + samples.push_back(m); + } + + // make some samples in a circle around the point (25,25) + radius = 4.0; + for (long i = 0; i < num; ++i) + { + double sign = 1; + if (rnd.get_random_double() < 0.5) + sign = -1; + m(0) = 2*radius*rnd.get_random_double()-radius; + m(1) = sign*sqrt(radius*radius - m(0)*m(0)); + + // translate this point away from the origin + m(0) += 25; + m(1) += 25; + + // add this sample to our set of samples we will run k-means + samples.push_back(m); + } + + // tell the kkmeans object we made that we want to run k-means with k set to 3. + // (i.e. we want 3 clusters) + test.set_number_of_centers(3); + + // You need to pick some initial centers for the k-means algorithm. So here + // we will use the dlib::pick_initial_centers() function which tries to find + // n points that are far apart (basically). + pick_initial_centers(3, initial_centers, samples, test.get_kernel()); + + // now run the k-means algorithm on our set of samples. + test.train(samples,initial_centers); + + // now loop over all our samples and print out their predicted class. In this example + // all points are correctly identified. + for (unsigned long i = 0; i < samples.size()/3; ++i) + { + cout << test(samples[i]) << " "; + cout << test(samples[i+num]) << " "; + cout << test(samples[i+2*num]) << "\n"; + } + + // Now print out how many dictionary vectors each center used. Note that + // the maximum number of 8 was reached. If you went back to the kcentroid + // constructor and changed the 8 to some bigger number you would see that these + // numbers would go up. However, 8 is all we need to correctly cluster this dataset. + cout << "num dictionary vectors for center 0: " << test.get_kcentroid(0).dictionary_size() << endl; + cout << "num dictionary vectors for center 1: " << test.get_kcentroid(1).dictionary_size() << endl; + cout << "num dictionary vectors for center 2: " << test.get_kcentroid(2).dictionary_size() << endl; + + + // Finally, we can also solve the same kind of non-linear clustering problem with + // spectral_cluster(). The output is a vector that indicates which cluster each sample + // belongs to. Just like with kkmeans, it assigns each point to the correct cluster. + std::vector<unsigned long> assignments = spectral_cluster(kernel_type(0.1), samples, 3); + cout << mat(assignments) << endl; + +} + + diff --git a/ml/dlib/examples/krls_ex.cpp b/ml/dlib/examples/krls_ex.cpp new file mode 100644 index 00000000..968f1a6d --- /dev/null +++ b/ml/dlib/examples/krls_ex.cpp @@ -0,0 +1,94 @@ +// The contents of this file are in the public domain. 
See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is an example illustrating the use of the krls object + from the dlib C++ Library. + + The krls object allows you to perform online regression. This + example will train an instance of it on the sinc function. + +*/ + +#include <iostream> +#include <vector> + +#include <dlib/svm.h> + +using namespace std; +using namespace dlib; + +// Here is the sinc function we will be trying to learn with the krls +// object. +double sinc(double x) +{ + if (x == 0) + return 1; + return sin(x)/x; +} + +int main() +{ + // Here we declare that our samples will be 1 dimensional column vectors. In general, + // you can use N dimensional vectors as inputs to the krls object. But here we only + // have 1 dimension to make the example simple. (Note that if you don't know the + // dimensionality of your vectors at compile time you can change the first number to + // a 0 and then set the size at runtime) + typedef matrix<double,1,1> sample_type; + + // Now we are making a typedef for the kind of kernel we want to use. I picked the + // radial basis kernel because it only has one parameter and generally gives good + // results without much fiddling. + typedef radial_basis_kernel<sample_type> kernel_type; + + // Here we declare an instance of the krls object. The first argument to the constructor + // is the kernel we wish to use. The second is a parameter that determines the numerical + // accuracy with which the object will perform part of the regression algorithm. Generally + // smaller values give better results but cause the algorithm to run slower. You just have + // to play with it to decide what balance of speed and accuracy is right for your problem. + // Here we have set it to 0.001. + krls<kernel_type> test(kernel_type(0.1),0.001); + + // now we train our object on a few samples of the sinc function. + sample_type m; + for (double x = -10; x <= 4; x += 1) + { + m(0) = x; + test.train(m, sinc(x)); + } + + // now we output the value of the sinc function for a few test points as well as the + // value predicted by krls object. + m(0) = 2.5; cout << sinc(m(0)) << " " << test(m) << endl; + m(0) = 0.1; cout << sinc(m(0)) << " " << test(m) << endl; + m(0) = -4; cout << sinc(m(0)) << " " << test(m) << endl; + m(0) = 5.0; cout << sinc(m(0)) << " " << test(m) << endl; + + // The output is as follows: + // 0.239389 0.239362 + // 0.998334 0.998333 + // -0.189201 -0.189201 + // -0.191785 -0.197267 + + + // The first column is the true value of the sinc function and the second + // column is the output from the krls estimate. + + + + + + // Another thing that is worth knowing is that just about everything in dlib is serializable. + // So for example, you can save the test object to disk and recall it later like so: + serialize("saved_krls_object.dat") << test; + + // Now let's open that file back up and load the krls object it contains. + deserialize("saved_krls_object.dat") >> test; + + // If you don't want to save the whole krls object (it might be a bit large) + // you can save just the decision function it has learned so far. You can get + // the decision function out of it by calling test.get_decision_function() and + // then you can serialize that object instead. E.g. 
+    decision_function<kernel_type> funct = test.get_decision_function();
+    serialize("saved_krls_function.dat") << funct;
+}
+
+
diff --git a/ml/dlib/examples/krls_filter_ex.cpp b/ml/dlib/examples/krls_filter_ex.cpp new file mode 100644 index 00000000..5bb74b18 --- /dev/null +++ b/ml/dlib/examples/krls_filter_ex.cpp @@ -0,0 +1,109 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    This is an example illustrating the use of the krls object
+    from the dlib C++ Library.
+
+    The krls object allows you to perform online regression. This
+    example will use the krls object to perform filtering of a signal
+    corrupted by uniformly distributed noise.
+*/
+
+#include <iostream>
+
+#include <dlib/svm.h>
+#include <dlib/rand.h>
+
+using namespace std;
+using namespace dlib;
+
+// Here is the function we will be trying to learn with the krls
+// object.
+double sinc(double x)
+{
+    if (x == 0)
+        return 1;
+
+    // also add in x just to make this function a little more complex
+    return sin(x)/x + x;
+}
+
+int main()
+{
+    // Here we declare that our samples will be 1 dimensional column vectors. The reason for
+    // using a matrix here is that in general you can use N dimensional vectors as inputs to the
+    // krls object. But here we only have 1 dimension to make the example simple.
+    typedef matrix<double,1,1> sample_type;
+
+
+    // Now we are making a typedef for the kind of kernel we want to use. I picked the
+    // radial basis kernel because it only has one parameter and generally gives good
+    // results without much fiddling.
+    typedef radial_basis_kernel<sample_type> kernel_type;
+
+
+    // Here we declare an instance of the krls object. The first argument to the constructor
+    // is the kernel we wish to use. The second is a parameter that determines the numerical
+    // accuracy with which the object will perform part of the regression algorithm. Generally
+    // smaller values give better results but cause the algorithm to run slower (because it
+    // tries to use more "dictionary vectors" to represent the function it is learning).
+    // You just have to play with it to decide what balance of speed and accuracy is right
+    // for your problem. Here we have set it to 0.001.
+    //
+    // The last argument is the maximum number of dictionary vectors the algorithm is allowed
+    // to use. The default value for this field is 1,000,000 which is large enough that you
+    // won't ever hit it in practice. However, here we have set it to the much smaller value
+    // of 7. This means that once the krls object accumulates 7 dictionary vectors it will
+    // start discarding old ones in favor of new ones as it goes through the training process.
+    // In other words, the algorithm "forgets" about old training data and focuses on recent
+    // training samples. So the bigger the maximum dictionary size the longer its memory will
+    // be. But in this example program we are doing filtering so we only care about the most
+    // recent data. So using a small value is appropriate here since it will result in much
+    // faster filtering and won't introduce much error.
+    krls<kernel_type> test(kernel_type(0.05),0.001,7);
+
+    dlib::rand rnd;
+
+    // Now let's loop over a big range of values from the sinc() function. Each time
+    // adding some random noise to the data we send to the krls object for training.
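    // (Since rnd.get_random_double() is uniform on [0,1), the noise below is uniform
    // on [-0.5,0.5) with variance 1/12, about 0.0833. The mse_noise measured below
    // should therefore land near that value, and the sample run printed 0.0821628.)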
+ sample_type m; + double mse_noise = 0; + double mse = 0; + double count = 0; + for (double x = -20; x <= 20; x += 0.01) + { + m(0) = x; + // get a random number between -0.5 and 0.5 + const double noise = rnd.get_random_double()-0.5; + + // train on this new sample + test.train(m, sinc(x)+noise); + + // once we have seen a bit of data start measuring the mean squared prediction error. + // Also measure the mean squared error due to the noise. + if (x > -19) + { + ++count; + mse += pow(sinc(x) - test(m),2); + mse_noise += pow(noise,2); + } + } + + mse /= count; + mse_noise /= count; + + // Output the ratio of the error from the noise and the mean squared prediction error. + cout << "prediction error: " << mse << endl; + cout << "noise: " << mse_noise << endl; + cout << "ratio of noise to prediction error: " << mse_noise/mse << endl; + + // When the program runs it should print the following: + // prediction error: 0.00735201 + // noise: 0.0821628 + // ratio of noise to prediction error: 11.1756 + + // And we see that the noise has been significantly reduced by filtering the points + // through the krls object. + +} + + diff --git a/ml/dlib/examples/krr_classification_ex.cpp b/ml/dlib/examples/krr_classification_ex.cpp new file mode 100644 index 00000000..42648351 --- /dev/null +++ b/ml/dlib/examples/krr_classification_ex.cpp @@ -0,0 +1,205 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the kernel ridge regression + object from the dlib C++ Library. + + This example creates a simple set of data to train on and then shows + you how to use the kernel ridge regression tool to find a good decision + function that can classify examples in our data set. + + + The data used in this example will be 2 dimensional data and will + come from a distribution where points with a distance less than 13 + from the origin are labeled +1 and all other points are labeled + as -1. All together, the dataset will contain 10201 sample points. + +*/ + + +#include <iostream> +#include <dlib/svm.h> + +using namespace std; +using namespace dlib; + + +int main() +{ + // This typedef declares a matrix with 2 rows and 1 column. It will be the + // object that contains each of our 2 dimensional samples. (Note that if you wanted + // more than 2 features in this vector you can simply change the 2 to something else. + // Or if you don't know how many features you want until runtime then you can put a 0 + // here and use the matrix.set_size() member function) + typedef matrix<double, 2, 1> sample_type; + + // This is a typedef for the type of kernel we are going to use in this example. + // In this case I have selected the radial basis kernel that can operate on our + // 2D sample_type objects + typedef radial_basis_kernel<sample_type> kernel_type; + + + // Now we make objects to contain our samples and their respective labels. + std::vector<sample_type> samples; + std::vector<double> labels; + + // Now let's put some data into our samples and labels objects. We do this + // by looping over a bunch of points and labeling them according to their + // distance from the origin. 
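    // (The grid below runs from -20 to 20 in steps of 0.4, i.e. 101 values per axis,
    // so the nested loops produce the 101*101 = 10201 samples quoted at the top of
    // this file, up to floating-point rounding in the loop bounds.)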
+ for (double r = -20; r <= 20; r += 0.4) + { + for (double c = -20; c <= 20; c += 0.4) + { + sample_type samp; + samp(0) = r; + samp(1) = c; + samples.push_back(samp); + + // if this point is less than 13 from the origin + if (sqrt((double)r*r + c*c) <= 13) + labels.push_back(+1); + else + labels.push_back(-1); + + } + } + + cout << "samples generated: " << samples.size() << endl; + cout << " number of +1 samples: " << sum(mat(labels) > 0) << endl; + cout << " number of -1 samples: " << sum(mat(labels) < 0) << endl; + + // Here we normalize all the samples by subtracting their mean and dividing by their standard deviation. + // This is generally a good idea since it often heads off numerical stability problems and also + // prevents one large feature from smothering others. Doing this doesn't matter much in this example + // so I'm just doing this here so you can see an easy way to accomplish this with + // the library. + vector_normalizer<sample_type> normalizer; + // let the normalizer learn the mean and standard deviation of the samples + normalizer.train(samples); + // now normalize each sample + for (unsigned long i = 0; i < samples.size(); ++i) + samples[i] = normalizer(samples[i]); + + + // here we make an instance of the krr_trainer object that uses our kernel type. + krr_trainer<kernel_type> trainer; + + // The krr_trainer has the ability to perform leave-one-out cross-validation. + // It does this to automatically determine the regularization parameter. Since + // we are performing classification instead of regression we should be sure to + // call use_classification_loss_for_loo_cv(). This function tells it to measure + // errors in terms of the number of classification mistakes instead of mean squared + // error between decision function output values and labels. + trainer.use_classification_loss_for_loo_cv(); + + + // Now we loop over some different gamma values to see how good they are. + cout << "\ndoing leave-one-out cross-validation" << endl; + for (double gamma = 0.000001; gamma <= 1; gamma *= 5) + { + // tell the trainer the parameters we want to use + trainer.set_kernel(kernel_type(gamma)); + + // loo_values will contain the LOO predictions for each sample. In the case + // of perfect prediction it will end up being a copy of labels. + std::vector<double> loo_values; + trainer.train(samples, labels, loo_values); + + // Print gamma and the fraction of samples correctly classified during LOO cross-validation. + const double classification_accuracy = mean_sign_agreement(labels, loo_values); + cout << "gamma: " << gamma << " LOO accuracy: " << classification_accuracy << endl; + } + + + // From looking at the output of the above loop it turns out that a good value for + // gamma for this problem is 0.000625. So that is what we will use. + trainer.set_kernel(kernel_type(0.000625)); + typedef decision_function<kernel_type> dec_funct_type; + typedef normalized_function<dec_funct_type> funct_type; + + + // Here we are making an instance of the normalized_function object. This object provides a convenient + // way to store the vector normalization information along with the decision function we are + // going to learn. 
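    // (The payoff of bundling them: normalized_function's operator() applies the
    // stored normalizer before evaluating the decision function, so calling
    // learned_function(x) below is the same as calling
    // learned_function.function(learned_function.normalizer(x)), and you cannot
    // forget to normalize at prediction time.)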
+ funct_type learned_function; + learned_function.normalizer = normalizer; // save normalization information + learned_function.function = trainer.train(samples, labels); // perform the actual training and save the results + + // print out the number of basis vectors in the resulting decision function + cout << "\nnumber of basis vectors in our learned_function is " + << learned_function.function.basis_vectors.size() << endl; + + // Now let's try this decision_function on some samples we haven't seen before. + // The decision function will return values >= 0 for samples it predicts + // are in the +1 class and numbers < 0 for samples it predicts to be in the -1 class. + sample_type sample; + + sample(0) = 3.123; + sample(1) = 2; + cout << "This is a +1 class example, the classifier output is " << learned_function(sample) << endl; + + sample(0) = 3.123; + sample(1) = 9.3545; + cout << "This is a +1 class example, the classifier output is " << learned_function(sample) << endl; + + sample(0) = 13.123; + sample(1) = 9.3545; + cout << "This is a -1 class example, the classifier output is " << learned_function(sample) << endl; + + sample(0) = 13.123; + sample(1) = 0; + cout << "This is a -1 class example, the classifier output is " << learned_function(sample) << endl; + + + // We can also train a decision function that reports a well conditioned probability + // instead of just a number > 0 for the +1 class and < 0 for the -1 class. An example + // of doing that follows: + typedef probabilistic_decision_function<kernel_type> probabilistic_funct_type; + typedef normalized_function<probabilistic_funct_type> pfunct_type; + + // The train_probabilistic_decision_function() is going to perform 3-fold cross-validation. + // So it is important that the +1 and -1 samples be distributed uniformly across all the folds. + // calling randomize_samples() will make sure that is the case. + randomize_samples(samples, labels); + + pfunct_type learned_pfunct; + learned_pfunct.normalizer = normalizer; + learned_pfunct.function = train_probabilistic_decision_function(trainer, samples, labels, 3); + // Now we have a function that returns the probability that a given sample is of the +1 class. + + // print out the number of basis vectors in the resulting decision function. + // (it should be the same as in the one above) + cout << "\nnumber of basis vectors in our learned_pfunct is " + << learned_pfunct.function.decision_funct.basis_vectors.size() << endl; + + sample(0) = 3.123; + sample(1) = 2; + cout << "This +1 class example should have high probability. Its probability is: " + << learned_pfunct(sample) << endl; + + sample(0) = 3.123; + sample(1) = 9.3545; + cout << "This +1 class example should have high probability. Its probability is: " + << learned_pfunct(sample) << endl; + + sample(0) = 13.123; + sample(1) = 9.3545; + cout << "This -1 class example should have low probability. Its probability is: " + << learned_pfunct(sample) << endl; + + sample(0) = 13.123; + sample(1) = 0; + cout << "This -1 class example should have low probability. Its probability is: " + << learned_pfunct(sample) << endl; + + + + // Another thing that is worth knowing is that just about everything in dlib is serializable. + // So for example, you can save the learned_pfunct object to disk and recall it later like so: + serialize("saved_function.dat") << learned_pfunct; + + // Now let's open that file back up and load the function object it contains. 
+ deserialize("saved_function.dat") >> learned_pfunct; + +} + diff --git a/ml/dlib/examples/krr_regression_ex.cpp b/ml/dlib/examples/krr_regression_ex.cpp new file mode 100644 index 00000000..26c1412d --- /dev/null +++ b/ml/dlib/examples/krr_regression_ex.cpp @@ -0,0 +1,104 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is an example illustrating the use of the kernel ridge regression + object from the dlib C++ Library. + + This example will train on data from the sinc function. + +*/ + +#include <iostream> +#include <vector> + +#include <dlib/svm.h> + +using namespace std; +using namespace dlib; + +// Here is the sinc function we will be trying to learn with kernel ridge regression +double sinc(double x) +{ + if (x == 0) + return 1; + return sin(x)/x; +} + +int main() +{ + // Here we declare that our samples will be 1 dimensional column vectors. + typedef matrix<double,1,1> sample_type; + + // Now sample some points from the sinc() function + sample_type m; + std::vector<sample_type> samples; + std::vector<double> labels; + for (double x = -10; x <= 4; x += 1) + { + m(0) = x; + samples.push_back(m); + labels.push_back(sinc(x)); + } + + // Now we are making a typedef for the kind of kernel we want to use. I picked the + // radial basis kernel because it only has one parameter and generally gives good + // results without much fiddling. + typedef radial_basis_kernel<sample_type> kernel_type; + + // Here we declare an instance of the krr_trainer object. This is the + // object that we will later use to do the training. + krr_trainer<kernel_type> trainer; + + // Here we set the kernel we want to use for training. The radial_basis_kernel + // has a parameter called gamma that we need to determine. As a rule of thumb, a good + // gamma to try is 1.0/(mean squared distance between your sample points). So + // below we are using a similar value computed from at most 2000 randomly selected + // samples. + const double gamma = 3.0/compute_mean_squared_distance(randomly_subsample(samples, 2000)); + cout << "using gamma of " << gamma << endl; + trainer.set_kernel(kernel_type(gamma)); + + // now train a function based on our sample points + decision_function<kernel_type> test = trainer.train(samples, labels); + + // now we output the value of the sinc function for a few test points as well as the + // value predicted by our regression. + m(0) = 2.5; cout << sinc(m(0)) << " " << test(m) << endl; + m(0) = 0.1; cout << sinc(m(0)) << " " << test(m) << endl; + m(0) = -4; cout << sinc(m(0)) << " " << test(m) << endl; + m(0) = 5.0; cout << sinc(m(0)) << " " << test(m) << endl; + + // The output is as follows: + //using gamma of 0.075 + // 0.239389 0.239389 + // 0.998334 0.998362 + // -0.189201 -0.189254 + // -0.191785 -0.186618 + + // The first column is the true value of the sinc function and the second + // column is the output from the krr estimate. + + + // Note that the krr_trainer has the ability to tell us the leave-one-out predictions + // for each sample. + std::vector<double> loo_values; + trainer.train(samples, labels, loo_values); + cout << "mean squared LOO error: " << mean_squared_error(labels,loo_values) << endl; + cout << "R^2 LOO value: " << r_squared(labels,loo_values) << endl; + // Which outputs the following: + // mean squared LOO error: 8.29575e-07 + // R^2 LOO value: 0.999995 + + + + + + // Another thing that is worth knowing is that just about everything in dlib is serializable. 
+    // So for example, you can save the test object to disk and recall it later like so:
+    serialize("saved_function.dat") << test;
+
+    // Now let's open that file back up and load the function object it contains.
+    deserialize("saved_function.dat") >> test;
+
+}
+
+
diff --git a/ml/dlib/examples/learning_to_track_ex.cpp b/ml/dlib/examples/learning_to_track_ex.cpp
new file mode 100644
index 00000000..2f9f3947
--- /dev/null
+++ b/ml/dlib/examples/learning_to_track_ex.cpp
@@ -0,0 +1,354 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    This example shows how you can use the dlib machine learning tools to make
+    an object tracker. Depending on your tracking application there can be a
+    lot of components to a tracker. However, a central element of many trackers
+    is the "detection to track" association step and this is the part of the
+    tracker we discuss in this example. Therefore, in the code below we define
+    simple detection and track structures and then go through the steps needed
+    to learn, using training data, how to best associate detections to tracks.
+
+    It should be noted that these tools are implemented essentially as wrappers
+    around the more general assignment learning tools present in dlib. So if
+    you want to get an idea of how they work under the covers you should read
+    the assignment_learning_ex.cpp example program and its supporting
+    documentation. However, to just use the learning-to-track tools you won't
+    need to understand these implementation details.
+*/
+
+
+#include <iostream>
+#include <dlib/svm_threaded.h>
+#include <dlib/rand.h>
+
+using namespace std;
+using namespace dlib;
+
+// ----------------------------------------------------------------------------------------
+
+struct detection
+{
+    /*
+        When you use these tools you need to define two structures. One represents a
+        detection and another a track. In this example we call these structures detection
+        and track but you can name them however you like. Moreover, you can put anything
+        you want in your detection structure. The only requirement is that detection be
+        copyable and contain a public typedef named track_type that tells us the track type
+        meant for use with this detection object.
+    */
+    typedef struct track track_type;
+
+
+
+    // Again, note that this field is NOT REQUIRED by the dlib tools. You can put whatever
+    // you want in your detection object. Here we are including a column vector of
+    // measurements from the sensor that generated the detection. In this example we don't
+    // have a real sensor so we will simulate a very basic one using a random number
+    // generator. But the idea is that you should be able to use the contents of your
+    // detection to somehow tell which track it goes with. So these numbers should contain
+    // some identifying information about the real world object that caused this detection.
+    matrix<double,0,1> measurements;
+};
+
+
+struct track
+{
+    /*
+        Here we define our corresponding track object. This object has more requirements
+        than the detection. In particular, the dlib machine learning tools require it to
+        have the following elements:
+            - A typedef named feature_vector_type
+            - It should be copyable and default constructible
+            - The three functions: get_similarity_features(), update_track(), and propagate_track()
+
+        Just like the detection object, you can also add any additional fields you like.
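+        For example, a real world tracker might also carry extra bookkeeping
+        fields (hypothetical ones, nothing in dlib requires them), such as:
+
+            unsigned long track_id;        // stable ID for display or logging
+            matrix<double,0,1> velocity;   // a simple motion estimate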
+ In this example we keep it simple and say that a track maintains only a copy of the + most recent sensor measurements it has seen and also a number telling us how long + it has been since the track was updated with a detection. + */ + + // This type should be a dlib::matrix capable of storing column vectors or an + // unsorted sparse vector type such as std::vector<std::pair<unsigned long,double>>. + typedef matrix<double,0,1> feature_vector_type; + + track() + { + time_since_last_association = 0; + } + + void get_similarity_features(const detection& det, feature_vector_type& feats) const + { + /* + The get_similarity_features() function takes a detection and outputs a feature + vector that tells the machine learning tools how "similar" the detection is to + the track. The idea here is to output a set of numbers (i.e. the contents of + feats) that can be used to decide if det should be associated with this track. + In this example we output the difference between the last sensor measurements + for this track and the detection's measurements. This works since we expect + the sensor measurements to be relatively constant for each track because that's + how our simple sensor simulator in this example works. However, in a real + world application it's likely to be much more complex. But here we keep things + simple. + + It should also be noted that get_similarity_features() must always output + feature vectors with the same number of dimensions. Finally, the machine + learning tools are going to learn a linear function of feats and use that to + predict if det should associate to this track. So try and define features that + you think would work in a linear function. There are all kinds of ways to do + this. If you want to get really clever about it you can even use kernel + methods like the empirical_kernel_map (see empirical_kernel_map_ex.cpp). I + would start out with something simple first though. + */ + feats = abs(last_measurements - det.measurements); + } + + void update_track(const detection& det) + { + /* + This function is called when the dlib tools have decided that det should be + associated with this track. So the point of update_track() is to, as the name + suggests, update the track with the given detection. In general, you can do + whatever you want in this function. Here we simply record the last measurement + state and reset the time since last association. + */ + last_measurements = det.measurements; + time_since_last_association = 0; + } + + void propagate_track() + { + /* + This function is called when the dlib tools have decided, for the current time + step, that none of the available detections associate with this track. So the + point of this function is to perform a track update without a detection. To + say that another way. Every time you ask the dlib tools to perform detection + to track association they will update each track by calling either + update_track() or propagate_track(). Which function they call depends on + whether or not a detection was associated to the track. + */ + ++time_since_last_association; + } + + matrix<double,0,1> last_measurements; + unsigned long time_since_last_association; +}; + +// ---------------------------------------------------------------------------------------- + +/* + Now that we have defined our detection and track structures we are going to define our + sensor simulator. In it we will imagine that there are num_objects things in the world + and those things generate detections from our sensor. 
Moreover, each detection from + the sensor comes with a measurement vector with num_properties elements. + + So the first function, initialize_object_properties(), just randomly generates + num_objects and saves them in a global variable. Then when we are generating + detections we will output copies of these objects that have been corrupted by a little + bit of random noise. +*/ + +dlib::rand rnd; +const long num_objects = 4; +const long num_properties = 6; +std::vector<matrix<double,0,1> > object_properties(num_objects); + +void initialize_object_properties() +{ + for (unsigned long i = 0; i < object_properties.size(); ++i) + object_properties[i] = randm(num_properties,1,rnd); +} + +// So here is our function that samples a detection from our simulated sensor. You tell it +// what object you want to sample a detection from and it returns a detection from that +// object. +detection sample_detection_from_sensor(long object_id) +{ + DLIB_CASSERT(object_id < num_objects, + "You can't ask to sample a detection from an object that doesn't exist."); + detection temp; + // Set the measurements equal to the object's true property values plus a little bit of + // noise. + temp.measurements = object_properties[object_id] + randm(num_properties,1,rnd)*0.1; + return temp; +} + +// ---------------------------------------------------------------------------------------- + +typedef std::vector<labeled_detection<detection> > detections_at_single_time_step; +typedef std::vector<detections_at_single_time_step> track_history; + +track_history make_random_tracking_data_for_training() +{ + /* + Since we are using machine learning we need some training data. This function + samples data from our sensor and creates labeled track histories. In these track + histories, each detection is labeled with its true track ID. The goal of the + machine learning tools will then be to learn to associate all the detections with + the same ID to the same track object. + */ + + track_history data; + + // At each time step we get a set of detections from the objects in the world. + // Simulate 100 time steps worth of data where there are 3 objects present. + const int num_time_steps = 100; + for (int i = 0; i < num_time_steps; ++i) + { + detections_at_single_time_step dets(3); + // sample a detection from object 0 + dets[0].det = sample_detection_from_sensor(0); + dets[0].label = 0; + + // sample a detection from object 1 + dets[1].det = sample_detection_from_sensor(1); + dets[1].label = 1; + + // sample a detection from object 2 + dets[2].det = sample_detection_from_sensor(2); + dets[2].label = 2; + + data.push_back(dets); + } + + // Now let's imagine object 1 and 2 are gone but a new object, object 3 has arrived. + for (int i = 0; i < num_time_steps; ++i) + { + detections_at_single_time_step dets(2); + // sample a detection from object 0 + dets[0].det = sample_detection_from_sensor(0); + dets[0].label = 0; + + // sample a detection from object 3 + dets[1].det = sample_detection_from_sensor(3); + dets[1].label = 3; + + data.push_back(dets); + } + + return data; +} + +// ---------------------------------------------------------------------------------------- + +std::vector<detection> make_random_detections(long num_dets) +{ + /* + Finally, when we test the tracker we learned we will need to sample regular old + unlabeled detections. This function helps us do that. 
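+        (Unlike the labeled training histories above, these detections carry no
+        track IDs; the learned association function has to figure out which
+        detection goes with which track on its own.)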
+ */ + DLIB_CASSERT(num_dets <= num_objects, + "You can't ask for more detections than there are objects in our little simulation."); + + std::vector<detection> dets(num_dets); + for (unsigned long i = 0; i < dets.size(); ++i) + { + dets[i] = sample_detection_from_sensor(i); + } + return dets; +} + +// ---------------------------------------------------------------------------------------- + +int main() +{ + initialize_object_properties(); + + + // Get some training data. Here we sample 5 independent track histories. In a real + // world problem you would get this kind of data by, for example, collecting data from + // your sensor on 5 separate days where you did an independent collection each day. + // You can train a model with just one track history but the more you have the better. + std::vector<track_history> data; + data.push_back(make_random_tracking_data_for_training()); + data.push_back(make_random_tracking_data_for_training()); + data.push_back(make_random_tracking_data_for_training()); + data.push_back(make_random_tracking_data_for_training()); + data.push_back(make_random_tracking_data_for_training()); + + + structural_track_association_trainer trainer; + // Note that the machine learning tools have a parameter. This is the usual SVM C + // parameter that controls the trade-off between trying to fit the training data or + // producing a "simpler" solution. You need to try a few different values of this + // parameter to find out what setting works best for your problem (try values in the + // range 0.001 to 1000000). + trainer.set_c(100); + // Now do the training. + track_association_function<detection> assoc = trainer.train(data); + + // We can test the accuracy of the learned association function on some track history + // data. Here we test it on the data we trained on. It outputs a single number that + // measures the fraction of detections which were correctly associated to their tracks. + // So a value of 1 indicates perfect tracking and a value of 0 indicates totally wrong + // tracking. + cout << "Association accuracy on training data: "<< test_track_association_function(assoc, data) << endl; + // It's very important to test the output of a machine learning method on data it + // wasn't trained on. You can do that by calling test_track_association_function() on + // held out data. You can also use cross-validation like so: + cout << "Association accuracy from 5-fold CV: "<< cross_validate_track_association_trainer(trainer, data, 5) << endl; + // Unsurprisingly, the testing functions show that the assoc function we learned + // perfectly associates all detections to tracks in this easy data. + + + + + // OK. So how do you use this assoc thing? Let's use it to do some tracking! + + // tracks contains all our current tracks. Initially it is empty. + std::vector<track> tracks; + cout << "number of tracks: "<< tracks.size() << endl; + + // Sample detections from 3 objects. + std::vector<detection> dets = make_random_detections(3); + // Calling assoc(), the function we just learned, performs the detection to track + // association. It will also call each track's update_track() function with the + // associated detection. For tracks that don't get a detection, it calls + // propagate_track(). + assoc(tracks, dets); + // Now there are 3 things in tracks. + cout << "number of tracks: "<< tracks.size() << endl; + + // Run the tracker for a few more time steps... 
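+    // Since the same 3 objects keep producing detections, the number of tracks
+    // should hold steady at 3 through these steps.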
+    dets = make_random_detections(3);
+    assoc(tracks, dets);
+    cout << "number of tracks: "<< tracks.size() << endl;
+
+    dets = make_random_detections(3);
+    assoc(tracks, dets);
+    cout << "number of tracks: "<< tracks.size() << endl;
+
+    // Now another object has appeared! There are 4 objects now.
+    dets = make_random_detections(4);
+    assoc(tracks, dets);
+    // Now there are 4 tracks instead of 3!
+    cout << "number of tracks: "<< tracks.size() << endl;
+
+    // That 4th object just vanished. Let's look at the time_since_last_association values
+    // for each track. We will see that one of the tracks isn't getting updated with
+    // detections anymore since the object it corresponds to is no longer present.
+    dets = make_random_detections(3);
+    assoc(tracks, dets);
+    cout << "number of tracks: "<< tracks.size() << endl;
+    for (unsigned long i = 0; i < tracks.size(); ++i)
+        cout << "   time since last association: "<< tracks[i].time_since_last_association << endl;
+
+    dets = make_random_detections(3);
+    assoc(tracks, dets);
+    cout << "number of tracks: "<< tracks.size() << endl;
+    for (unsigned long i = 0; i < tracks.size(); ++i)
+        cout << "   time since last association: "<< tracks[i].time_since_last_association << endl;
+
+
+
+
+
+
+    // Finally, you can save your track_association_function to disk like so:
+    serialize("track_assoc.svm") << assoc;
+
+    // And recall it from disk later like so:
+    deserialize("track_assoc.svm") >> assoc;
+}
+
+// ----------------------------------------------------------------------------------------
+
diff --git a/ml/dlib/examples/least_squares_ex.cpp b/ml/dlib/examples/least_squares_ex.cpp
new file mode 100644
index 00000000..875790b2
--- /dev/null
+++ b/ml/dlib/examples/least_squares_ex.cpp
@@ -0,0 +1,228 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example illustrating the use of the general purpose non-linear
+    least squares optimization routines from the dlib C++ Library.
+
+    This example program will demonstrate how these routines can be used for data fitting.
+    In particular, we will generate a set of data and then use the least squares
+    routines to infer the parameters of the model which generated the data.
+*/
+
+
+#include <dlib/optimization.h>
+#include <iostream>
+#include <vector>
+
+
+using namespace std;
+using namespace dlib;
+
+// ----------------------------------------------------------------------------------------
+
+typedef matrix<double,2,1> input_vector;
+typedef matrix<double,3,1> parameter_vector;
+
+// ----------------------------------------------------------------------------------------
+
+// We will use this function to generate data. It represents a function of 2 variables
+// and 3 parameters. The least squares procedure will be used to infer the values of
+// the 3 parameters based on a set of input/output pairs.
+double model (
+    const input_vector& input,
+    const parameter_vector& params
+)
+{
+    const double p0 = params(0);
+    const double p1 = params(1);
+    const double p2 = params(2);
+
+    const double i0 = input(0);
+    const double i1 = input(1);
+
+    const double temp = p0*i0 + p1*i1 + p2;
+
+    return temp*temp;
+}
+
+// ----------------------------------------------------------------------------------------
+
+// This function is the "residual" for a least squares problem. It takes an input/output
+// pair and compares it to the output of our model and returns the amount of error.
The idea +// is to find the set of parameters which makes the residual small on all the data pairs. +double residual ( + const std::pair<input_vector, double>& data, + const parameter_vector& params +) +{ + return model(data.first, params) - data.second; +} + +// ---------------------------------------------------------------------------------------- + +// This function is the derivative of the residual() function with respect to the parameters. +parameter_vector residual_derivative ( + const std::pair<input_vector, double>& data, + const parameter_vector& params +) +{ + parameter_vector der; + + const double p0 = params(0); + const double p1 = params(1); + const double p2 = params(2); + + const double i0 = data.first(0); + const double i1 = data.first(1); + + const double temp = p0*i0 + p1*i1 + p2; + + der(0) = i0*2*temp; + der(1) = i1*2*temp; + der(2) = 2*temp; + + return der; +} + +// ---------------------------------------------------------------------------------------- + +int main() +{ + try + { + // randomly pick a set of parameters to use in this example + const parameter_vector params = 10*randm(3,1); + cout << "params: " << trans(params) << endl; + + + // Now let's generate a bunch of input/output pairs according to our model. + std::vector<std::pair<input_vector, double> > data_samples; + input_vector input; + for (int i = 0; i < 1000; ++i) + { + input = 10*randm(2,1); + const double output = model(input, params); + + // save the pair + data_samples.push_back(make_pair(input, output)); + } + + // Before we do anything, let's make sure that our derivative function defined above matches + // the approximate derivative computed using central differences (via derivative()). + // If this value is big then it means we probably typed the derivative function incorrectly. + cout << "derivative error: " << length(residual_derivative(data_samples[0], params) - + derivative(residual)(data_samples[0], params) ) << endl; + + + + + + // Now let's use the solve_least_squares_lm() routine to figure out what the + // parameters are based on just the data_samples. + parameter_vector x; + x = 1; + + cout << "Use Levenberg-Marquardt" << endl; + // Use the Levenberg-Marquardt method to determine the parameters which + // minimize the sum of all squared residuals. + solve_least_squares_lm(objective_delta_stop_strategy(1e-7).be_verbose(), + residual, + residual_derivative, + data_samples, + x); + + // Now x contains the solution. If everything worked it will be equal to params. + cout << "inferred parameters: "<< trans(x) << endl; + cout << "solution error: "<< length(x - params) << endl; + cout << endl; + + + + + x = 1; + cout << "Use Levenberg-Marquardt, approximate derivatives" << endl; + // If we didn't create the residual_derivative function then we could + // have used this method which numerically approximates the derivatives for you. + solve_least_squares_lm(objective_delta_stop_strategy(1e-7).be_verbose(), + residual, + derivative(residual), + data_samples, + x); + + // Now x contains the solution. If everything worked it will be equal to params. + cout << "inferred parameters: "<< trans(x) << endl; + cout << "solution error: "<< length(x - params) << endl; + cout << endl; + + + + + x = 1; + cout << "Use Levenberg-Marquardt/quasi-newton hybrid" << endl; + // This version of the solver uses a method which is appropriate for problems + // where the residuals don't go to zero at the solution. So in these cases + // it may provide a better answer. 
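+        // (In this particular example the residuals do go to zero at the solution,
+        // since the data was generated from the model without any noise, so all
+        // three solvers should recover essentially the same parameters.)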
+ solve_least_squares(objective_delta_stop_strategy(1e-7).be_verbose(), + residual, + residual_derivative, + data_samples, + x); + + // Now x contains the solution. If everything worked it will be equal to params. + cout << "inferred parameters: "<< trans(x) << endl; + cout << "solution error: "<< length(x - params) << endl; + + } + catch (std::exception& e) + { + cout << e.what() << endl; + } +} + +// Example output: +/* +params: 8.40188 3.94383 7.83099 + +derivative error: 9.78267e-06 +Use Levenberg-Marquardt +iteration: 0 objective: 2.14455e+10 +iteration: 1 objective: 1.96248e+10 +iteration: 2 objective: 1.39172e+10 +iteration: 3 objective: 1.57036e+09 +iteration: 4 objective: 2.66917e+07 +iteration: 5 objective: 4741.9 +iteration: 6 objective: 0.000238674 +iteration: 7 objective: 7.8815e-19 +iteration: 8 objective: 0 +inferred parameters: 8.40188 3.94383 7.83099 + +solution error: 0 + +Use Levenberg-Marquardt, approximate derivatives +iteration: 0 objective: 2.14455e+10 +iteration: 1 objective: 1.96248e+10 +iteration: 2 objective: 1.39172e+10 +iteration: 3 objective: 1.57036e+09 +iteration: 4 objective: 2.66917e+07 +iteration: 5 objective: 4741.87 +iteration: 6 objective: 0.000238701 +iteration: 7 objective: 1.0571e-18 +iteration: 8 objective: 4.12469e-22 +inferred parameters: 8.40188 3.94383 7.83099 + +solution error: 5.34754e-15 + +Use Levenberg-Marquardt/quasi-newton hybrid +iteration: 0 objective: 2.14455e+10 +iteration: 1 objective: 1.96248e+10 +iteration: 2 objective: 1.3917e+10 +iteration: 3 objective: 1.5572e+09 +iteration: 4 objective: 2.74139e+07 +iteration: 5 objective: 5135.98 +iteration: 6 objective: 0.000285539 +iteration: 7 objective: 1.15441e-18 +iteration: 8 objective: 3.38834e-23 +inferred parameters: 8.40188 3.94383 7.83099 + +solution error: 1.77636e-15 +*/ diff --git a/ml/dlib/examples/linear_manifold_regularizer_ex.cpp b/ml/dlib/examples/linear_manifold_regularizer_ex.cpp new file mode 100644 index 00000000..9c6f10f2 --- /dev/null +++ b/ml/dlib/examples/linear_manifold_regularizer_ex.cpp @@ -0,0 +1,284 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the linear_manifold_regularizer + and empirical_kernel_map from the dlib C++ Library. + + This example program assumes you are familiar with some general elements of + the library. In particular, you should have at least read the svm_ex.cpp + and matrix_ex.cpp examples. You should also have read the empirical_kernel_map_ex.cpp + example program as the present example builds upon it. + + + + This program shows an example of what is called semi-supervised learning. + That is, a small amount of labeled data is augmented with a large amount + of unlabeled data. A learning algorithm is then run on all the data + and the hope is that by including the unlabeled data we will end up with + a better result. + + + In this particular example we will generate 200,000 sample points of + unlabeled data along with 2 samples of labeled data. The sample points + will be drawn randomly from two concentric circles. One labeled data + point will be drawn from each circle. The goal is to learn to + correctly separate the two circles using only the 2 labeled points + and the unlabeled data. + + To do this we will first run an approximate form of k nearest neighbors + to determine which of the unlabeled samples are closest together. 
We will + then make the manifold assumption, that is, we will assume that points close + to each other should share the same classification label. + + Once we have determined which points are near neighbors we will use the + empirical_kernel_map and linear_manifold_regularizer to transform all the + data points into a new vector space where any linear rule will have similar + output for points which we have decided are near neighbors. + + Finally, we will classify all the unlabeled data according to which of + the two labeled points are nearest. Normally this would not work but by + using the manifold assumption we will be able to successfully classify + all the unlabeled data. + + + + For further information on this subject you should begin with the following + paper as it discusses a very similar application of manifold regularization. + + Beyond the Point Cloud: from Transductive to Semi-supervised Learning + by Vikas Sindhwani, Partha Niyogi, and Mikhail Belkin + + + + + ******** SAMPLE PROGRAM OUTPUT ******** + + Testing manifold regularization with an intrinsic_regularization_strength of 0. + number of edges generated: 49998 + Running simple test... + error: 0.37022 + error: 0.44036 + error: 0.376715 + error: 0.307545 + error: 0.463455 + error: 0.426065 + error: 0.416155 + error: 0.288295 + error: 0.400115 + error: 0.46347 + + Testing manifold regularization with an intrinsic_regularization_strength of 10000. + number of edges generated: 49998 + Running simple test... + error: 0 + error: 0 + error: 0 + error: 0 + error: 0 + error: 0 + error: 0 + error: 0 + error: 0 + error: 0 + + +*/ + +#include <dlib/manifold_regularization.h> +#include <dlib/svm.h> +#include <dlib/rand.h> +#include <dlib/statistics.h> +#include <iostream> +#include <vector> +#include <ctime> + + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------------------- + +// First let's make a typedef for the kind of samples we will be using. +typedef matrix<double, 0, 1> sample_type; + +// We will be using the radial_basis_kernel in this example program. +typedef radial_basis_kernel<sample_type> kernel_type; + +// ---------------------------------------------------------------------------------------- + +void generate_circle ( + std::vector<sample_type>& samples, + double radius, + const long num +); +/*! + requires + - num > 0 + - radius > 0 + ensures + - generates num points centered at (0,0) with the given radius. Adds these + points into the given samples vector. +!*/ + +// ---------------------------------------------------------------------------------------- + +void test_manifold_regularization ( + const double intrinsic_regularization_strength +); +/*! + ensures + - Runs an example test using the linear_manifold_regularizer with the given + intrinsic_regularization_strength. +!*/ + +// ---------------------------------------------------------------------------------------- + +int main() +{ + // Run the test without any manifold regularization. + test_manifold_regularization(0); + + // Run the test with manifold regularization. You can think of this number as + // a measure of how much we trust the manifold assumption. So if you are really + // confident that you can select neighboring points which should have the same + // classification then make this number big. 
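+    // (A useful value is problem dependent; scanning a few settings spread over a
+    // log scale, e.g. 1, 100, 10000, is a reasonable way to pick one.)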
+ test_manifold_regularization(10000.0); +} + +// ---------------------------------------------------------------------------------------- + +void test_manifold_regularization ( + const double intrinsic_regularization_strength +) +{ + cout << "Testing manifold regularization with an intrinsic_regularization_strength of " + << intrinsic_regularization_strength << ".\n"; + + std::vector<sample_type> samples; + + // Declare an instance of the kernel we will be using. + const kernel_type kern(0.1); + + const unsigned long num_points = 100000; + + // create a large dataset with two concentric circles. There will be 100000 points on each circle + // for a total of 200000 samples. + generate_circle(samples, 2, num_points); // circle of radius 2 + generate_circle(samples, 4, num_points); // circle of radius 4 + + // Create a set of sample_pairs that tells us which samples are "close" and should thus + // be classified similarly. These edges will be used to define the manifold regularizer. + // To find these edges we use a simple function that samples point pairs randomly and + // returns the top 5% with the shortest edges. + std::vector<sample_pair> edges; + find_percent_shortest_edges_randomly(samples, squared_euclidean_distance(), 0.05, 1000000, time(0), edges); + + cout << "number of edges generated: " << edges.size() << endl; + + empirical_kernel_map<kernel_type> ekm; + + // Since the circles are not linearly separable we will use an empirical kernel map to + // map them into a space where they are separable. We create an empirical_kernel_map + // using a random subset of our data samples as basis samples. Note, however, that even + // though the circles are linearly separable in this new space given by the empirical_kernel_map + // we still won't be able to correctly classify all the points given just the 2 labeled examples. + // We will need to make use of the nearest neighbor information stored in edges. To do that + // we will use the linear_manifold_regularizer. + ekm.load(kern, randomly_subsample(samples, 50)); + + // Project all the samples into the span of our 50 basis samples + for (unsigned long i = 0; i < samples.size(); ++i) + samples[i] = ekm.project(samples[i]); + + + // Now create the manifold regularizer. The result is a transformation matrix that + // embodies the manifold assumption discussed above. + linear_manifold_regularizer<sample_type> lmr; + // use_gaussian_weights is a function object that tells lmr how to weight each edge. In this + // case we let the weight decay as edges get longer. So shorter edges are more important than + // longer edges. + lmr.build(samples, edges, use_gaussian_weights(0.1)); + const matrix<double> T = lmr.get_transformation_matrix(intrinsic_regularization_strength); + + // Apply the transformation generated by the linear_manifold_regularizer to + // all our samples. + for (unsigned long i = 0; i < samples.size(); ++i) + samples[i] = T*samples[i]; + + + // For convenience, generate a projection_function and merge the transformation + // matrix T into it. That is, we will have: proj(x) == T*ekm.project(x). + projection_function<kernel_type> proj = ekm.get_projection_function(); + proj.weights = T*proj.weights; + + cout << "Running simple test..." << endl; + + // Pick 2 different labeled points. One on the inner circle and another on the outer. + // For each of these test points we will see if using the single plane that separates + // them is a good way to separate the concentric circles. 
We also do this a bunch + // of times with different randomly chosen points so we can see how robust the result is. + for (int itr = 0; itr < 10; ++itr) + { + std::vector<sample_type> test_points; + // generate a random point from the radius 2 circle + generate_circle(test_points, 2, 1); + // generate a random point from the radius 4 circle + generate_circle(test_points, 4, 1); + + // project the two test points into kernel space. Recall that this projection_function + // has the manifold regularizer incorporated into it. + const sample_type class1_point = proj(test_points[0]); + const sample_type class2_point = proj(test_points[1]); + + double num_wrong = 0; + + // Now attempt to classify all the data samples according to which point + // they are closest to. The output of this program shows that without manifold + // regularization this test will fail but with it it will perfectly classify + // all the points. + for (unsigned long i = 0; i < samples.size(); ++i) + { + double distance_to_class1 = length(samples[i] - class1_point); + double distance_to_class2 = length(samples[i] - class2_point); + + bool predicted_as_class_1 = (distance_to_class1 < distance_to_class2); + + bool really_is_class_1 = (i < num_points); + + // now count how many times we make a mistake + if (predicted_as_class_1 != really_is_class_1) + ++num_wrong; + } + + cout << "error: "<< num_wrong/samples.size() << endl; + } + + cout << endl; +} + +// ---------------------------------------------------------------------------------------- + +dlib::rand rnd; + +void generate_circle ( + std::vector<sample_type>& samples, + double radius, + const long num +) +{ + sample_type m(2,1); + + for (long i = 0; i < num; ++i) + { + double sign = 1; + if (rnd.get_random_double() < 0.5) + sign = -1; + m(0) = 2*radius*rnd.get_random_double()-radius; + m(1) = sign*sqrt(radius*radius - m(0)*m(0)); + + samples.push_back(m); + } +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/logger_custom_output_ex.cpp b/ml/dlib/examples/logger_custom_output_ex.cpp new file mode 100644 index 00000000..6916e43d --- /dev/null +++ b/ml/dlib/examples/logger_custom_output_ex.cpp @@ -0,0 +1,73 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt + +/* + + This is an example showing how to control where the dlib::logger sends its messages. + This is done by creating a "hook" class that is called whenever any of the loggers want + to log a message. The hook class then outputs the messages using any method you like. + + + Prior to reading this example, you should understand the basics of the dlib::logger. + So you should have already read the logger_ex.cpp and logger_ex_2.cpp example programs. + +*/ + + +#include <dlib/logger.h> + +using namespace dlib; +using namespace std; + +class my_hook +{ +public: + my_hook( + ) + { + fout.open("my_log_file.txt"); + } + + void log ( + const string& logger_name, + const log_level& ll, + const uint64 thread_id, + const char* message_to_log + ) + { + // Log all messages from any logger to our log file. + fout << ll << " ["<<thread_id<<"] " << logger_name << ": " << message_to_log << endl; + + // But only log messages that are of LINFO priority or higher to the console. 
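+        // (log_level objects compare by their priority values, so this works as
+        // expected: LALL < LDEBUG < LINFO < LWARN < LERROR < LFATAL.)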
+ if (ll >= LINFO) + cout << ll << " ["<<thread_id<<"] " << logger_name << ": " << message_to_log << endl; + } + +private: + ofstream fout; +}; + +int main() +{ + my_hook hook; + // This tells all dlib loggers to send their logging events to the hook object. That + // is, any time a logger generates a message it will call hook.log() with the message + // contents. Additionally, hook.log() will also only be called from one thread at a + // time so it is safe to use this kind of hook in a multi-threaded program with many + // loggers in many threads. + set_all_logging_output_hooks(hook); + // It should also be noted that the hook object must not be destructed while the + // loggers are still in use. So it is a good idea to declare the hook object + // somewhere where it will live the entire lifetime of the program, as we do here. + + + logger dlog("main"); + // Tell the dlog logger to emit a message for all logging events rather than its + // default behavior of only logging LERROR or above. + dlog.set_level(LALL); + + // All these message go to my_log_file.txt, but only the last two go to the console. + dlog << LDEBUG << "This is a debugging message."; + dlog << LINFO << "This is an informational message."; + dlog << LERROR << "An error message!"; +} + diff --git a/ml/dlib/examples/logger_ex.cpp b/ml/dlib/examples/logger_ex.cpp new file mode 100644 index 00000000..281e2ad1 --- /dev/null +++ b/ml/dlib/examples/logger_ex.cpp @@ -0,0 +1,70 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt + +/* + + This is a simple example illustrating the use of the logger object from + the dlib C++ Library. + + + The output of this program looks like this: + + 0 INFO [0] example: This is an informational message. + 0 DEBUG [0] example: The integer variable is set to 8 + 0 WARN [0] example: The variable is bigger than 4! Its value is 8 + 0 INFO [0] example: we are going to sleep for half a second. + 503 INFO [0] example: we just woke up + 503 INFO [0] example: program ending + + + The first column shows the number of milliseconds since program start at the time + the message was printed, then the logging level of the message, then the thread that + printed the message, then the logger's name and finally the message itself. + +*/ + + +#include <dlib/logger.h> +#include <dlib/misc_api.h> + +using namespace dlib; + +// Create a logger object somewhere. It is usually convenient to make it at the global scope +// which is what I am doing here. The following statement creates a logger that is named example. +logger dlog("example"); + +int main() +{ + // Every logger has a logging level (given by dlog.level()). Each log message is tagged with a + // level and only levels equal to or higher than dlog.level() will be printed. By default all + // loggers start with level() == LERROR. In this case I'm going to set the lowest level LALL + // which means that dlog will print all logging messages it gets. + dlog.set_level(LALL); + + + // print our first message. It will go to cout because that is the default. + dlog << LINFO << "This is an informational message."; + + // now print a debug message. + int variable = 8; + dlog << LDEBUG << "The integer variable is set to " << variable; + + // the logger can be used pretty much like any ostream object. But you have to give a logging + // level first. But after that you can chain << operators like normal. + + if (variable > 4) + dlog << LWARN << "The variable is bigger than 4! 
Its value is " << variable; + + + + dlog << LINFO << "we are going to sleep for half a second."; + // sleep for half a second + dlib::sleep(500); + dlog << LINFO << "we just woke up"; + + + + dlog << LINFO << "program ending"; +} + + + diff --git a/ml/dlib/examples/logger_ex_2.cpp b/ml/dlib/examples/logger_ex_2.cpp new file mode 100644 index 00000000..99332bff --- /dev/null +++ b/ml/dlib/examples/logger_ex_2.cpp @@ -0,0 +1,153 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt + +/* + + This is a somewhat complex example illustrating the use of the logger object + from the dlib C++ Library. It will demonstrate using multiple loggers and threads. + + + The output of this program looks like this: + 0 INFO [0] example: This is an informational message. + 0 WARN [0] example: The variable is bigger than 4! Its value is 8 + 0 INFO [0] example: make two threads + 0 WARN [0] example.test_class: warning! someone called warning()! + 0 INFO [0] example: we are going to sleep for half a second. + 0 INFO [1] example.thread: entering our thread + 0 WARN [1] example.test_class: warning! someone called warning()! + 0 INFO [2] example.thread: entering our thread + 0 WARN [2] example.test_class: warning! someone called warning()! + 203 INFO [1] example.thread: exiting our thread + 203 INFO [2] example.thread: exiting our thread + 503 INFO [0] example: we just woke up + 503 INFO [0] example: program ending + + +*/ + + +#include <dlib/logger.h> +#include <dlib/misc_api.h> +#include <dlib/threads.h> + +using namespace dlib; + +/* + Here we create three loggers. Note that it is the case that: + - logp.is_child_of(logp) == true + - logt.is_child_of(logp) == true + - logc.is_child_of(logp) == true + + logp is the child of itself because all loggers are their own children :) But the other + two are child loggers of logp because their names start with logp.name() + "." which means + that whenever you set a property on a logger it will also set that same property on all of + the logger's children. +*/ +logger logp("example"); +logger logt("example.thread"); +logger logc("example.test_class"); + +class test +{ +public: + test () + { + // this message won't get logged because LINFO is too low + logc << LINFO << "constructed a test object"; + } + + ~test () + { + // this message won't get logged because LINFO is too low + logc << LINFO << "destructed a test object"; + } + + void warning () + { + logc << LWARN << "warning! someone called warning()!"; + } +}; + +void thread (void*) +{ + logt << LINFO << "entering our thread"; + + + test mytest; + mytest.warning(); + + dlib::sleep(200); + + logt << LINFO << "exiting our thread"; +} + + +void setup_loggers ( +) +{ + // Create a logger that has the same name as our root logger logp. This isn't very useful in + // this example program but if you had loggers defined in other files then you might not have + // easy access to them when starting up your program and setting log levels. This mechanism + // allows you to manipulate the properties of any logger so long as you know its name. + logger temp_log("example"); + + // For this example I don't want to log debug messages so I'm setting the logging level of + // All our loggers to LINFO. Note that this statement sets all three of our loggers to this + // logging level because they are all children of temp_log. + temp_log.set_level(LINFO); + + + // In addition I only want the example.test_class to print LWARN or higher messages so I'm going + // to set that here too. 
Note that we set this value after calling temp_log.set_level(). If we
+    // did it the other way around the set_level() call on temp_log would set logc_temp.level() and
+    // logc.level() back to LINFO since temp_log is a parent of logc_temp.
+    logger logc_temp("example.test_class");
+    logc_temp.set_level(LWARN);
+
+
+    // Finally, note that you can also configure your loggers from a text config file.
+    // See the documentation for the configure_loggers_from_file() function for details.
+}
+
+int main()
+{
+    setup_loggers();
+
+    // print our first message. It will go to cout because that is the default.
+    logp << LINFO << "This is an informational message.";
+
+    int variable = 8;
+
+    // Here is a debug message. It won't print though because its log level is too low (it is below LINFO).
+    logp << LDEBUG << "The integer variable is set to " << variable;
+
+
+    if (variable > 4)
+        logp << LWARN << "The variable is bigger than 4! Its value is " << variable;
+
+    logp << LINFO << "make two threads";
+    create_new_thread(thread,0);
+    create_new_thread(thread,0);
+
+    test mytest;
+    mytest.warning();
+
+    logp << LINFO << "we are going to sleep for half a second.";
+    // sleep for half a second
+    dlib::sleep(500);
+    logp << LINFO << "we just woke up";
+
+
+
+    logp << LINFO << "program ending";
+
+
+    // It is also worth pointing out that the logger messages are atomic. This means, for example, that
+    // in the above log statements that involve a string literal and a variable, no other thread can
+    // come in and print a log message in-between the literal string and the variable. This is good
+    // because it means your messages don't get corrupted. However, this also means that you shouldn't
+    // make any function calls inside a logging statement if those calls might try to log a message
+    // themselves since the atomic nature of the logger would cause your application to deadlock.
+}
+
+
+
diff --git a/ml/dlib/examples/matrix_ex.cpp b/ml/dlib/examples/matrix_ex.cpp
new file mode 100644
index 00000000..a56dbfbb
--- /dev/null
+++ b/ml/dlib/examples/matrix_ex.cpp
@@ -0,0 +1,276 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+
+/*
+    This is an example illustrating the use of the matrix object
+    from the dlib C++ Library.
+*/
+
+
+#include <iostream>
+#include <dlib/matrix.h>
+
+using namespace dlib;
+using namespace std;
+
+// ----------------------------------------------------------------------------------------
+
+int main()
+{
+    // Let's begin this example by using the library to solve a simple
+    // linear system.
+    //
+    // We will find the value of x such that y = M*x where
+    //
+    //        3.5
+    //    y = 1.2
+    //        7.8
+    //
+    // and M is
+    //
+    //        54.2   7.4   12.1
+    //    M = 1      2     3
+    //        5.9    0.05  1
+
+
+    // First let's declare these 3 matrices.
+    // This declares a matrix that contains doubles and has 3 rows and 1 column.
+    // Moreover, its size is a compile time constant since we put it inside the <>.
+    matrix<double,3,1> y;
+    // Make a 3 by 3 matrix of doubles for the M matrix. In this case, M is
+    // sized at runtime and can therefore be resized later by calling M.set_size().
+    matrix<double> M(3,3);
+
+    // You may be wondering why someone would want to specify the size of a
+    // matrix at compile time when you don't have to. The reason is twofold.
+    // First, there is often a substantial performance improvement, especially
+    // for small matrices, because it enables a number of optimizations that
+    // otherwise would be impossible.
Second, the dlib::matrix object checks + // these compile time sizes to ensure that the matrices are being used + // correctly. For example, if you attempt to compile the expression y*y you + // will get a compiler error since that is not a legal matrix operation (the + // matrix dimensions don't make sense as a matrix multiplication). So if + // you know the size of a matrix at compile time then it is always a good + // idea to let the compiler know about it. + + + + + // Now we need to initialize the y and M matrices and we can do so like this: + M = 54.2, 7.4, 12.1, + 1, 2, 3, + 5.9, 0.05, 1; + + y = 3.5, + 1.2, + 7.8; + + + // The solution to y = M*x can be obtained by multiplying the inverse of M + // with y. As an aside, you should *NEVER* use the auto keyword to capture + // the output from a matrix expression. So don't do this: auto x = inv(M)*y; + // To understand why, read the matrix_expressions_ex.cpp example program. + matrix<double> x = inv(M)*y; + + cout << "x: \n" << x << endl; + + // We can check that it really worked by plugging x back into the original equation + // and subtracting y to see if we get a column vector with values all very close + // to zero (Which is what happens. Also, the values may not be exactly zero because + // there may be some numerical error and round off). + cout << "M*x - y: \n" << M*x - y << endl; + + + // Also note that we can create run-time sized column or row vectors like so + matrix<double,0,1> runtime_sized_column_vector; + matrix<double,1,0> runtime_sized_row_vector; + // and then they are sized by saying + runtime_sized_column_vector.set_size(3); + + // Similarly, the x matrix can be resized by calling set_size(num rows, num columns). For example + x.set_size(3,4); // x now has 3 rows and 4 columns. + + + + // The elements of a matrix are accessed using the () operator like so: + cout << M(0,1) << endl; + // The above expression prints out the value 7.4. That is, the value of + // the element at row 0 and column 1. + + // If we have a matrix that is a row or column vector. That is, it contains either + // a single row or a single column then we know that any access is always either + // to row 0 or column 0 so we can omit that 0 and use the following syntax. + cout << y(1) << endl; + // The above expression prints out the value 1.2 + + + // Let's compute the sum of elements in the M matrix. + double M_sum = 0; + // loop over all the rows + for (long r = 0; r < M.nr(); ++r) + { + // loop over all the columns + for (long c = 0; c < M.nc(); ++c) + { + M_sum += M(r,c); + } + } + cout << "sum of all elements in M is " << M_sum << endl; + + // The above code is just to show you how to loop over the elements of a matrix. 
An
+    // easier way to find this sum is to do the following:
+    cout << "sum of all elements in M is " << sum(M) << endl;
+
+
+
+
+    // Note that you can always print a matrix to an output stream by saying:
+    cout << M << endl;
+    // which will print:
+    //   54.2  7.4 12.1
+    //      1    2    3
+    //    5.9 0.05    1
+
+    // However, if you want to print using comma separators instead of spaces you can say:
+    cout << csv << M << endl;
+    // and you will instead get this as output:
+    //   54.2, 7.4, 12.1
+    //   1, 2, 3
+    //   5.9, 0.05, 1
+
+    // Conversely, you can also read in a matrix that uses either space, tab, or comma
+    // separated values by uncommenting the following:
+    // cin >> M;
+
+
+
+    // ----------------------------- Comparison with MATLAB ------------------------------
+    // Here I list a set of Matlab commands and their equivalent expressions using the dlib
+    // matrix. Note that there are a lot more functions defined for the dlib::matrix. See
+    // the HTML documentation for a full listing.
+
+    matrix<double> A, B, C, D, E;
+    matrix<int> Aint;
+    matrix<long> Blong;
+
+    // MATLAB: A = eye(3)
+    A = identity_matrix<double>(3);
+
+    // MATLAB: B = ones(3,4)
+    B = ones_matrix<double>(3,4);
+
+    // MATLAB: B = rand(3,4)
+    B = randm(3,4);
+
+    // MATLAB: C = 1.4*A
+    C = 1.4*A;
+
+    // MATLAB: D = A.*C
+    D = pointwise_multiply(A,C);
+
+    // MATLAB: E = A * B
+    E = A*B;
+
+    // MATLAB: E = A + C
+    E = A + C;
+
+    // MATLAB: E = A + 5
+    E = A + 5;
+
+    // MATLAB: E = E'
+    E = trans(E);  // Note that if you want a conjugate transpose then you need to say conj(trans(E))
+
+    // MATLAB: E = B' * B
+    E = trans(B)*B;
+
+    double var;
+    // MATLAB: var = A(1,2)
+    var = A(0,1); // dlib::matrix is 0 indexed rather than starting at 1 like Matlab.
+
+    // MATLAB: C = round(C)
+    C = round(C);
+
+    // MATLAB: C = floor(C)
+    C = floor(C);
+
+    // MATLAB: C = ceil(C)
+    C = ceil(C);
+
+    // MATLAB: C = diag(B)
+    C = diag(B);
+
+    // MATLAB: B = cast(A, "int32")
+    Aint = matrix_cast<int>(A);
+
+    // MATLAB: A = B(1,:)
+    A = rowm(B,0);
+
+    // MATLAB: A = B([1:2],:)
+    A = rowm(B,range(0,1));
+
+    // MATLAB: A = B(:,1)
+    A = colm(B,0);
+
+    // MATLAB: A = [1:5]
+    Blong = range(1,5);
+
+    // MATLAB: A = [1:2:5]
+    Blong = range(1,2,5);
+
+    // MATLAB: A = B([1:3], [1:2])
+    A = subm(B, range(0,2), range(0,1));
+    // or equivalently
+    A = subm(B, rectangle(0,0,1,2));
+
+
+    // MATLAB: A = B([1:3], [1:2:4])
+    A = subm(B, range(0,2), range(0,2,3));
+
+    // MATLAB: B(:,:) = 5
+    B = 5;
+    // or equivalently
+    set_all_elements(B,5);
+
+
+    // MATLAB: B([1:2],[1,2]) = 7
+    set_subm(B,range(0,1), range(0,1)) = 7;
+
+    // MATLAB: B([1:3],[2:3]) = A
+    set_subm(B,range(0,2), range(1,2)) = A;
+
+    // MATLAB: B(:,1) = 4
+    set_colm(B,0) = 4;
+
+    // MATLAB: B(:,[1:2]) = 4
+    set_colm(B,range(0,1)) = 4;
+
+    // MATLAB: B(:,1) = B(:,2)
+    set_colm(B,0) = colm(B,1);
+
+    // MATLAB: B(1,:) = 4
+    set_rowm(B,0) = 4;
+
+    // MATLAB: B(1,:) = B(2,:)
+    set_rowm(B,0) = rowm(B,1);
+
+    // MATLAB: var = det(E' * E)
+    var = det(trans(E)*E);
+
+    // MATLAB: C = pinv(E)
+    C = pinv(E);
+
+    // MATLAB: C = inv(E)
+    C = inv(E);
+
+    // MATLAB: [A,B,C] = svd(E)
+    svd(E,A,B,C);
+
+    // MATLAB: A = chol(E,'lower')
+    A = chol(E);
+
+    // MATLAB: var = min(min(A))
+    var = min(A);
+}
+
+// ----------------------------------------------------------------------------------------
+
+
diff --git a/ml/dlib/examples/matrix_expressions_ex.cpp b/ml/dlib/examples/matrix_expressions_ex.cpp
new file mode 100644
index 00000000..b5237090
--- /dev/null
+++ b/ml/dlib/examples/matrix_expressions_ex.cpp
@@ -0,0 +1,406 @@
+// The
contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt + +/* + This example contains a detailed discussion of the template expression + technique used to implement the matrix tools in the dlib C++ library. + + It also gives examples showing how a user can create their own custom + matrix expressions. + + Note that you should be familiar with the dlib::matrix before reading + this example. So if you haven't done so already you should read the + matrix_ex.cpp example program. +*/ + + +#include <iostream> +#include <dlib/matrix.h> + +using namespace dlib; +using namespace std; + +// ---------------------------------------------------------------------------------------- + +void custom_matrix_expressions_example(); + +// ---------------------------------------------------------------------------------------- + +int main() +{ + + // Declare some variables used below + matrix<double,3,1> y; + matrix<double,3,3> M; + matrix<double> x; + + // set all elements to 1 + y = 1; + M = 1; + + + // ------------------------- Template Expressions ----------------------------- + // Now I will discuss the "template expressions" technique and how it is + // used in the matrix object. First consider the following expression: + x = y + y; + + /* + Normally this expression results in machine code that looks, at a high + level, like the following: + temp = y + y; + x = temp + + Temp is a temporary matrix returned by the overloaded + operator. + temp then contains the result of adding y to itself. The assignment + operator copies the value of temp into x and temp is then destroyed while + the blissful C++ user never sees any of this. + + This is, however, totally inefficient. In the process described above + you have to pay for the cost of constructing a temporary matrix object + and allocating its memory. Then you pay the additional cost of copying + it over to x. It also gets worse when you have more complex expressions + such as x = round(y + y + y + M*y) which would involve the creation and copying + of 5 temporary matrices. + + All these inefficiencies are removed by using the template expressions + technique. The basic idea is as follows, instead of having operators and + functions return temporary matrix objects you return a special object that + represents the expression you wish to perform. + + So consider the expression x = y + y again. With dlib::matrix what happens + is the expression y+y returns a matrix_exp object instead of a temporary matrix. + The construction of a matrix_exp does not allocate any memory or perform any + computations. The matrix_exp however has an interface that looks just like a + dlib::matrix object and when you ask it for the value of one of its elements + it computes that value on the spot. Only in the assignment operator does + someone ask the matrix_exp for these values so this avoids the use of any + temporary matrices. Thus the statement x = y + y is equivalent to the following + code: + // loop over all elements in y matrix + for (long r = 0; r < y.nr(); ++r) + for (long c = 0; c < y.nc(); ++c) + x(r,c) = y(r,c) + y(r,c); + + + This technique works for expressions of arbitrary complexity. So if you typed + x = round(y + y + y + M*y) it would involve no temporary matrices being created + at all. Each operator takes and returns only matrix_exp objects. Thus, no + computations are performed until the assignment operator requests the values + from the matrix_exp it receives as input. 
This also means that statements such as: + auto x = round(y + y + y + M*y) + will not work properly because x would be a matrix expression that references + parts of the expression round(y + y + y + M*y) but those expression parts will + immediately go out of scope so x will contain references to non-existing sub + matrix expressions. This is very bad, so you should never use auto to store + the result of a matrix expression. Always store the output in a matrix object + like so: + matrix<double> x = round(y + y + y + M*y) + + + + + In terms of implementation, there is a slight complication in all of this. It + is for statements that involve the multiplication of a complex matrix_exp such + as the following: + */ + x = M*(M+M+M+M+M+M+M); + /* + According to the discussion above, this statement would compute the value of + M*(M+M+M+M+M+M+M) totally without any temporary matrix objects. This sounds + good but we should take a closer look. Consider that the + operator is + invoked 6 times. This means we have something like this: + + x = M * (matrix_exp representing M+M+M+M+M+M+M); + + M is being multiplied with a quite complex matrix_exp. Now recall that when + you ask a matrix_exp what the value of any of its elements are it computes + their values *right then*. + + If you think on what is involved in performing a matrix multiply you will + realize that each element of a matrix is accessed M.nr() times. In the + case of our above expression the cost of accessing an element of the + matrix_exp on the right hand side is the cost of doing 6 addition operations. + + Thus, it would be faster to assign M+M+M+M+M+M+M to a temporary matrix and then + multiply that by M. This is exactly what the dlib::matrix does under the covers. + This is because it is able to spot expressions where the introduction of a + temporary is needed to speed up the computation and it will automatically do this + for you. + + + + + Another complication that is dealt with automatically is aliasing. All matrix + expressions are said to "alias" their contents. For example, consider + the following expressions: + M + M + M * M + + We say that the expressions (M + M) and (M * M) alias M. Additionally, we say that + the expression (M * M) destructively aliases M. + + To understand why we say (M * M) destructively aliases M consider what would happen + if we attempted to evaluate it without first assigning (M * M) to a temporary matrix. + That is, if we attempted to perform the following: + for (long r = 0; r < M.nr(); ++r) + for (long c = 0; c < M.nc(); ++c) + M(r,c) = rowm(M,r)*colm(M,c); + + It is clear that the result would be corrupted and M wouldn't end up with the right + values in it. So in this case we must perform the following: + temp = M*M; + M = temp; + + This sort of interaction is what defines destructive aliasing. Whenever we are + assigning a matrix expression to a destination that is destructively aliased by + the expression we need to introduce a temporary. The dlib::matrix is capable of + recognizing the two forms of aliasing and introduces temporary matrices only when + necessary. + */ + + + + // Next we discuss how to create custom matrix expressions. In what follows we + // will define three different matrix expressions and show their use. 
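+    // (Each expression below follows the same recipe: a small struct that reports
+    // its cost, dimensions, element access, and aliasing behavior, plus a helper
+    // function that wraps the struct in matrix_op<>.)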
+ custom_matrix_expressions_example(); +} + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +template <typename M> +struct example_op_trans +{ + /*! + This object defines a matrix expression that represents a transposed matrix. + As discussed above, constructing this object doesn't compute anything. It just + holds a reference to a matrix and presents an interface which defines + matrix transposition. + !*/ + + // Here we simply hold a reference to the matrix we are supposed to be the transpose of. + example_op_trans( const M& m_) : m(m_){} + const M& m; + + // The cost field is used by matrix multiplication code to decide if a temporary needs to + // be introduced. It represents the computational cost of evaluating an element of the + // matrix expression. In this case we say that the cost of obtaining an element of the + // transposed matrix is the same as obtaining an element of the original matrix (since + // transpose doesn't really compute anything). + const static long cost = M::cost; + + // Here we define the matrix expression's compile-time known dimensions. Since this + // is a transpose we define them to be the reverse of M's dimensions. + const static long NR = M::NC; + const static long NC = M::NR; + + // Define the type of element in this matrix expression. Also define the + // memory manager type used and the layout. Usually we use the same types as the + // input matrix. + typedef typename M::type type; + typedef typename M::mem_manager_type mem_manager_type; + typedef typename M::layout_type layout_type; + + // This is where the action is. This function is what defines the value of an element of + // this matrix expression. Here we are saying that m(c,r) == trans(m)(r,c) which is just + // the definition of transposition. Note also that we must define the return type from this + // function as a typedef. This typedef lets us either return our argument by value or by + // reference. In this case we use the same type as the underlying m matrix. Later in this + // example program you will see two other options. + typedef typename M::const_ret_type const_ret_type; + const_ret_type apply (long r, long c) const { return m(c,r); } + + // Define the run-time defined dimensions of this matrix. + long nr () const { return m.nc(); } + long nc () const { return m.nr(); } + + // Recall the discussion of aliasing. Each matrix expression needs to define what + // kind of aliasing it introduces so that we know when to introduce temporaries. The + // aliases() function indicates that the matrix transpose expression aliases item if + // and only if the m matrix aliases item. + template <typename U> bool aliases ( const matrix_exp<U>& item) const { return m.aliases(item); } + // This next function indicates that the matrix transpose expression also destructively + // aliases anything m aliases. I.e. transpose has destructive aliasing. + template <typename U> bool destructively_aliases ( const matrix_exp<U>& item) const { return m.aliases(item); } + +}; + + +// Here we define a simple function that creates and returns transpose expressions. Note that the +// matrix_op<> template is a matrix_exp object and exists solely to reduce the amount of boilerplate +// you have to write to create a matrix expression. 
+template < typename M > +const matrix_op<example_op_trans<M> > example_trans ( + const matrix_exp<M>& m +) +{ + typedef example_op_trans<M> op; + // m.ref() returns a reference to the object of type M contained in the matrix expression m. + return matrix_op<op>(op(m.ref())); +} + +// ---------------------------------------------------------------------------------------- + +template <typename T> +struct example_op_vector_to_matrix +{ + /*! + This object defines a matrix expression that holds a reference to a std::vector<T> + and makes it look like a column vector. Thus it enables you to use a std::vector + as if it was a dlib::matrix. + + !*/ + example_op_vector_to_matrix( const std::vector<T>& vect_) : vect(vect_){} + + const std::vector<T>& vect; + + // This expression wraps direct memory accesses so we use the lowest possible cost. + const static long cost = 1; + + const static long NR = 0; // We don't know the length of the vector until runtime. So we put 0 here. + const static long NC = 1; // We do know that it only has one column (since it's a vector) + typedef T type; + // Since the std::vector doesn't use a dlib memory manager we list the default one here. + typedef default_memory_manager mem_manager_type; + // The layout type also doesn't really matter in this case. So we list row_major_layout + // since it is a good default. + typedef row_major_layout layout_type; + + // Note that we define const_ret_type to be a reference type. This way we can + // return the contents of the std::vector by reference. + typedef const T& const_ret_type; + const_ret_type apply (long r, long ) const { return vect[r]; } + + long nr () const { return vect.size(); } + long nc () const { return 1; } + + // This expression never aliases anything since it doesn't contain any matrix expression (it + // contains only a std::vector which doesn't count since you can't assign a matrix expression + // to a std::vector object). + template <typename U> bool aliases ( const matrix_exp<U>& ) const { return false; } + template <typename U> bool destructively_aliases ( const matrix_exp<U>& ) const { return false; } +}; + +template < typename T > +const matrix_op<example_op_vector_to_matrix<T> > example_vector_to_matrix ( + const std::vector<T>& vector +) +{ + typedef example_op_vector_to_matrix<T> op; + return matrix_op<op>(op(vector)); +} + +// ---------------------------------------------------------------------------------------- + +template <typename M, typename T> +struct example_op_add_scalar +{ + /*! + This object defines a matrix expression that represents a matrix with a single + scalar value added to all its elements. + !*/ + + example_op_add_scalar( const M& m_, const T& val_) : m(m_), val(val_){} + + // A reference to the matrix + const M& m; + // A copy of the scalar value that should be added to each element of m + const T val; + + // This time we add 1 to the cost since evaluating an element of this + // expression means performing 1 addition operation. + const static long cost = M::cost + 1; + const static long NR = M::NR; + const static long NC = M::NC; + typedef typename M::type type; + typedef typename M::mem_manager_type mem_manager_type; + typedef typename M::layout_type layout_type; + + // Note that we declare const_ret_type to be a non-reference type. This is important + // since apply() computes a new temporary value and thus we can't return a reference + // to it. 
+    typedef type const_ret_type;
+    const_ret_type apply (long r, long c) const { return m(r,c) + val; }
+
+    long nr () const { return m.nr(); }
+    long nc () const { return m.nc(); }
+
+    // This expression aliases anything m aliases.
+    template <typename U> bool aliases               ( const matrix_exp<U>& item) const { return m.aliases(item); }
+    // Unlike the transpose expression, this expression only destructively aliases something if m does.
+    // So this expression has the regular non-destructive kind of aliasing.
+    template <typename U> bool destructively_aliases ( const matrix_exp<U>& item) const { return m.destructively_aliases(item); }
+
+};
+
+template < typename M, typename T >
+const matrix_op<example_op_add_scalar<M,T> > add_scalar (
+    const matrix_exp<M>& m,
+    T val
+)
+{
+    typedef example_op_add_scalar<M,T> op;
+    return matrix_op<op>(op(m.ref(), val));
+}
+
+// ----------------------------------------------------------------------------------------
+
+void custom_matrix_expressions_example(
+)
+{
+    matrix<double> x(2,3);
+    x = 1, 1, 1,
+        2, 2, 2;
+
+    cout << x << endl;
+
+    // Finally, let's use the matrix expressions we defined above.
+
+    // prints the transpose of x
+    cout << example_trans(x) << endl;
+
+    // prints this:
+    //   11 11 11
+    //   12 12 12
+    cout << add_scalar(x, 10) << endl;
+
+
+    // matrix expressions can be nested, even the user-defined ones.
+    // the following statement prints this:
+    //   11 12
+    //   11 12
+    //   11 12
+    cout << example_trans(add_scalar(x, 10)) << endl;
+
+    // Since we set up the alias detection correctly we can even do this:
+    x = example_trans(add_scalar(x, 10));
+    cout << "new x:\n" << x << endl;
+
+    cout << "Do some testing with the example_vector_to_matrix() function: " << endl;
+    std::vector<float> vect;
+    vect.push_back(1);
+    vect.push_back(3);
+    vect.push_back(5);
+
+    // Now let's treat our std::vector like a matrix and print some things.
+    cout << example_vector_to_matrix(vect) << endl;
+    cout << add_scalar(example_vector_to_matrix(vect), 10) << endl;
+
+
+
+    /*
+        As an aside, note that dlib contains functions equivalent to the ones we
+        defined above.  They are:
+            - dlib::trans()
+            - dlib::mat() (converts things into matrices)
+            - operator+ (e.g. you can say my_mat + 1)
+
+
+        Also, if you are going to be creating your own matrix expression you should also
+        look through the matrix code in the dlib/matrix folder.  There you will find
+        many other examples of matrix expressions.
+    */
+}
+
+// ----------------------------------------------------------------------------------------
+
+
diff --git a/ml/dlib/examples/max_cost_assignment_ex.cpp b/ml/dlib/examples/max_cost_assignment_ex.cpp
new file mode 100755
index 00000000..f6985a9e
--- /dev/null
+++ b/ml/dlib/examples/max_cost_assignment_ex.cpp
@@ -0,0 +1,47 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This simple example shows how to call dlib's optimal linear assignment problem solver.
+    It is an implementation of the famous Hungarian algorithm and is quite fast, operating in
+    O(N^3) time.
+
+*/
+
+#include <dlib/optimization/max_cost_assignment.h>
+#include <iostream>
+
+using namespace std;
+using namespace dlib;
+
+int main ()
+{
+    // Let's imagine you need to assign N people to N jobs.  Additionally, each person will make
+    // your company a certain amount of money at each job, but each person has different skills
+    // so they are better at some jobs and worse at others.  You would like to find the best way
+    // to assign people to these jobs.
+    // In particular, you would like to maximize the amount of money the group
+    // makes as a whole.  This is an example of an assignment problem and is
+    // what is solved by the max_cost_assignment() routine.
+    //
+    // So in this example, let's imagine we have 3 people and 3 jobs.  We represent the amount of
+    // money each person will produce at each job with a cost matrix.  Each row corresponds to a
+    // person and each column corresponds to a job.  So for example, below we are saying that
+    // person 0 will make $1 at job 0, $2 at job 1, and $6 at job 2.
+    matrix<int> cost(3,3);
+    cost = 1, 2, 6,
+           5, 3, 6,
+           4, 5, 0;
+
+    // To find out the best assignment of people to jobs we just need to call this function.
+    std::vector<long> assignment = max_cost_assignment(cost);
+
+    // This prints optimal assignments:  [2, 0, 1] which indicates that we should assign
+    // the person from the first row of the cost matrix to job 2, the middle row person to
+    // job 0, and the bottom row person to job 1.
+    for (unsigned int i = 0; i < assignment.size(); i++)
+        cout << assignment[i] << std::endl;
+
+    // This prints optimal cost:  16.0
+    // which is correct since our optimal assignment is 6+5+5.
+    cout << "optimal cost: " << assignment_cost(cost, assignment) << endl;
+}
+
diff --git a/ml/dlib/examples/member_function_pointer_ex.cpp b/ml/dlib/examples/member_function_pointer_ex.cpp
new file mode 100644
index 00000000..26724d3a
--- /dev/null
+++ b/ml/dlib/examples/member_function_pointer_ex.cpp
@@ -0,0 +1,78 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+
+/*
+    This is an example illustrating the use of the member_function_pointer object
+    from the dlib C++ Library.
+
+*/
+
+
+#include <iostream>
+#include <dlib/member_function_pointer.h>
+
+using namespace dlib;
+using namespace std;
+
+// ----------------------------------------------------------------------------------------
+
+class example_object
+{
+public:
+
+    void do_something (
+    )
+    {
+        cout << "hello world" << endl;
+    }
+
+    void print_this_number (
+        int num
+    )
+    {
+        cout << "number you gave me = " << num << endl;
+    }
+
+};
+
+// ----------------------------------------------------------------------------------------
+
+int main()
+{
+    // create a pointer that can point to member functions that take no arguments
+    member_function_pointer<> mfp1;
+
+    // create a pointer that can point to member functions that take a single int argument
+    member_function_pointer<int> mfp2;
+
+    example_object obj;
+
+    // now we set the mfp1 pointer to point to the member function do_something()
+    // on the obj object.
+    mfp1.set(obj, &example_object::do_something);
+
+
+    // now we set the mfp2 pointer to point to the member function print_this_number()
+    // on the obj object.
+    mfp2.set(obj, &example_object::print_this_number);
+
+
+    // Now we can call the function this pointer points to.  This calls the function
+    // obj.do_something() via our member function pointer.
+    mfp1();
+
+    // Now we can call the function this pointer points to.  This calls the function
+    // obj.print_this_number(5) via our member function pointer.
+    mfp2(5);
+
+
+    // The above example shows a very simple use of the member_function_pointer.
+    // A more interesting use of the member_function_pointer is in the implementation
+    // of callbacks or event handlers.  For example, when you register an event
+    // handler for a dlib::button click it uses a member_function_pointer
+    // internally to save and later call your event handler.
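+
+    // To sketch that pattern (a simplified illustration only, not dlib's actual
+    // button implementation), an event source might store and invoke a
+    // member_function_pointer like this:
+    //
+    //     class simple_button
+    //     {
+    //     public:
+    //         template <typename T>
+    //         void set_click_handler (T& object, void (T::*handler)())
+    //         {
+    //             click_handler.set(object, handler);
+    //         }
+    //
+    //         void simulate_click ()
+    //         {
+    //             // only call the handler if one was registered
+    //             if (click_handler.is_set())
+    //                 click_handler();
+    //         }
+    //
+    //     private:
+    //         member_function_pointer<> click_handler;
+    //     };
+    //
+    // Registering obj's do_something() with such a button and then calling
+    // simulate_click() would print "hello world", without simple_button knowing
+    // anything about the type example_object.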
+} + +// ---------------------------------------------------------------------------------------- + + + diff --git a/ml/dlib/examples/mlp_ex.cpp b/ml/dlib/examples/mlp_ex.cpp new file mode 100644 index 00000000..372753c8 --- /dev/null +++ b/ml/dlib/examples/mlp_ex.cpp @@ -0,0 +1,86 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the multilayer perceptron + from the dlib C++ Library. + + This example creates a simple set of data to train on and shows + you how to train a mlp object on that data. + + + The data used in this example will be 2 dimensional data and will + come from a distribution where points with a distance less than 10 + from the origin are labeled 1 and all other points are labeled + as 0. + +*/ + + +#include <iostream> +#include <dlib/mlp.h> + +using namespace std; +using namespace dlib; + + +int main() +{ + // The mlp takes column vectors as input and gives column vectors as output. The dlib::matrix + // object is used to represent the column vectors. So the first thing we do here is declare + // a convenient typedef for the matrix object we will be using. + + // This typedef declares a matrix with 2 rows and 1 column. It will be the + // object that contains each of our 2 dimensional samples. (Note that if you wanted + // more than 2 features in this vector you can simply change the 2 to something else) + typedef matrix<double, 2, 1> sample_type; + + + // make an instance of a sample matrix so we can use it below + sample_type sample; + + // Create a multi-layer perceptron network. This network has 2 nodes on the input layer + // (which means it takes column vectors of length 2 as input) and 5 nodes in the first + // hidden layer. Note that the other 4 variables in the mlp's constructor are left at + // their default values. + mlp::kernel_1a_c net(2,5); + + // Now let's put some data into our sample and train on it. We do this + // by looping over 41*41 points and labeling them according to their + // distance from the origin. + for (int i = 0; i < 1000; ++i) + { + for (int r = -20; r <= 20; ++r) + { + for (int c = -20; c <= 20; ++c) + { + sample(0) = r; + sample(1) = c; + + // if this point is less than 10 from the origin + if (sqrt((double)r*r + c*c) <= 10) + net.train(sample,1); + else + net.train(sample,0); + } + } + } + + // Now we have trained our mlp. Let's see how well it did. + // Note that if you run this program multiple times you will get different results. This + // is because the mlp network is randomly initialized. + + // each of these statements prints out the output of the network given a particular sample. 
+
+    sample(0) = 3.123;
+    sample(1) = 4;
+    cout << "This sample should be close to 1 and it is classified as a " << net(sample) << endl;
+
+    sample(0) = 13.123;
+    sample(1) = 9.3545;
+    cout << "This sample should be close to 0 and it is classified as a " << net(sample) << endl;
+
+    sample(0) = 13.123;
+    sample(1) = 0;
+    cout << "This sample should be close to 0 and it is classified as a " << net(sample) << endl;
+}
+
diff --git a/ml/dlib/examples/mmod_cars_test_image.jpg b/ml/dlib/examples/mmod_cars_test_image.jpg
Binary files differ
new file mode 100644
index 00000000..cfffffe6
--- /dev/null
+++ b/ml/dlib/examples/mmod_cars_test_image.jpg
diff --git a/ml/dlib/examples/mmod_cars_test_image2.jpg b/ml/dlib/examples/mmod_cars_test_image2.jpg
Binary files differ
new file mode 100644
index 00000000..16aa30eb
--- /dev/null
+++ b/ml/dlib/examples/mmod_cars_test_image2.jpg
diff --git a/ml/dlib/examples/model_selection_ex.cpp b/ml/dlib/examples/model_selection_ex.cpp
new file mode 100644
index 00000000..cfe2bf62
--- /dev/null
+++ b/ml/dlib/examples/model_selection_ex.cpp
@@ -0,0 +1,148 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example that shows how you can perform model selection with the
+    dlib C++ Library.
+
+    It will create a simple dataset and show you how to use cross validation and
+    global optimization to determine good parameters for the purpose of training
+    an svm to classify the data.
+
+    The data used in this example will be 2 dimensional data and will come from a
+    distribution where points with a distance less than 10 from the origin are
+    labeled +1 and all other points are labeled as -1.
+
+
+    As an aside, you should probably read the svm_ex.cpp and matrix_ex.cpp example
+    programs before you read this one.
+*/
+
+
+#include <iostream>
+#include <dlib/svm.h>
+#include <dlib/global_optimization.h>
+
+using namespace std;
+using namespace dlib;
+
+
+int main() try
+{
+    // The svm functions use column vectors to contain a lot of the data on which they
+    // operate.  So the first thing we do here is declare a convenient typedef.
+
+    // This typedef declares a matrix with 2 rows and 1 column.  It will be the
+    // object that contains each of our 2 dimensional samples.
+    typedef matrix<double, 2, 1> sample_type;
+
+
+
+    // Now we make objects to contain our samples and their respective labels.
+    std::vector<sample_type> samples;
+    std::vector<double> labels;
+
+    // Now let's put some data into our samples and labels objects.  We do this
+    // by looping over a bunch of points and labeling them according to their
+    // distance from the origin.
+    for (double r = -20; r <= 20; r += 0.8)
+    {
+        for (double c = -20; c <= 20; c += 0.8)
+        {
+            sample_type samp;
+            samp(0) = r;
+            samp(1) = c;
+            samples.push_back(samp);
+
+            // if this point is less than 10 from the origin
+            if (sqrt(r*r + c*c) <= 10)
+                labels.push_back(+1);
+            else
+                labels.push_back(-1);
+        }
+    }
+
+    cout << "Generated " << samples.size() << " points" << endl;
+
+
+    // Here we normalize all the samples by subtracting their mean and dividing by their
+    // standard deviation.  This is generally a good idea since it often heads off
+    // numerical stability problems and also prevents one large feature from smothering
+    // others.  Doing this doesn't matter much in this example, but I'm doing it here
+    // so you can see an easy way to accomplish it with the library.
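+    //
+    // Concretely, normalizer.train(samples) below estimates the per-dimension
+    // mean m and standard deviation s of the training samples, and each later
+    // call normalizer(x) returns the element-wise value (x - m)/s.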
+ vector_normalizer<sample_type> normalizer; + // let the normalizer learn the mean and standard deviation of the samples + normalizer.train(samples); + // now normalize each sample + for (unsigned long i = 0; i < samples.size(); ++i) + samples[i] = normalizer(samples[i]); + + + // Now that we have some data we want to train on it. We are going to train a + // binary SVM with the RBF kernel to classify the data. However, there are + // three parameters to the training. These are the SVM C parameters for each + // class and the RBF kernel's gamma parameter. Our choice for these + // parameters will influence how good the resulting decision function is. To + // test how good a particular choice of these parameters is we can use the + // cross_validate_trainer() function to perform n-fold cross validation on our + // training data. However, there is a problem with the way we have sampled + // our distribution above. The problem is that there is a definite ordering + // to the samples. That is, the first half of the samples look like they are + // from a different distribution than the second half. This would screw up + // the cross validation process, but we can fix it by randomizing the order of + // the samples with the following function call. + randomize_samples(samples, labels); + + + // And now we get to the important bit. Here we define a function, + // cross_validation_score(), that will do the cross-validation we + // mentioned and return a number indicating how good a particular setting + // of gamma, c1, and c2 is. + auto cross_validation_score = [&](const double gamma, const double c1, const double c2) + { + // Make a RBF SVM trainer and tell it what the parameters are supposed to be. + typedef radial_basis_kernel<sample_type> kernel_type; + svm_c_trainer<kernel_type> trainer; + trainer.set_kernel(kernel_type(gamma)); + trainer.set_c_class1(c1); + trainer.set_c_class2(c2); + + // Finally, perform 10-fold cross validation and then print and return the results. + matrix<double> result = cross_validate_trainer(trainer, samples, labels, 10); + cout << "gamma: " << setw(11) << gamma << " c1: " << setw(11) << c1 << " c2: " << setw(11) << c2 << " cross validation accuracy: " << result; + + // Now return a number indicating how good the parameters are. Bigger is + // better in this example. Here I'm returning the harmonic mean between the + // accuracies of each class. However, you could do something else. For + // example, you might care a lot more about correctly predicting the +1 class, + // so you could penalize results that didn't obtain a high accuracy on that + // class. You might do this by using something like a weighted version of the + // F1-score (see http://en.wikipedia.org/wiki/F1_score). + return 2*prod(result)/sum(result); + }; + + + // And finally, we call this global optimizer that will search for the best parameters. + // It will call cross_validation_score() 50 times with different settings and return + // the best parameter setting it finds. find_max_global() uses a global optimization + // method based on a combination of non-parametric global function modeling and + // quadratic trust region modeling to efficiently find a global maximizer. It usually + // does a good job with a relatively small number of calls to cross_validation_score(). + // In this example, you should observe that it finds settings that give perfect binary + // classification of the data. 
+    auto result = find_max_global(cross_validation_score,
+                                  {1e-5, 1e-5, 1e-5},  // lower bound constraints on gamma, c1, and c2, respectively
+                                  {100,  1e6,  1e6},   // upper bound constraints on gamma, c1, and c2, respectively
+                                  max_function_calls(50));
+
+    double best_gamma = result.x(0);
+    double best_c1    = result.x(1);
+    double best_c2    = result.x(2);
+
+    cout << " best cross-validation score: " << result.y << endl;
+    cout << " best gamma: " << best_gamma << "   best c1: " << best_c1 << "    best c2: "<< best_c2  << endl;
}
+catch (exception& e)
+{
+    cout << e.what() << endl;
+}
+
diff --git a/ml/dlib/examples/mpc_ex.cpp b/ml/dlib/examples/mpc_ex.cpp
new file mode 100644
index 00000000..8df5173d
--- /dev/null
+++ b/ml/dlib/examples/mpc_ex.cpp
@@ -0,0 +1,156 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example illustrating the use of the linear model predictive
+    control tool from the dlib C++ Library.  To explain what it does, suppose
+    you have some process you want to control and the process dynamics are
+    described by the linear equation:
+        x_{i+1} = A*x_i + B*u_i + C
+    That is, the next state the system goes into is a linear function of its
+    current state (x_i) and the current control (u_i) plus some constant bias or
+    disturbance.
+
+    A model predictive controller can find the control (u) you should apply to
+    drive the state (x) to some reference value, which is what we show in this
+    example.  In particular, we will simulate a simple vehicle moving around in
+    a planet's gravity.  We will use MPC to get the vehicle to fly to and then
+    hover at a certain point in the air.
+
+*/
+
+
+#include <dlib/gui_widgets.h>
+#include <dlib/control.h>
+#include <dlib/image_transforms.h>
+
+
+using namespace std;
+using namespace dlib;
+
+// ----------------------------------------------------------------------------
+
+int main()
+{
+    const int STATES = 4;
+    const int CONTROLS = 2;
+
+    // The first thing we do is set up our vehicle dynamics model (A*x + B*u + C).
+    // Our state space (the x) will have 4 dimensions, the 2D vehicle position
+    // and also the 2D velocity.  The control space (u) will be just 2 variables
+    // which encode the amount of force we apply to the vehicle along each axis.
+    // Therefore, the A matrix defines a simple constant velocity model.
+    matrix<double,STATES,STATES> A;
+    A = 1, 0, 1, 0,  // next_pos = pos + velocity
+        0, 1, 0, 1,  // next_pos = pos + velocity
+        0, 0, 1, 0,  // next_velocity = velocity
+        0, 0, 0, 1;  // next_velocity = velocity
+
+    // Here we say that the control variables affect only the velocity.  That is,
+    // the control applies an acceleration to the vehicle.
+    matrix<double,STATES,CONTROLS> B;
+    B = 0, 0,
+        0, 0,
+        1, 0,
+        0, 1;
+
+    // Let's also say there is a small constant acceleration in one direction.
+    // This is the force of gravity in our model.
+    matrix<double,STATES,1> C;
+    C = 0,
+        0,
+        0,
+        0.1;
+
+
+    const int HORIZON = 30;
+    // Now we need to set up some MPC-specific parameters.  To understand them,
+    // let's first talk about how MPC works.  When the MPC tool finds the "best"
+    // control to apply it does so by simulating the process for HORIZON time
+    // steps and selecting the control that leads to the best performance over
+    // the next HORIZON steps.
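+    // In other words, MPC is a receding horizon scheme: at every time step the
+    // controller re-solves the whole HORIZON-step plan, but only the first
+    // control of that plan is ever executed.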
+ // + // To be precise, each time you ask it for a control, it solves the + // following quadratic program: + // + // min sum_i trans(x_i-target_i)*Q*(x_i-target_i) + trans(u_i)*R*u_i + // x_i,u_i + // + // such that: x_0 == current_state + // x_{i+1} == A*x_i + B*u_i + C + // lower <= u_i <= upper + // 0 <= i < HORIZON + // + // and reports u_0 as the control you should take given that you are currently + // in current_state. Q and R are user supplied matrices that define how we + // penalize variations away from the target state as well as how much we want + // to avoid generating large control signals. We also allow you to specify + // upper and lower bound constraints on the controls. The next few lines + // define these parameters for our simple example. + + matrix<double,STATES,1> Q; + // Setup Q so that the MPC only cares about matching the target position and + // ignores the velocity. + Q = 1, 1, 0, 0; + + matrix<double,CONTROLS,1> R, lower, upper; + R = 1, 1; + lower = -0.5, -0.5; + upper = 0.5, 0.5; + + // Finally, create the MPC controller. + mpc<STATES,CONTROLS,HORIZON> controller(A,B,C,Q,R,lower,upper); + + + // Let's tell the controller to send our vehicle to a random location. It + // will try to find the controls that makes the vehicle just hover at this + // target position. + dlib::rand rnd; + matrix<double,STATES,1> target; + target = rnd.get_random_double()*400,rnd.get_random_double()*400,0,0; + controller.set_target(target); + + + // Now let's start simulating our vehicle. Our vehicle moves around inside + // a 400x400 unit sized world. + matrix<rgb_pixel> world(400,400); + image_window win; + matrix<double,STATES,1> current_state; + // And we start it at the center of the world with zero velocity. + current_state = 200,200,0,0; + + int iter = 0; + while(!win.is_closed()) + { + // Find the best control action given our current state. + matrix<double,CONTROLS,1> action = controller(current_state); + cout << "best control: " << trans(action); + + // Now draw our vehicle on the world. We will draw the vehicle as a + // black circle and its target position as a green circle. + assign_all_pixels(world, rgb_pixel(255,255,255)); + const dpoint pos = point(current_state(0),current_state(1)); + const dpoint goal = point(target(0),target(1)); + draw_solid_circle(world, goal, 9, rgb_pixel(100,255,100)); + draw_solid_circle(world, pos, 7, 0); + // We will also draw the control as a line showing which direction the + // vehicle's thruster is firing. + draw_line(world, pos, pos-50*action, rgb_pixel(255,0,0)); + win.set_image(world); + + // Take a step in the simulation + current_state = A*current_state + B*action + C; + dlib::sleep(100); + + // Every 100 iterations change the target to some other random location. + ++iter; + if (iter > 100) + { + iter = 0; + target = rnd.get_random_double()*400,rnd.get_random_double()*400,0,0; + controller.set_target(target); + } + } +} + +// ---------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/multiclass_classification_ex.cpp b/ml/dlib/examples/multiclass_classification_ex.cpp new file mode 100644 index 00000000..782511ca --- /dev/null +++ b/ml/dlib/examples/multiclass_classification_ex.cpp @@ -0,0 +1,248 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is an example illustrating the use of the multiclass classification tools + from the dlib C++ Library. 
Specifically, this example will make points from + three classes and show you how to train a multiclass classifier to recognize + these three classes. + + The classes are as follows: + - class 1: points very close to the origin + - class 2: points on the circle of radius 10 around the origin + - class 3: points that are on a circle of radius 4 but not around the origin at all +*/ + +#include <dlib/svm_threaded.h> + +#include <iostream> +#include <vector> + +#include <dlib/rand.h> + +using namespace std; +using namespace dlib; + +// Our data will be 2-dimensional data. So declare an appropriate type to contain these points. +typedef matrix<double,2,1> sample_type; + +// ---------------------------------------------------------------------------------------- + +void generate_data ( + std::vector<sample_type>& samples, + std::vector<double>& labels +); +/*! + ensures + - make some 3 class data as described above. + - Create 60 points from class 1 + - Create 70 points from class 2 + - Create 80 points from class 3 +!*/ + +// ---------------------------------------------------------------------------------------- + +int main() +{ + try + { + std::vector<sample_type> samples; + std::vector<double> labels; + + // First, get our labeled set of training data + generate_data(samples, labels); + + cout << "samples.size(): "<< samples.size() << endl; + + // The main object in this example program is the one_vs_one_trainer. It is essentially + // a container class for regular binary classifier trainer objects. In particular, it + // uses the any_trainer object to store any kind of trainer object that implements a + // .train(samples,labels) function which returns some kind of learned decision function. + // It uses these binary classifiers to construct a voting multiclass classifier. If + // there are N classes then it trains N*(N-1)/2 binary classifiers, one for each pair of + // labels, which then vote on the label of a sample. + // + // In this example program we will work with a one_vs_one_trainer object which stores any + // kind of trainer that uses our sample_type samples. + typedef one_vs_one_trainer<any_trainer<sample_type> > ovo_trainer; + + + // Finally, make the one_vs_one_trainer. + ovo_trainer trainer; + + + // Next, we will make two different binary classification trainer objects. One + // which uses kernel ridge regression and RBF kernels and another which uses a + // support vector machine and polynomial kernels. The particular details don't matter. + // The point of this part of the example is that you can use any kind of trainer object + // with the one_vs_one_trainer. + typedef polynomial_kernel<sample_type> poly_kernel; + typedef radial_basis_kernel<sample_type> rbf_kernel; + + // make the binary trainers and set some parameters + krr_trainer<rbf_kernel> rbf_trainer; + svm_nu_trainer<poly_kernel> poly_trainer; + poly_trainer.set_kernel(poly_kernel(0.1, 1, 2)); + rbf_trainer.set_kernel(rbf_kernel(0.1)); + + + // Now tell the one_vs_one_trainer that, by default, it should use the rbf_trainer + // to solve the individual binary classification subproblems. + trainer.set_trainer(rbf_trainer); + // We can also get more specific. Here we tell the one_vs_one_trainer to use the + // poly_trainer to solve the class 1 vs class 2 subproblem. All the others will + // still be solved with the rbf_trainer. + trainer.set_trainer(poly_trainer, 1, 2); + + // Now let's do 5-fold cross-validation using the one_vs_one_trainer we just setup. 
+ // As an aside, always shuffle the order of the samples before doing cross validation. + // For a discussion of why this is a good idea see the svm_ex.cpp example. + randomize_samples(samples, labels); + cout << "cross validation: \n" << cross_validate_multiclass_trainer(trainer, samples, labels, 5) << endl; + // The output is shown below. It is the confusion matrix which describes the results. Each row + // corresponds to a class of data and each column to a prediction. Reading from top to bottom, + // the rows correspond to the class labels if the labels have been listed in sorted order. So the + // top row corresponds to class 1, the middle row to class 2, and the bottom row to class 3. The + // columns are organized similarly, with the left most column showing how many samples were predicted + // as members of class 1. + // + // So in the results below we can see that, for the class 1 samples, 60 of them were correctly predicted + // to be members of class 1 and 0 were incorrectly classified. Similarly, the other two classes of data + // are perfectly classified. + /* + cross validation: + 60 0 0 + 0 70 0 + 0 0 80 + */ + + // Next, if you wanted to obtain the decision rule learned by a one_vs_one_trainer you + // would store it into a one_vs_one_decision_function. + one_vs_one_decision_function<ovo_trainer> df = trainer.train(samples, labels); + + cout << "predicted label: "<< df(samples[0]) << ", true label: "<< labels[0] << endl; + cout << "predicted label: "<< df(samples[90]) << ", true label: "<< labels[90] << endl; + // The output is: + /* + predicted label: 2, true label: 2 + predicted label: 1, true label: 1 + */ + + + // If you want to save a one_vs_one_decision_function to disk, you can do + // so. However, you must declare what kind of decision functions it contains. + one_vs_one_decision_function<ovo_trainer, + decision_function<poly_kernel>, // This is the output of the poly_trainer + decision_function<rbf_kernel> // This is the output of the rbf_trainer + > df2, df3; + + + // Put df into df2 and then save df2 to disk. Note that we could have also said + // df2 = trainer.train(samples, labels); But doing it this way avoids retraining. + df2 = df; + serialize("df.dat") << df2; + + // load the function back in from disk and store it in df3. + deserialize("df.dat") >> df3; + + + // Test df3 to see that this worked. + cout << endl; + cout << "predicted label: "<< df3(samples[0]) << ", true label: "<< labels[0] << endl; + cout << "predicted label: "<< df3(samples[90]) << ", true label: "<< labels[90] << endl; + // Test df3 on the samples and labels and print the confusion matrix. + cout << "test deserialized function: \n" << test_multiclass_decision_function(df3, samples, labels) << endl; + + + + + + // Finally, if you want to get the binary classifiers from inside a multiclass decision + // function you can do it by calling get_binary_decision_functions() like so: + one_vs_one_decision_function<ovo_trainer>::binary_function_table functs; + functs = df.get_binary_decision_functions(); + cout << "number of binary decision functions in df: " << functs.size() << endl; + // The functs object is a std::map which maps pairs of labels to binary decision + // functions. 
So we can access the individual decision functions like so: + decision_function<poly_kernel> df_1_2 = any_cast<decision_function<poly_kernel> >(functs[make_unordered_pair(1,2)]); + decision_function<rbf_kernel> df_1_3 = any_cast<decision_function<rbf_kernel> >(functs[make_unordered_pair(1,3)]); + // df_1_2 contains the binary decision function that votes for class 1 vs. 2. + // Similarly, df_1_3 contains the classifier that votes for 1 vs. 3. + + // Note that the multiclass decision function doesn't know what kind of binary + // decision functions it contains. So we have to use any_cast to explicitly cast + // them back into the concrete type. If you make a mistake and try to any_cast a + // binary decision function into the wrong type of function any_cast will throw a + // bad_any_cast exception. + } + catch (std::exception& e) + { + cout << "exception thrown!" << endl; + cout << e.what() << endl; + } +} + +// ---------------------------------------------------------------------------------------- + +void generate_data ( + std::vector<sample_type>& samples, + std::vector<double>& labels +) +{ + const long num = 50; + + sample_type m; + + dlib::rand rnd; + + + // make some samples near the origin + double radius = 0.5; + for (long i = 0; i < num+10; ++i) + { + double sign = 1; + if (rnd.get_random_double() < 0.5) + sign = -1; + m(0) = 2*radius*rnd.get_random_double()-radius; + m(1) = sign*sqrt(radius*radius - m(0)*m(0)); + + // add this sample to our set of training samples + samples.push_back(m); + labels.push_back(1); + } + + // make some samples in a circle around the origin but far away + radius = 10.0; + for (long i = 0; i < num+20; ++i) + { + double sign = 1; + if (rnd.get_random_double() < 0.5) + sign = -1; + m(0) = 2*radius*rnd.get_random_double()-radius; + m(1) = sign*sqrt(radius*radius - m(0)*m(0)); + + // add this sample to our set of training samples + samples.push_back(m); + labels.push_back(2); + } + + // make some samples in a circle around the point (25,25) + radius = 4.0; + for (long i = 0; i < num+30; ++i) + { + double sign = 1; + if (rnd.get_random_double() < 0.5) + sign = -1; + m(0) = 2*radius*rnd.get_random_double()-radius; + m(1) = sign*sqrt(radius*radius - m(0)*m(0)); + + // translate this point away from the origin + m(0) += 25; + m(1) += 25; + + // add this sample to our set of training samples + samples.push_back(m); + labels.push_back(3); + } +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/multithreaded_object_ex.cpp b/ml/dlib/examples/multithreaded_object_ex.cpp new file mode 100644 index 00000000..fed32a91 --- /dev/null +++ b/ml/dlib/examples/multithreaded_object_ex.cpp @@ -0,0 +1,138 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the multithreaded_object. + + This is a very simple example. It creates 3 threads that + just print messages to the screen. + + + + Example program output: + 0 INFO [1] mto: thread1(): hurray threads! + 0 INFO [2] mto: thread2(): hurray threads! + 0 INFO [3] mto: thread2(): hurray threads! + 700 INFO [1] mto: thread1(): hurray threads! + 800 INFO [2] mto: thread2(): hurray threads! + 801 INFO [3] mto: thread2(): hurray threads! + 1400 INFO [1] mto: thread1(): hurray threads! + 1604 INFO [2] mto: thread2(): hurray threads! + 1605 INFO [3] mto: thread2(): hurray threads! + 2100 INFO [1] mto: thread1(): hurray threads! 
+ 2409 INFO [2] mto: thread2(): hurray threads! + 2409 INFO [3] mto: thread2(): hurray threads! + 2801 INFO [1] mto: thread1(): hurray threads! + 3001 INFO [0] mto: paused threads + 6001 INFO [0] mto: starting threads back up from paused state + 6001 INFO [2] mto: thread2(): hurray threads! + 6001 INFO [1] mto: thread1(): hurray threads! + 6001 INFO [3] mto: thread2(): hurray threads! + 6705 INFO [1] mto: thread1(): hurray threads! + 6805 INFO [2] mto: thread2(): hurray threads! + 6805 INFO [3] mto: thread2(): hurray threads! + 7405 INFO [1] mto: thread1(): hurray threads! + 7609 INFO [2] mto: thread2(): hurray threads! + 7609 INFO [3] mto: thread2(): hurray threads! + 8105 INFO [1] mto: thread1(): hurray threads! + 8413 INFO [2] mto: thread2(): hurray threads! + 8413 INFO [3] mto: thread2(): hurray threads! + 8805 INFO [1] mto: thread1(): hurray threads! + + The first column is the number of milliseconds since program start, the second + column is the logging level, the third column is the thread id, and the rest + is the log message. +*/ + + +#include <iostream> +#include <dlib/threads.h> +#include <dlib/misc_api.h> // for dlib::sleep +#include <dlib/logger.h> + +using namespace std; +using namespace dlib; + +logger dlog("mto"); + +class my_object : public multithreaded_object +{ +public: + my_object() + { + // register which functions we want to run as threads. We want one thread running + // thread1() and two threads to run thread2(). So we will have a total of 3 threads + // running. + register_thread(*this,&my_object::thread1); + register_thread(*this,&my_object::thread2); + register_thread(*this,&my_object::thread2); + + // start all our registered threads going by calling the start() function + start(); + } + + ~my_object() + { + // Tell the thread() function to stop. This will cause should_stop() to + // return true so the thread knows what to do. + stop(); + + // Wait for the threads to stop before letting this object destruct itself. + // Also note, you are *required* to wait for the threads to end before + // letting this object destruct itself. + wait(); + } + +private: + + void thread1() + { + // This is a thread. It will loop until it is told that it should terminate. + while (should_stop() == false) + { + dlog << LINFO << "thread1(): hurray threads!"; + dlib::sleep(700); + } + } + + void thread2() + { + // This is a thread. It will loop until it is told that it should terminate. + while (should_stop() == false) + { + dlog << LINFO << "thread2(): hurray threads!"; + dlib::sleep(800); + } + } + +}; + +int main() +{ + // tell the logger to output all messages + dlog.set_level(LALL); + + // Create an instance of our multi-threaded object. + my_object t; + + dlib::sleep(3000); + + // Tell the multi-threaded object to pause its threads. This causes the + // threads to block on their next calls to should_stop(). + t.pause(); + dlog << LINFO << "paused threads"; + + dlib::sleep(3000); + dlog << LINFO << "starting threads back up from paused state"; + + // Tell the threads to unpause themselves. This causes should_stop() to unblock + // and to let the threads continue. + t.start(); + + dlib::sleep(3000); + + // Let the program end. When t is destructed it will gracefully terminate your + // threads because we have set the destructor up to do so. 
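+    //
+    // (Equivalently, we could have shut the threads down explicitly before t
+    // goes out of scope by calling t.stop() followed by t.wait(); the
+    // destructor we wrote above simply does that for us.)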
+} + + + diff --git a/ml/dlib/examples/object_detector_advanced_ex.cpp b/ml/dlib/examples/object_detector_advanced_ex.cpp new file mode 100644 index 00000000..718994e2 --- /dev/null +++ b/ml/dlib/examples/object_detector_advanced_ex.cpp @@ -0,0 +1,302 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the process for defining custom + bag-of-visual-word style feature extractors for use with the + structural_object_detection_trainer. + + NOTICE: This example assumes you are familiar with the contents of the + object_detector_ex.cpp example program. Also, if the objects you want to + detect are somewhat rigid in appearance (e.g. faces, pedestrians, etc.) + then you should try the methods shown in the fhog_object_detector_ex.cpp + example program before trying to use the bag-of-visual-word tools shown in + this example. +*/ + + +#include <dlib/svm_threaded.h> +#include <dlib/gui_widgets.h> +#include <dlib/array.h> +#include <dlib/array2d.h> +#include <dlib/image_keypoint.h> +#include <dlib/image_processing.h> + +#include <iostream> +#include <fstream> + + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------------------- + +template < + typename image_array_type + > +void make_simple_test_data ( + image_array_type& images, + std::vector<std::vector<rectangle> >& object_locations +) +/*! + ensures + - #images.size() == 3 + - #object_locations.size() == 3 + - Creates some simple images to test the object detection routines. In particular, + this function creates images with white 70x70 squares in them. It also stores + the locations of these squares in object_locations. + - for all valid i: + - object_locations[i] == A list of all the white rectangles present in images[i]. +!*/ +{ + images.clear(); + object_locations.clear(); + + images.resize(3); + images[0].set_size(400,400); + images[1].set_size(400,400); + images[2].set_size(400,400); + + // set all the pixel values to black + assign_all_pixels(images[0], 0); + assign_all_pixels(images[1], 0); + assign_all_pixels(images[2], 0); + + // Now make some squares and draw them onto our black images. All the + // squares will be 70 pixels wide and tall. + + std::vector<rectangle> temp; + temp.push_back(centered_rect(point(100,100), 70,70)); + fill_rect(images[0],temp.back(),255); // Paint the square white + temp.push_back(centered_rect(point(200,300), 70,70)); + fill_rect(images[0],temp.back(),255); // Paint the square white + object_locations.push_back(temp); + + temp.clear(); + temp.push_back(centered_rect(point(140,200), 70,70)); + fill_rect(images[1],temp.back(),255); // Paint the square white + temp.push_back(centered_rect(point(303,200), 70,70)); + fill_rect(images[1],temp.back(),255); // Paint the square white + object_locations.push_back(temp); + + temp.clear(); + temp.push_back(centered_rect(point(123,121), 70,70)); + fill_rect(images[2],temp.back(),255); // Paint the square white + object_locations.push_back(temp); +} + +// ---------------------------------------------------------------------------------------- + +class very_simple_feature_extractor : noncopyable +{ + /*! + WHAT THIS OBJECT REPRESENTS + This object is a feature extractor which goes to every pixel in an image and + produces a 32 dimensional feature vector. This vector is an indicator vector + which records the pattern of pixel values in a 4-connected region. 
So it should + be able to distinguish basic things like whether or not a location falls on the + corner of a white box, on an edge, in the middle, etc. + + + Note that this object also implements the interface defined in dlib/image_keypoint/hashed_feature_image_abstract.h. + This means all the member functions in this object are supposed to behave as + described in the hashed_feature_image specification. So when you define your own + feature extractor objects you should probably refer yourself to that documentation + in addition to reading this example program. + !*/ + + +public: + + template < + typename image_type + > + inline void load ( + const image_type& img + ) + { + feat_image.set_size(img.nr(), img.nc()); + assign_all_pixels(feat_image,0); + for (long r = 1; r+1 < img.nr(); ++r) + { + for (long c = 1; c+1 < img.nc(); ++c) + { + unsigned char f = 0; + if (img[r][c]) f |= 0x1; + if (img[r][c+1]) f |= 0x2; + if (img[r][c-1]) f |= 0x4; + if (img[r+1][c]) f |= 0x8; + if (img[r-1][c]) f |= 0x10; + + // Store the code value for the pattern of pixel values in the 4-connected + // neighborhood around this row and column. + feat_image[r][c] = f; + } + } + } + + inline size_t size () const { return feat_image.size(); } + inline long nr () const { return feat_image.nr(); } + inline long nc () const { return feat_image.nc(); } + + inline long get_num_dimensions ( + ) const + { + // Return the dimensionality of the vectors produced by operator() + return 32; + } + + typedef std::vector<std::pair<unsigned int,double> > descriptor_type; + + inline const descriptor_type& operator() ( + long row, + long col + ) const + /*! + requires + - 0 <= row < nr() + - 0 <= col < nc() + ensures + - returns a sparse vector which describes the image at the given row and column. + In particular, this is a vector that is 0 everywhere except for one element. + !*/ + { + feat.clear(); + const unsigned long only_nonzero_element_index = feat_image[row][col]; + feat.push_back(make_pair(only_nonzero_element_index,1.0)); + return feat; + } + + // This block of functions is meant to provide a way to map between the row/col space taken by + // this object's operator() function and the images supplied to load(). In this example it's trivial. + // However, in general, you might create feature extractors which don't perform extraction at every + // possible image location (e.g. the hog_image) and thus result in some more complex mapping. + inline const rectangle get_block_rect ( long row, long col) const { return centered_rect(col,row,3,3); } + inline const point image_to_feat_space ( const point& p) const { return p; } + inline const rectangle image_to_feat_space ( const rectangle& rect) const { return rect; } + inline const point feat_to_image_space ( const point& p) const { return p; } + inline const rectangle feat_to_image_space ( const rectangle& rect) const { return rect; } + + inline friend void serialize ( const very_simple_feature_extractor& item, std::ostream& out) { serialize(item.feat_image, out); } + inline friend void deserialize ( very_simple_feature_extractor& item, std::istream& in ) { deserialize(item.feat_image, in); } + + void copy_configuration ( const very_simple_feature_extractor& item){} + +private: + array2d<unsigned char> feat_image; + + // This variable doesn't logically contribute to the state of this object. It is here + // only to avoid returning a descriptor_type object by value inside the operator() method. 
+ mutable descriptor_type feat; +}; + +// ---------------------------------------------------------------------------------------- + +int main() +{ + try + { + // Get some data + dlib::array<array2d<unsigned char> > images; + std::vector<std::vector<rectangle> > object_locations; + make_simple_test_data(images, object_locations); + + + typedef scan_image_pyramid<pyramid_down<5>, very_simple_feature_extractor> image_scanner_type; + image_scanner_type scanner; + // Instead of using setup_grid_detection_templates() like in object_detector_ex.cpp, let's manually + // setup the sliding window box. We use a window with the same shape as the white boxes we + // are trying to detect. + const rectangle object_box = compute_box_dimensions(1, // width/height ratio + 70*70 // box area + ); + scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2)); + + // Since our sliding window is already the right size to detect our objects we don't need + // to use an image pyramid. So setting this to 1 turns off the image pyramid. + scanner.set_max_pyramid_levels(1); + + + // While the very_simple_feature_extractor doesn't have any parameters, when you go solve + // real problems you might define a feature extractor which has some non-trivial parameters + // that need to be setup before it can be used. So you need to be able to pass these parameters + // to the scanner object somehow. You can do this using the copy_configuration() function as + // shown below. + very_simple_feature_extractor fe; + /* + setup the parameters in the fe object. + ... + */ + // The scanner will use very_simple_feature_extractor::copy_configuration() to copy the state + // of fe into its internal feature extractor. + scanner.copy_configuration(fe); + + + + + // Now that we have defined the kind of sliding window classifier system we want and stored + // the details into the scanner object we are ready to use the structural_object_detection_trainer + // to learn the weight vector and threshold needed to produce a complete object detector. + structural_object_detection_trainer<image_scanner_type> trainer(scanner); + trainer.set_num_threads(4); // Set this to the number of processing cores on your machine. + + + // The trainer will try and find the detector which minimizes the number of detection mistakes. + // This function controls how it decides if a detection output is a mistake or not. The bigger + // the input to this function the more strict it is in deciding if the detector is correctly + // hitting the targets. Try reducing the value to 0.001 and observing the results. You should + // see that the detections aren't exactly on top of the white squares anymore. See the documentation + // for the structural_object_detection_trainer and structural_svm_object_detection_problem objects + // for a more detailed discussion of this parameter. + trainer.set_match_eps(0.95); + + + object_detector<image_scanner_type> detector = trainer.train(images, object_locations); + + // We can easily test the new detector against our training data. This print + // statement will indicate that it has perfect precision and recall on this simple + // task. It will also print the average precision (AP). + cout << "Test detector (precision,recall,AP): " << test_object_detection_function(detector, images, object_locations) << endl; + + // The cross validation should also indicate perfect precision and recall. 
+ cout << "3-fold cross validation (precision,recall,AP): " + << cross_validate_object_detection_trainer(trainer, images, object_locations, 3) << endl; + + + /* + It is also worth pointing out that you don't have to use dlib::array2d objects to + represent your images. In fact, you can use any object, even something like a struct + of many images and other things as the "image". The only requirements on an image + are that it should be possible to pass it to scanner.load(). So if you can say + scanner.load(images[0]), for example, then you are good to go. See the documentation + for scan_image_pyramid::load() for more details. + */ + + + // Let's display the output of the detector along with our training images. + image_window win; + for (unsigned long i = 0; i < images.size(); ++i) + { + // Run the detector on images[i] + const std::vector<rectangle> rects = detector(images[i]); + cout << "Number of detections: "<< rects.size() << endl; + + // Put the image and detections into the window. + win.clear_overlay(); + win.set_image(images[i]); + win.add_overlay(rects, rgb_pixel(255,0,0)); + + cout << "Hit enter to see the next image."; + cin.get(); + } + + } + catch (exception& e) + { + cout << "\nexception thrown!" << endl; + cout << e.what() << endl; + } +} + +// ---------------------------------------------------------------------------------------- + + diff --git a/ml/dlib/examples/object_detector_ex.cpp b/ml/dlib/examples/object_detector_ex.cpp new file mode 100644 index 00000000..cda71eb5 --- /dev/null +++ b/ml/dlib/examples/object_detector_ex.cpp @@ -0,0 +1,263 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of dlib's bag-of-visual-word based + tools for detecting objects in images. In this example we will create three + simple images, each containing some white squares. We will then use the + sliding window classifier tools to learn to detect these squares. + + If the objects you want to detect are somewhat rigid in appearance (e.g. + faces, pedestrians, etc.) then you should try the methods shown in the + fhog_object_detector_ex.cpp example program before trying to use the + bag-of-visual-word tools shown in this example. +*/ + + +#include <dlib/svm_threaded.h> +#include <dlib/gui_widgets.h> +#include <dlib/array.h> +#include <dlib/array2d.h> +#include <dlib/image_keypoint.h> +#include <dlib/image_processing.h> + +#include <iostream> +#include <fstream> + + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------------------- + +template < + typename image_array_type + > +void make_simple_test_data ( + image_array_type& images, + std::vector<std::vector<rectangle> >& object_locations +) +/*! + ensures + - #images.size() == 3 + - #object_locations.size() == 3 + - Creates some simple images to test the object detection routines. In particular, + this function creates images with white 70x70 squares in them. It also stores + the locations of these squares in object_locations. + - for all valid i: + - object_locations[i] == A list of all the white rectangles present in images[i]. 
+!*/ +{ + images.clear(); + object_locations.clear(); + + images.resize(3); + images[0].set_size(400,400); + images[1].set_size(400,400); + images[2].set_size(400,400); + + // set all the pixel values to black + assign_all_pixels(images[0], 0); + assign_all_pixels(images[1], 0); + assign_all_pixels(images[2], 0); + + // Now make some squares and draw them onto our black images. All the + // squares will be 70 pixels wide and tall. + + std::vector<rectangle> temp; + temp.push_back(centered_rect(point(100,100), 70,70)); + fill_rect(images[0],temp.back(),255); // Paint the square white + temp.push_back(centered_rect(point(200,300), 70,70)); + fill_rect(images[0],temp.back(),255); // Paint the square white + object_locations.push_back(temp); + + temp.clear(); + temp.push_back(centered_rect(point(140,200), 70,70)); + fill_rect(images[1],temp.back(),255); // Paint the square white + temp.push_back(centered_rect(point(303,200), 70,70)); + fill_rect(images[1],temp.back(),255); // Paint the square white + object_locations.push_back(temp); + + temp.clear(); + temp.push_back(centered_rect(point(123,121), 70,70)); + fill_rect(images[2],temp.back(),255); // Paint the square white + object_locations.push_back(temp); + + // corrupt each image with random noise just to make this a little more + // challenging + dlib::rand rnd; + for (unsigned long i = 0; i < images.size(); ++i) + { + for (long r = 0; r < images[i].nr(); ++r) + { + for (long c = 0; c < images[i].nc(); ++c) + { + images[i][r][c] = put_in_range(0,255,images[i][r][c] + 40*rnd.get_random_gaussian()); + } + } + } +} + +// ---------------------------------------------------------------------------------------- + +int main() +{ + try + { + // The first thing we do is create the set of 3 images discussed above. + dlib::array<array2d<unsigned char> > images; + std::vector<std::vector<rectangle> > object_locations; + make_simple_test_data(images, object_locations); + + + /* + This next block of code specifies the type of sliding window classifier we will + be using to detect the white squares. The most important thing here is the + scan_image_pyramid template. Instances of this template represent the core + of a sliding window classifier. To go into more detail, the sliding window + classifiers used by this object have three parts: + 1. The underlying feature extraction. See the dlib documentation for a detailed + discussion of how the hashed_feature_image and hog_image feature extractors + work. However, to understand this example, all you need to know is that the + feature extractor associates a vector with each location in an image. This + vector is supposed to capture information which describes how parts of the + image look. Importantly, it should do this in a way that is relevant to the + problem you are trying to solve. + + 2. A detection template. This is a rectangle which defines the shape of a + sliding window (i.e. the object_box), as well as a set of rectangular feature + extraction regions inside it. This set of regions defines the spatial + structure of the overall feature extraction within a sliding window. In + particular, each location of a sliding window has a feature vector + associated with it. This feature vector is defined as follows: + - Let N denote the number of feature extraction zones. + - Let M denote the dimensionality of the vectors output by Feature_extractor_type + objects. 
+ - Let F(i) == the M dimensional vector which is the sum of all vectors + given by our Feature_extractor_type object inside the ith feature extraction + zone. + - Then the feature vector for a sliding window is an M*N dimensional vector + [F(1) F(2) F(3) ... F(N)] (i.e. it is a concatenation of the N vectors). + This feature vector can be thought of as a collection of N "bags of features", + each bag coming from a spatial location determined by one of the rectangular + feature extraction zones. + + 3. A weight vector and a threshold value. The dot product between the weight + vector and the feature vector for a sliding window location gives the score + of the window. If this score is greater than the threshold value then the + window location is output as a detection. You don't need to determine these + parameters yourself. They are automatically populated by the + structural_object_detection_trainer. + + The sliding window classifiers described above are applied to every level of an + image pyramid. So you need to tell scan_image_pyramid what kind of pyramid you want + to use. In this case we are using pyramid_down<2> which downsamples each pyramid + layer by half (if you want to use a finer image pyramid then just change the + template argument to a larger value. For example, using pyramid_down<5> would + downsample each layer by a ratio of 5 to 4). + + Finally, some of the feature extraction zones are allowed to move freely within the + object box. This means that when we are sliding the classifier over an image, some + feature extraction zones are stationary (i.e. always in the same place relative to + the object box) while others are allowed to move anywhere within the object box. In + particular, the movable regions are placed at the locations that maximize the score + of the classifier. Note further that each of the movable feature extraction zones + must pass a threshold test for it to be included. That is, if the score that a + movable zone would contribute to the overall score for a sliding window location is + not positive then that zone is not included in the feature vector (i.e. its part of + the feature vector is set to zero. This way the length of the feature vector stays + constant). This movable region construction allows us to represent objects with + parts that move around relative to the object box. For example, a human has hands + but they aren't always in the same place relative to a person's bounding box. + However, to keep this example program simple, we will only be using stationary + feature extraction regions. + */ + typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type; + typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type; + image_scanner_type scanner; + + // The hashed_feature_image in the scanner needs to be supplied with a hash function capable + // of hashing the outputs of the hog_image. Calling this function will set it up for us. The + // 10 here indicates that it will hash HOG vectors into the range [0, pow(2,10)). Therefore, + // the feature vectors output by the hashed_feature_image will have dimension pow(2,10). + setup_hashed_features(scanner, images, 10); + // We should also tell the scanner to use the uniform feature weighting scheme + // since it works best on the data in this example. If you don't call this + // function then it will use a slightly different weighting scheme which can give + // improved results on many normal image types. 
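+        // (Roughly speaking, the default scheme weights each quantized feature
+        // based on how often it appears in the training images, while the
+        // uniform scheme gives every feature the same weight.  See the
+        // hashed_feature_image documentation for the exact details.)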
+ use_uniform_feature_weights(scanner); + + // We also need to setup the detection templates the scanner will use. It is important that + // we add detection templates which are capable of matching all the output boxes we want to learn. + // For example, if object_locations contained a rectangle with a height to width ratio of 10 but + // we only added square detection templates then it would be impossible to detect this non-square + // rectangle. The setup_grid_detection_templates_verbose() routine will take care of this for us by + // looking at the contents of object_locations and automatically picking an appropriate set. Also, + // the final arguments indicate that we want our detection templates to have 4 feature extraction + // regions laid out in a 2x2 regular grid inside each sliding window. + setup_grid_detection_templates_verbose(scanner, object_locations, 2, 2); + + + // Now that we have defined the kind of sliding window classifier system we want and stored + // the details into the scanner object we are ready to use the structural_object_detection_trainer + // to learn the weight vector and threshold needed to produce a complete object detector. + structural_object_detection_trainer<image_scanner_type> trainer(scanner); + trainer.set_num_threads(4); // Set this to the number of processing cores on your machine. + + + // There are a variety of other useful parameters to the structural_object_detection_trainer. + // Examples of the ones you are most likely to use follow (see dlib documentation for what they do): + //trainer.set_match_eps(0.80); + //trainer.set_c(1.0); + //trainer.set_loss_per_missed_target(1); + //trainer.set_loss_per_false_alarm(1); + + + // Do the actual training and save the results into the detector object. + object_detector<image_scanner_type> detector = trainer.train(images, object_locations); + + // We can easily test the new detector against our training data. This print statement will indicate that it + // has perfect precision and recall on this simple task. It will also print the average precision (AP). + cout << "Test detector (precision,recall,AP): " << test_object_detection_function(detector, images, object_locations) << endl; + + // The cross validation should also indicate perfect precision and recall. + cout << "3-fold cross validation (precision,recall,AP): " + << cross_validate_object_detection_trainer(trainer, images, object_locations, 3) << endl; + + + + + // Let's display the output of the detector along with our training images. + image_window win; + for (unsigned long i = 0; i < images.size(); ++i) + { + // Run the detector on images[i] + const std::vector<rectangle> rects = detector(images[i]); + cout << "Number of detections: "<< rects.size() << endl; + + // Put the image and detections into the window. + win.clear_overlay(); + win.set_image(images[i]); + win.add_overlay(rects, rgb_pixel(255,0,0)); + + cout << "Hit enter to see the next image."; + cin.get(); + } + + + + + // Finally, note that the detector can be serialized to disk just like other dlib objects. + serialize("object_detector.dat") << detector; + + // Recall from disk. + deserialize("object_detector.dat") >> detector; + } + catch (exception& e) + { + cout << "\nexception thrown!" 
<< endl; + cout << e.what() << endl; + } +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/one_class_classifiers_ex.cpp b/ml/dlib/examples/one_class_classifiers_ex.cpp new file mode 100644 index 00000000..3394ee76 --- /dev/null +++ b/ml/dlib/examples/one_class_classifiers_ex.cpp @@ -0,0 +1,245 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is an example illustrating the use of the tools in dlib for doing distribution + estimation or detecting anomalies using one-class support vector machines. + + Unlike regular classifiers, these tools take unlabeled points and try to learn what + parts of the feature space normally contain data samples and which do not. Typically + you use these tools when you are interested in finding outliers or otherwise + identifying "unusual" data samples. + + In this example, we will sample points from the sinc() function to generate our set of + "typical looking" points. Then we will train some one-class classifiers and use them + to predict if new points are unusual or not. In this case, unusual means a point is + not from the sinc() curve. +*/ + +#include <iostream> +#include <vector> +#include <dlib/svm.h> +#include <dlib/gui_widgets.h> +#include <dlib/array2d.h> +#include <dlib/image_transforms.h> + +using namespace std; +using namespace dlib; + +// Here is the sinc function we will be trying to learn with the one-class SVMs +double sinc(double x) +{ + if (x == 0) + return 2; + return 2*sin(x)/x; +} + +int main() +{ + // We will use column vectors to store our points. Here we make a convenient typedef + // for the kind of vector we will use. + typedef matrix<double,0,1> sample_type; + + // Then we select the kernel we want to use. For our present problem the radial basis + // kernel is quite effective. + typedef radial_basis_kernel<sample_type> kernel_type; + + // Now make the object responsible for training one-class SVMs. + svm_one_class_trainer<kernel_type> trainer; + // Here we set the width of the radial basis kernel to 4.0. Larger values make the + // width smaller and give the radial basis kernel more resolution. If you play with + // the value and observe the program output you will get a more intuitive feel for what + // that means. + trainer.set_kernel(kernel_type(4.0)); + + // Now sample some 2D points. The points will be located on the curve defined by the + // sinc() function. + std::vector<sample_type> samples; + sample_type m(2); + for (double x = -15; x <= 8; x += 0.3) + { + m(0) = x; + m(1) = sinc(x); + samples.push_back(m); + } + + // Now train a one-class SVM. The result is a function, df(), that outputs large + // values for points from the sinc() curve and smaller values for points that are + // anomalous (i.e. not on the sinc() curve in our case). + decision_function<kernel_type> df = trainer.train(samples); + + // So for example, let's look at the output from some points on the sinc() curve. 
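+    // (The absolute scale of df()'s output isn't meaningful by itself.  What
+    // matters is that typical points score noticeably higher than anomalous
+    // ones, so in practice you would pick a decision threshold somewhere
+    // between the two groups of values printed below.)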
+ cout << "Points that are on the sinc function:\n"; + m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << df(m) << endl; + m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << df(m) << endl; + m(0) = -0; m(1) = sinc(m(0)); cout << " " << df(m) << endl; + m(0) = -0.5; m(1) = sinc(m(0)); cout << " " << df(m) << endl; + m(0) = -4.1; m(1) = sinc(m(0)); cout << " " << df(m) << endl; + m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << df(m) << endl; + m(0) = -0.5; m(1) = sinc(m(0)); cout << " " << df(m) << endl; + + cout << endl; + // Now look at some outputs for points not on the sinc() curve. You will see that + // these values are all notably smaller. + cout << "Points that are NOT on the sinc function:\n"; + m(0) = -1.5; m(1) = sinc(m(0))+4; cout << " " << df(m) << endl; + m(0) = -1.5; m(1) = sinc(m(0))+3; cout << " " << df(m) << endl; + m(0) = -0; m(1) = -sinc(m(0)); cout << " " << df(m) << endl; + m(0) = -0.5; m(1) = -sinc(m(0)); cout << " " << df(m) << endl; + m(0) = -4.1; m(1) = sinc(m(0))+2; cout << " " << df(m) << endl; + m(0) = -1.5; m(1) = sinc(m(0))+0.9; cout << " " << df(m) << endl; + m(0) = -0.5; m(1) = sinc(m(0))+1; cout << " " << df(m) << endl; + + // The output is as follows: + /* + Points that are on the sinc function: + 0.000389691 + 0.000389691 + -0.000239037 + -0.000179978 + -0.000178491 + 0.000389691 + -0.000179978 + + Points that are NOT on the sinc function: + -0.269389 + -0.269389 + -0.269389 + -0.269389 + -0.269389 + -0.239954 + -0.264318 + */ + + // So we can see that in this example the one-class SVM correctly indicates that + // the non-sinc points are definitely not points from the sinc() curve. + + + // It should be noted that the svm_one_class_trainer becomes very slow when you have + // more than 10 or 20 thousand training points. However, dlib comes with very fast SVM + // tools which you can use instead at the cost of a little more setup. In particular, + // it is possible to use one of dlib's very fast linear SVM solvers to train a one + // class SVM. This is what we do below. We will train on 115,000 points and it only + // takes a few seconds with this tool! + // + // The first step is constructing a feature space that is appropriate for use with a + // linear SVM. In general, this is quite problem dependent. However, if you have + // under about a hundred dimensions in your vectors then it can often be quite + // effective to use the empirical_kernel_map as we do below (see the + // empirical_kernel_map documentation and example program for an extended discussion of + // what it does). + // + // But putting the empirical_kernel_map aside, the most important step in turning a + // linear SVM into a one-class SVM is the following. We append a -1 value onto the end + // of each feature vector and then tell the trainer to force the weight for this + // feature to 1. This means that if the linear SVM assigned all other weights a value + // of 0 then the output from a learned decision function would always be -1. The + // second step is that we ask the SVM to label each training sample with +1. This + // causes the SVM to set the other feature weights such that the training samples have + // positive outputs from the learned decision function. But the starting bias for all + // the points in the whole feature space is -1. The result is that points outside our + // training set will not be affected, so their outputs from the decision function will + // remain close to -1. 
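+    // To make that concrete with a small sketch (the numbers are illustrative,
+    // not from a real run): if ekm.project(m) produced a 2D vector phi and the
+    // trainer learned w = [w1, w2, 1], the decision function would compute
+    //    dot([phi(0), phi(1), -1], w) = w1*phi(0) + w2*phi(1) - 1.
+    // Far from the training data phi is essentially zero, so the output stays
+    // pinned near -1, which is exactly the "anomalous by default" behavior a
+    // one-class detector needs.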
+ + empirical_kernel_map<kernel_type> ekm; + ekm.load(trainer.get_kernel(),samples); + + samples.clear(); + std::vector<double> labels; + // make a vector with just 1 element in it equal to -1. + sample_type bias(1); + bias = -1; + sample_type augmented; + // This time sample 115,000 points from the sinc() function. + for (double x = -15; x <= 8; x += 0.0002) + { + m(0) = x; + m(1) = sinc(x); + // Apply the empirical_kernel_map transformation and then append the -1 value + augmented = join_cols(ekm.project(m), bias); + samples.push_back(augmented); + labels.push_back(+1); + } + cout << "samples.size(): "<< samples.size() << endl; + + // The svm_c_linear_dcd_trainer is a very fast SVM solver which only works with the + // linear_kernel. It has the nice feature of supporting this "force_last_weight_to_1" + // mode we discussed above. + svm_c_linear_dcd_trainer<linear_kernel<sample_type> > linear_trainer; + linear_trainer.force_last_weight_to_1(true); + + // Train the SVM + decision_function<linear_kernel<sample_type> > df2 = linear_trainer.train(samples, labels); + + // Here we test it as before, again we note that points from the sinc() curve have + // large outputs from the decision function. Note also that we must remember to + // transform the points in exactly the same manner used to construct the training set + // before giving them to df2() or the code will not work. + cout << "Points that are on the sinc function:\n"; + m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl; + m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl; + m(0) = -0; m(1) = sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl; + m(0) = -0.5; m(1) = sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl; + m(0) = -4.1; m(1) = sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl; + m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl; + m(0) = -0.5; m(1) = sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl; + + cout << endl; + // Again, we see here that points not on the sinc() function have small values. + cout << "Points that are NOT on the sinc function:\n"; + m(0) = -1.5; m(1) = sinc(m(0))+4; cout << " " << df2(join_cols(ekm.project(m),bias)) << endl; + m(0) = -1.5; m(1) = sinc(m(0))+3; cout << " " << df2(join_cols(ekm.project(m),bias)) << endl; + m(0) = -0; m(1) = -sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl; + m(0) = -0.5; m(1) = -sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl; + m(0) = -4.1; m(1) = sinc(m(0))+2; cout << " " << df2(join_cols(ekm.project(m),bias)) << endl; + m(0) = -1.5; m(1) = sinc(m(0))+0.9; cout << " " << df2(join_cols(ekm.project(m),bias)) << endl; + m(0) = -0.5; m(1) = sinc(m(0))+1; cout << " " << df2(join_cols(ekm.project(m),bias)) << endl; + + + // The output is as follows: + /* + Points that are on the sinc function: + 1.00454 + 1.00454 + 1.00022 + 1.00007 + 1.00371 + 1.00454 + 1.00007 + + Points that are NOT on the sinc function: + -1 + -1 + -1 + -1 + -0.999998 + -0.781231 + -0.96242 + */ + + + // Finally, to help you visualize what is happening here we are going to plot the + // response of the one-class classifiers on the screen. The code below creates two + // heatmap images which show the response. In these images you can clearly see where + // the algorithms have identified the sinc() curve. 
The hotter the pixel looks, the
+    // larger the value coming out of the decision function and therefore the more "normal"
+    // it is according to the classifier.
+    const long size = 500;
+    array2d<double> img1(size,size);
+    array2d<double> img2(size,size);
+    for (long r = 0; r < img1.nr(); ++r)
+    {
+        for (long c = 0; c < img1.nc(); ++c)
+        {
+            double x = 30.0*c/size - 19;
+            double y = 8.0*r/size - 4;
+            m(0) = x;
+            m(1) = y;
+            img1[r][c] = df(m);
+            img2[r][c] = df2(join_cols(ekm.project(m),bias));
+        }
+    }
+    image_window win1(heatmap(img1), "svm_one_class_trainer");
+    image_window win2(heatmap(img2), "svm_c_linear_dcd_trainer");
+    win1.wait_until_closed();
+}
+
+
diff --git a/ml/dlib/examples/optimization_ex.cpp b/ml/dlib/examples/optimization_ex.cpp
new file mode 100644
index 00000000..2d35fa81
--- /dev/null
+++ b/ml/dlib/examples/optimization_ex.cpp
@@ -0,0 +1,319 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example illustrating the use of the general purpose non-linear
+    optimization routines from the dlib C++ Library.
+
+    The library provides implementations of many popular algorithms such as L-BFGS
+    and BOBYQA.  These algorithms allow you to find the minimum or maximum of a
+    function of many input variables.  This example walks through a few of the ways
+    you might put these routines to use.
+
+*/
+
+
+#include <dlib/optimization.h>
+#include <dlib/global_optimization.h>
+#include <iostream>
+
+
+using namespace std;
+using namespace dlib;
+
+// ----------------------------------------------------------------------------------------
+
+// In dlib, most of the general purpose solvers optimize functions that take a
+// column vector as input and return a double.  So here we make a typedef for a
+// variable length column vector of doubles.  This is the type we will use to
+// represent the input to our objective functions which we will be minimizing.
+typedef matrix<double,0,1> column_vector;
+
+// ----------------------------------------------------------------------------------------
+// Below we create a few functions.  When you get down into main() you will see that
+// we can use the optimization algorithms to find the minimums of these functions.
+// ----------------------------------------------------------------------------------------
+
+double rosen (const column_vector& m)
+/*
+    This function computes what is known as Rosenbrock's function.  It is
+    a function of two input variables and has a global minimum at (1,1).
+    So when we use this function to test out the optimization algorithms
+    we will see that the minimum found is indeed at the point (1,1).
+*/
+{
+    const double x = m(0);
+    const double y = m(1);
+
+    // compute Rosenbrock's function and return the result
+    return 100.0*pow(y - x*x,2) + pow(1 - x,2);
+}
+
+// This is a helper function used while optimizing the rosen() function.
+const column_vector rosen_derivative (const column_vector& m)
+/*!
+    ensures
+        - returns the gradient vector for the rosen function
+!*/
+{
+    const double x = m(0);
+    const double y = m(1);
+
+    // make us a column vector of length 2
+    column_vector res(2);
+
+    // now compute the gradient vector
+    res(0) = -400*x*(y-x*x) - 2*(1-x); // derivative of rosen() with respect to x
+    res(1) = 200*(y-x*x);              // derivative of rosen() with respect to y
+    return res;
+}
+
+// This function computes the Hessian matrix for the rosen() function.  This is
+// the matrix of second derivatives.
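+// Differentiating rosen_derivative() once more gives the three distinct entries
+// filled in below:
+//    d^2f/dx^2  = 1200*x^2 - 400*y + 2
+//    d^2f/dxdy  = -400*x
+//    d^2f/dy^2  = 200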
+matrix<double> rosen_hessian (const column_vector& m)
+{
+    const double x = m(0);
+    const double y = m(1);
+
+    matrix<double> res(2,2);
+
+    // now compute the second derivatives
+    res(0,0) = 1200*x*x - 400*y + 2; // second derivative with respect to x
+    res(1,0) = res(0,1) = -400*x;    // derivative with respect to x and y
+    res(1,1) = 200;                  // second derivative with respect to y
+    return res;
+}
+
+// ----------------------------------------------------------------------------------------
+
+class rosen_model
+{
+    /*!
+        This object is a "function model" which can be used with the
+        find_min_trust_region() routine.
+    !*/
+
+public:
+    typedef ::column_vector column_vector;
+    typedef matrix<double> general_matrix;
+
+    double operator() (
+        const column_vector& x
+    ) const { return rosen(x); }
+
+    void get_derivative_and_hessian (
+        const column_vector& x,
+        column_vector& der,
+        general_matrix& hess
+    ) const
+    {
+        der = rosen_derivative(x);
+        hess = rosen_hessian(x);
+    }
+};
+
+// ----------------------------------------------------------------------------------------
+
+int main() try
+{
+    // Set the starting point to (4,8).  This is the point the optimization algorithm
+    // will start out from and it will move it closer and closer to the function's
+    // minimum point.  So generally you want to try and compute a good guess that is
+    // somewhat near the actual optimum value.
+    column_vector starting_point = {4, 8};
+
+    // The first example below finds the minimum of the rosen() function and uses the
+    // analytical derivative computed by rosen_derivative().  Since it is very easy to
+    // make a mistake while coding a function like rosen_derivative() it is a good idea
+    // to compare your derivative function against a numerical approximation and see if
+    // the results are similar.  If they are very different then you probably made a
+    // mistake.  So the first thing we do is compare the results at a test point:
+    cout << "Difference between analytic derivative and numerical approximation of derivative: "
+         << length(derivative(rosen)(starting_point) - rosen_derivative(starting_point)) << endl;
+
+
+    cout << "Find the minimum of the rosen() function" << endl;
+    // Now we use the find_min() function to find the minimum point.  The first argument
+    // to this routine is the search strategy we want to use.  The second argument is the
+    // stopping strategy.  Below I'm using the objective_delta_stop_strategy which just
+    // says that the search should stop when the change in the function being optimized
+    // is small enough.
+
+    // The other arguments to find_min() are the function to be minimized, its derivative,
+    // then the starting point, and the last is an acceptable minimum value of the rosen()
+    // function.  That is, if the algorithm finds any inputs to rosen() that give an output
+    // value <= -1 then it will stop immediately.  Usually you supply a number smaller than
+    // the actual global minimum.  So since the smallest output of the rosen function is 0
+    // we just put -1 here which effectively causes this last argument to be disregarded.
+
+    find_min(bfgs_search_strategy(),  // Use BFGS search algorithm
+             objective_delta_stop_strategy(1e-7), // Stop when the change in rosen() is less than 1e-7
+             rosen, rosen_derivative, starting_point, -1);
+    // Once the function ends the starting_point vector will contain the optimum point
+    // of (1,1).
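+    // (A quick sanity check: the objective value at the true optimum (1,1) is
+    // exactly 0, so rosen() evaluated at the solution we just found should be
+    // very close to 0.)
+    cout << "rosen value at solution: " << rosen(starting_point) << endl;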
+ cout << "rosen solution:\n" << starting_point << endl; + + + // Now let's try doing it again with a different starting point and the version + // of find_min() that doesn't require you to supply a derivative function. + // This version will compute a numerical approximation of the derivative since + // we didn't supply one to it. + starting_point = {-94, 5.2}; + find_min_using_approximate_derivatives(bfgs_search_strategy(), + objective_delta_stop_strategy(1e-7), + rosen, starting_point, -1); + // Again the correct minimum point is found and stored in starting_point + cout << "rosen solution:\n" << starting_point << endl; + + + // Here we repeat the same thing as above but this time using the L-BFGS + // algorithm. L-BFGS is very similar to the BFGS algorithm, however, BFGS + // uses O(N^2) memory where N is the size of the starting_point vector. + // The L-BFGS algorithm however uses only O(N) memory. So if you have a + // function of a huge number of variables the L-BFGS algorithm is probably + // a better choice. + starting_point = {0.8, 1.3}; + find_min(lbfgs_search_strategy(10), // The 10 here is basically a measure of how much memory L-BFGS will use. + objective_delta_stop_strategy(1e-7).be_verbose(), // Adding be_verbose() causes a message to be + // printed for each iteration of optimization. + rosen, rosen_derivative, starting_point, -1); + + cout << endl << "rosen solution: \n" << starting_point << endl; + + starting_point = {-94, 5.2}; + find_min_using_approximate_derivatives(lbfgs_search_strategy(10), + objective_delta_stop_strategy(1e-7), + rosen, starting_point, -1); + cout << "rosen solution: \n"<< starting_point << endl; + + + + + // dlib also supports solving functions subject to bounds constraints on + // the variables. So for example, if you wanted to find the minimizer + // of the rosen function where both input variables were in the range + // 0.1 to 0.8 you would do it like this: + starting_point = {0.1, 0.1}; // Start with a valid point inside the constraint box. + find_min_box_constrained(lbfgs_search_strategy(10), + objective_delta_stop_strategy(1e-9), + rosen, rosen_derivative, starting_point, 0.1, 0.8); + // Here we put the same [0.1 0.8] range constraint on each variable, however, you + // can put different bounds on each variable by passing in column vectors of + // constraints for the last two arguments rather than scalars. + + cout << endl << "constrained rosen solution: \n" << starting_point << endl; + + // You can also use an approximate derivative like so: + starting_point = {0.1, 0.1}; + find_min_box_constrained(bfgs_search_strategy(), + objective_delta_stop_strategy(1e-9), + rosen, derivative(rosen), starting_point, 0.1, 0.8); + cout << endl << "constrained rosen solution: \n" << starting_point << endl; + + + + + // In many cases, it is useful if we also provide second derivative information + // to the optimizers. Two examples of how we can do that are shown below. + starting_point = {0.8, 1.3}; + find_min(newton_search_strategy(rosen_hessian), + objective_delta_stop_strategy(1e-7), + rosen, + rosen_derivative, + starting_point, + -1); + cout << "rosen solution: \n"<< starting_point << endl; + + // We can also use find_min_trust_region(), which is also a method which uses + // second derivatives. For some kinds of non-convex function it may be more + // reliable than using a newton_search_strategy with find_min(). 
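+    // (The idea behind a trust region method: fit a quadratic model to the
+    // objective, minimize that model, but trust it only within some radius,
+    // then grow or shrink the radius depending on how well the model's
+    // predictions match the actual function values.)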
+    starting_point = {0.8, 1.3};
+    find_min_trust_region(objective_delta_stop_strategy(1e-7),
+                          rosen_model(),
+                          starting_point,
+                          10 // initial trust region radius
+    );
+    cout << "rosen solution: \n"<< starting_point << endl;
+
+
+
+
+    // Next, let's try the BOBYQA algorithm.  This is a technique specially
+    // designed to minimize a function in the absence of derivative information.
+    // Generally speaking, it is the method of choice if derivatives are not available
+    // and the function you are optimizing is smooth and has only one local optimum.  As
+    // an example, consider the be_like_target function defined below:
+    column_vector target = {3, 5, 1, 7};
+    auto be_like_target = [&](const column_vector& x) {
+        return mean(squared(x-target));
+    };
+    starting_point = {-4,5,99,3};
+    find_min_bobyqa(be_like_target,
+                    starting_point,
+                    9,    // number of interpolation points
+                    uniform_matrix<double>(4,1, -1e100),  // lower bound constraint
+                    uniform_matrix<double>(4,1, 1e100),   // upper bound constraint
+                    10,    // initial trust region radius
+                    1e-6,  // stopping trust region radius
+                    100    // max number of objective function evaluations
+    );
+    cout << "be_like_target solution:\n" << starting_point << endl;
+
+
+
+
+
+    // Finally, let's try the find_min_global() routine.  Like find_min_bobyqa(),
+    // this technique is specially designed to minimize a function in the absence
+    // of derivative information.  However, it is also designed to handle
+    // functions with many local optima.  Where BOBYQA would get stuck at the
+    // nearest local optimum, find_min_global() won't.  find_min_global() uses a
+    // global optimization method based on a combination of non-parametric global
+    // function modeling and BOBYQA style quadratic trust region modeling to
+    // efficiently find a global minimizer.  It usually does a good job with a
+    // relatively small number of calls to the function being optimized.
+    //
+    // You also don't have to give it a starting point or set any parameters,
+    // other than defining bounds constraints.  This makes it the method of
+    // choice for derivative free optimization in the presence of multiple local
+    // optima.  Its API also allows you to define functions that take a
+    // column_vector as shown above or to explicitly use named doubles as
+    // arguments, which we do here.
+    auto complex_holder_table = [](double x0, double x1)
+    {
+        // This function is a version of the well known Holder table test
+        // function, which is a function containing a bunch of local optima.
+        // Here we make it even more difficult by adding more local optima
+        // and also a bunch of discontinuities.
+
+        // add discontinuities
+        double sign = 1;
+        for (double j = -4; j < 9; j += 0.5)
+        {
+            if (j < x0 && x0 < j+0.5)
+                x0 += sign*0.25;
+            sign *= -1;
+        }
+        // Holder table function tilted towards 10,10 and with additional
+        // high frequency terms to add more local optima.
+        return -( std::abs(sin(x0)*cos(x1)*exp(std::abs(1-std::sqrt(x0*x0+x1*x1)/pi))) -(x0+x1)/10 - sin(x0*10)*cos(x1*10));
+    };
+
+    // To optimize this difficult function all we need to do is call
+    // find_min_global()
+    auto result = find_min_global(complex_holder_table,
+                                  {-10,-10}, // lower bounds
+                                  {10,10},   // upper bounds
+                                  max_function_calls(300));
+
+    cout.precision(9);
+    // These cout statements will show that find_min_global() found the
+    // globally optimal solution to 9 digits of precision:
+    cout << "complex holder table function solution y (should be -21.9210397): " << result.y << endl;
+    cout << "complex holder table function solution x:\n" << result.x << endl;
+}
+catch (std::exception& e)
+{
+    cout << e.what() << endl;
+}
+
diff --git a/ml/dlib/examples/parallel_for_ex.cpp b/ml/dlib/examples/parallel_for_ex.cpp
new file mode 100644
index 00000000..70fccab2
--- /dev/null
+++ b/ml/dlib/examples/parallel_for_ex.cpp
@@ -0,0 +1,158 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example illustrating the use of the parallel for loop tools from the dlib
+    C++ Library.
+
+    Normally, a for loop executes the body of the loop in a serial manner.  This means
+    that, for example, if it takes 1 second to execute the body of the loop and the body
+    needs to execute 10 times then it will take 10 seconds to execute the entire loop.
+    However, on modern multi-core computers we have the opportunity to speed this up by
+    executing multiple steps of a for loop in parallel.  This example program will walk you
+    through a few examples showing how to do just that.
+*/
+
+
+#include <dlib/threads.h>
+#include <dlib/misc_api.h>  // for dlib::sleep
+#include <vector>
+#include <iostream>
+
+using namespace dlib;
+using namespace std;
+
+// ----------------------------------------------------------------------------------------
+
+void print(const std::vector<int>& vect)
+{
+    for (unsigned long i = 0; i < vect.size(); ++i)
+    {
+        cout << vect[i] << endl;
+    }
+    cout << "\n**************************************\n";
+}
+
+// ----------------------------------------------------------------------------------------
+
+void example_using_regular_non_parallel_loops();
+void example_using_lambda_functions();
+
+// ----------------------------------------------------------------------------------------
+
+int main()
+{
+    // We have 2 examples, each contained in a separate function.  Both examples perform
+    // exactly the same computation, however, the second does so using parallel for loops.
+    // The first example is here to show you what we are doing in terms of classical
+    // non-parallel for loops.  The other example will illustrate how to parallelize the
+    // for loops in C++11.
+
+    example_using_regular_non_parallel_loops();
+    example_using_lambda_functions();
+}
+
+// ----------------------------------------------------------------------------------------
+
+void example_using_regular_non_parallel_loops()
+{
+    cout << "\nExample using regular non-parallel for loops\n" << endl;
+
+    std::vector<int> vect;
+
+    // put 10 elements into vect which are all equal to -1
+    vect.assign(10, -1);
+
+    // Now set each element equal to its index value.  We put a sleep call in here so that
+    // when we run the same thing with a parallel for loop later on you will be able to
+    // observe the speedup.
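+    // (Concretely: 10 iterations at 1 second each means this serial loop takes
+    // about 10 seconds, while the equivalent parallel_for() loop later in this
+    // program should take roughly 10/num_cores seconds, e.g. about 3 seconds on
+    // a 4 core machine since 10 iterations don't split evenly across 4 cores.)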
+    for (unsigned long i = 0; i < vect.size(); ++i)
+    {
+        vect[i] = i;
+        dlib::sleep(1000); // sleep for 1 second
+    }
+    print(vect);
+
+
+
+    // Assign only part of the elements in vect.
+    vect.assign(10, -1);
+    for (unsigned long i = 1; i < 5; ++i)
+    {
+        vect[i] = i;
+        dlib::sleep(1000);
+    }
+    print(vect);
+
+
+
+    // Sum all elements in vect.
+    int sum = 0;
+    vect.assign(10, 2);
+    for (unsigned long i = 0; i < vect.size(); ++i)
+    {
+        dlib::sleep(1000);
+        sum += vect[i];
+    }
+
+    cout << "sum: "<< sum << endl;
+}
+
+// ----------------------------------------------------------------------------------------
+
+void example_using_lambda_functions()
+{
+    cout << "\nExample using parallel for loops\n" << endl;
+
+    std::vector<int> vect;
+
+    vect.assign(10, -1);
+    parallel_for(0, vect.size(), [&](long i){
+        // The i variable is the loop counter as in a normal for loop.  So we simply need
+        // to place the body of the for loop right here and we get the same behavior.  The
+        // range for the for loop is determined by the 1st and 2nd arguments to
+        // parallel_for().  This way of calling parallel_for() will use a number of threads
+        // that is appropriate for your hardware.  See the parallel_for() documentation for
+        // other options.
+        vect[i] = i;
+        dlib::sleep(1000);
+    });
+    print(vect);
+
+
+    // Assign only part of the elements in vect.
+    vect.assign(10, -1);
+    parallel_for(1, 5, [&](long i){
+        vect[i] = i;
+        dlib::sleep(1000);
+    });
+    print(vect);
+
+
+    // Note that things become a little more complex if the loop bodies are not totally
+    // independent.  In the first two cases each iteration of the loop touched different
+    // memory locations, so we didn't need to use any kind of thread synchronization.
+    // However, in the summing loop we need to add some synchronization to protect the sum
+    // variable.  This is easily accomplished by creating a mutex and locking it before
+    // adding to sum.  More generally, you must ensure that the bodies of your parallel for
+    // loops are thread safe using whatever means is appropriate for your code.  Since a
+    // parallel for loop is implemented using threads, all the usual techniques for
+    // ensuring thread safety can be used.
+    int sum = 0;
+    dlib::mutex m;
+    vect.assign(10, 2);
+    parallel_for(0, vect.size(), [&](long i){
+        // The sleep statements still execute in parallel.
+        dlib::sleep(1000);
+
+        // Lock the m mutex.  The auto_mutex will automatically unlock at the closing }.
+        // This will ensure only one thread can execute the sum += vect[i] statement at
+        // a time.
+        auto_mutex lock(m);
+        sum += vect[i];
+    });
+
+    cout << "sum: "<< sum << endl;
+}
+
+// ----------------------------------------------------------------------------------------
+
diff --git a/ml/dlib/examples/pipe_ex.cpp b/ml/dlib/examples/pipe_ex.cpp
new file mode 100644
index 00000000..9298dba1
--- /dev/null
+++ b/ml/dlib/examples/pipe_ex.cpp
@@ -0,0 +1,172 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+
+
+/*
+    This is an example illustrating the use of the threading API and pipe object
+    from the dlib C++ Library.
+
+    In this example we will create three threads that will read "jobs" off the end of
+    a pipe object and process them.  It shows you how you can use the pipe object
+    to communicate between threads.
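+
+    A dlib::pipe is a fixed size, thread-safe FIFO queue: enqueue() blocks when
+    the pipe is full and dequeue() blocks while it is empty, which is what makes
+    it a convenient tool for passing work between threads without any explicit
+    locking.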
+ + + Example program output: + 0 INFO [0] pipe_example: Add job 0 to pipe + 0 INFO [0] pipe_example: Add job 1 to pipe + 0 INFO [0] pipe_example: Add job 2 to pipe + 0 INFO [0] pipe_example: Add job 3 to pipe + 0 INFO [0] pipe_example: Add job 4 to pipe + 0 INFO [0] pipe_example: Add job 5 to pipe + 0 INFO [1] pipe_example: got job 0 + 0 INFO [0] pipe_example: Add job 6 to pipe + 0 INFO [2] pipe_example: got job 1 + 0 INFO [0] pipe_example: Add job 7 to pipe + 0 INFO [3] pipe_example: got job 2 + 103 INFO [0] pipe_example: Add job 8 to pipe + 103 INFO [1] pipe_example: got job 3 + 103 INFO [0] pipe_example: Add job 9 to pipe + 103 INFO [2] pipe_example: got job 4 + 103 INFO [0] pipe_example: Add job 10 to pipe + 103 INFO [3] pipe_example: got job 5 + 207 INFO [0] pipe_example: Add job 11 to pipe + 207 INFO [1] pipe_example: got job 6 + 207 INFO [0] pipe_example: Add job 12 to pipe + 207 INFO [2] pipe_example: got job 7 + 207 INFO [0] pipe_example: Add job 13 to pipe + 207 INFO [3] pipe_example: got job 8 + 311 INFO [1] pipe_example: got job 9 + 311 INFO [2] pipe_example: got job 10 + 311 INFO [3] pipe_example: got job 11 + 311 INFO [0] pipe_example: Add job 14 to pipe + 311 INFO [0] pipe_example: main ending + 311 INFO [0] pipe_example: destructing pipe object: wait for job_pipe to be empty + 415 INFO [1] pipe_example: got job 12 + 415 INFO [2] pipe_example: got job 13 + 415 INFO [3] pipe_example: got job 14 + 415 INFO [0] pipe_example: destructing pipe object: job_pipe is empty + 519 INFO [1] pipe_example: thread ending + 519 INFO [2] pipe_example: thread ending + 519 INFO [3] pipe_example: thread ending + 519 INFO [0] pipe_example: destructing pipe object: all threads have ended + + + The first column is the number of milliseconds since program start, the second + column is the logging level, the third column is the thread id, and the rest + is the log message. +*/ + + +#include <dlib/threads.h> +#include <dlib/misc_api.h> // for dlib::sleep +#include <dlib/pipe.h> +#include <dlib/logger.h> + +using namespace dlib; + +struct job +{ + /* + This object represents the jobs we are going to send out to our threads. + */ + int id; +}; + +dlib::logger dlog("pipe_example"); + +// ---------------------------------------------------------------------------------------- + +class pipe_example : private multithreaded_object +{ +public: + pipe_example( + ) : + job_pipe(4) // This 4 here is the size of our job_pipe. The significance is that + // if you try to enqueue more than 4 jobs onto the pipe then enqueue() will + // block until there is room. + { + // register 3 threads + register_thread(*this,&pipe_example::thread); + register_thread(*this,&pipe_example::thread); + register_thread(*this,&pipe_example::thread); + + // start the 3 threads we registered above + start(); + } + + ~pipe_example ( + ) + { + dlog << LINFO << "destructing pipe object: wait for job_pipe to be empty"; + // wait for all the jobs to be processed + job_pipe.wait_until_empty(); + + dlog << LINFO << "destructing pipe object: job_pipe is empty"; + + // now disable the job_pipe. doing this will cause all calls to + // job_pipe.dequeue() to return false so our threads will terminate + job_pipe.disable(); + + // now block until all the threads have terminated + wait(); + dlog << LINFO << "destructing pipe object: all threads have ended"; + } + + // Here we declare our pipe object. It will contain our job objects. 
+    // There are only two requirements on the type of objects you can use in a
+    // pipe: first, they must have a default constructor and second, they must
+    // be swappable by a global swap().
+    dlib::pipe<job> job_pipe;
+
+private:
+    void thread ()
+    {
+        job j;
+        // Here we loop on jobs from the job_pipe.
+        while (job_pipe.dequeue(j))
+        {
+            // process our job j in some way.
+            dlog << LINFO << "got job " << j.id;
+
+            // sleep for 0.1 seconds
+            dlib::sleep(100);
+        }
+        dlog << LINFO << "thread ending";
+    }
+
+};
+
+// ----------------------------------------------------------------------------------------
+
+int main()
+{
+    // Set the dlog object so that it logs everything.
+    dlog.set_level(LALL);
+
+    pipe_example pe;
+
+    for (int i = 0; i < 15; ++i)
+    {
+        dlog << LINFO << "Add job " << i << " to pipe";
+        job j;
+        j.id = i;
+
+
+        // Add this job to the pipe.  One of our three threads will get it and process it.
+        // It should also be pointed out that the enqueue() function uses the global
+        // swap function to move jobs into the pipe.  This means that it modifies the
+        // jobs we are passing in to it.  This allows you to implement a fast swap
+        // operator for your jobs.  For example, std::vector objects have a global
+        // swap and it can execute in constant time by just swapping pointers inside
+        // std::vector.  This means that the dlib::pipe is effectively a zero-copy
+        // message passing system if you setup global swap for your jobs.
+        pe.job_pipe.enqueue(j);
+    }
+
+    dlog << LINFO << "main ending";
+
+    // the main function won't really terminate here.  It will call the destructor for pe
+    // which will block until all the jobs have been processed.
+}
+
+// ----------------------------------------------------------------------------------------
+
diff --git a/ml/dlib/examples/pipe_ex_2.cpp b/ml/dlib/examples/pipe_ex_2.cpp
new file mode 100644
index 00000000..53998dbe
--- /dev/null
+++ b/ml/dlib/examples/pipe_ex_2.cpp
@@ -0,0 +1,160 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+
+
+/*
+    This is an example showing how to use the type_safe_union and pipe object
+    from the dlib C++ Library to send messages between threads.
+
+    In this example we will create a class with a single thread in it.  This thread
+    will receive messages from a pipe object and simply print them to the screen.
+    The interesting thing about this example is that it shows how to use a pipe and
+    type_safe_union to create a message channel between threads that can send many
+    different types of objects in a type safe manner.
+
+
+
+    Program output:
+    got a float: 4.567
+    got a string: string message
+    got an int: 7
+    got a string: yet another string message
+*/
+
+
+#include <dlib/threads.h>
+#include <dlib/pipe.h>
+#include <dlib/type_safe_union.h>
+#include <iostream>
+
+using namespace dlib;
+using namespace std;
+
+// ----------------------------------------------------------------------------------------
+
+typedef type_safe_union<int, float, std::string> tsu_type;
+/*  This is a typedef for the type_safe_union we will be using in this example.
+    This type_safe_union object is a type-safe analogue of a union declared as follows:
+        union our_union_type
+        {
+            int a;
+            float b;
+            std::string c;
+        };
+
+    Note that the above union isn't actually valid C++ code because it contains a
+    non-POD type.  That is, you can't put a std::string or any non-trivial
+    C++ class in a union.  The type_safe_union, however, enables you to store non-POD
+    types such as the std::string.
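+
+    A quick sketch of the basic API (see main() below for how it is actually
+    used in this example):
+        tsu_type msg;
+        msg.get<int>() = 42;      // msg now contains an int
+        msg.contains<int>();      // == true
+        msg.get<float>() = 1.5f;  // destroys the int, msg now contains a float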
+ +*/ + +// ---------------------------------------------------------------------------------------- + +class pipe_example : private threaded_object +{ +public: + pipe_example( + ) : + message_pipe(4) // This 4 here is the size of our message_pipe. The significance is that + // if you try to enqueue more than 4 messages onto the pipe then enqueue() will + // block until there is room. + { + // start the thread + start(); + } + + ~pipe_example ( + ) + { + // wait for all the messages to be processed + message_pipe.wait_until_empty(); + + // Now disable the message_pipe. Doing this will cause all calls to + // message_pipe.dequeue() to return false so our thread will terminate + message_pipe.disable(); + + // now block until our thread has terminated + wait(); + } + + // Here we declare our pipe object. It will contain our messages. + dlib::pipe<tsu_type> message_pipe; + +private: + + // When we call apply_to_contents() below these are the + // functions which get called. + void operator() (int val) + { + cout << "got an int: " << val << endl; + } + + void operator() (float val) + { + cout << "got a float: " << val << endl; + } + + void operator() (std::string val) + { + cout << "got a string: " << val << endl; + } + + void thread () + { + tsu_type msg; + + // Here we loop on messages from the message_pipe. + while (message_pipe.dequeue(msg)) + { + // Here we call the apply_to_contents() function on our type_safe_union. + // It takes a function object and applies that function object + // to the contents of the union. In our case we have setup + // the pipe_example class as our function object and so below we + // tell the msg object to take whatever it contains and + // call (*this)(contained_object); So what happens here is + // one of the three above functions gets called with the message + // we just got. + msg.apply_to_contents(*this); + } + } + + // Finally, note that since we declared the operator() member functions + // private we need to declare the type_safe_union as a friend of this + // class so that it will be able to call them. + friend class type_safe_union<int, float, std::string>; + +}; + +// ---------------------------------------------------------------------------------------- + +int main() +{ + pipe_example pe; + + // Make one of our type_safe_union objects + tsu_type msg; + + // Treat our msg as a float and assign it 4.567 + msg.get<float>() = 4.567f; + // Now put the message into the pipe + pe.message_pipe.enqueue(msg); + + // Put a string into the pipe + msg.get<std::string>() = "string message"; + pe.message_pipe.enqueue(msg); + + // And now an int + msg.get<int>() = 7; + pe.message_pipe.enqueue(msg); + + // And another string + msg.get<std::string>() = "yet another string message"; + pe.message_pipe.enqueue(msg); + + + // the main function won't really terminate here. It will call the destructor for pe + // which will block until all the messages have been processed. +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/quantum_computing_ex.cpp b/ml/dlib/examples/quantum_computing_ex.cpp new file mode 100644 index 00000000..fcc7c845 --- /dev/null +++ b/ml/dlib/examples/quantum_computing_ex.cpp @@ -0,0 +1,337 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is an example illustrating the use of the quantum computing + simulation classes from the dlib C++ Library. 
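+
+    Note that simulating a quantum register is exponentially expensive: an n
+    qubit register is represented by a dense vector of 2^n complex amplitudes,
+    so every added qubit doubles the memory and the work each gate application
+    requires.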
+ + This example assumes you are familiar with quantum computing and + Grover's search algorithm and Shor's 9 bit error correcting code + in particular. The example shows how to simulate both of these + algorithms. + + + The code to simulate Grover's algorithm is primarily here to show + you how to make custom quantum gate objects. The Shor ECC example + is simpler and uses just the default gates that come with the + library. + +*/ + + +#include <iostream> +#include <complex> +#include <ctime> +#include <dlib/quantum_computing.h> +#include <dlib/string.h> + + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +// This declares a random number generator that we will be using below +dlib::rand rnd; + +// ---------------------------------------------------------------------------------------- + +void shor_encode ( + quantum_register& reg +) +/*! + requires + - reg.num_bits() == 1 + ensures + - #reg.num_bits() == 9 + - #reg == the Shor error coding of the input register +!*/ +{ + DLIB_CASSERT(reg.num_bits() == 1,""); + + quantum_register zeros; + zeros.set_num_bits(8); + reg.append(zeros); + + using namespace dlib::quantum_gates; + const gate<1> h = hadamard(); + const gate<1> i = noop(); + + // Note that the expression (h,i) represents the tensor product of the 1 qubit + // h gate with the 1 qubit i gate and larger versions of this expression + // represent even bigger tensor products. So as you see below, we make gates + // big enough to apply to our quantum register by listing out all the gates we + // want to go into the tensor product and then we just apply the resulting gate + // to the quantum register. + + // Now apply the gates that constitute Shor's encoding to the input register. + (cnot<3,0>(),i,i,i,i,i).apply_gate_to(reg); + (cnot<6,0>(),i,i).apply_gate_to(reg); + (h,i,i,h,i,i,h,i,i).apply_gate_to(reg); + (cnot<1,0>(),i,cnot<1,0>(),i,cnot<1,0>(),i).apply_gate_to(reg); + (cnot<2,0>(),cnot<2,0>(),cnot<2,0>()).apply_gate_to(reg); +} + +// ---------------------------------------------------------------------------------------- + +void shor_decode ( + quantum_register& reg +) +/*! + requires + - reg.num_bits() == 9 + ensures + - #reg.num_bits() == 1 + - #reg == the decoded qubit that was in the given input register +!*/ +{ + DLIB_CASSERT(reg.num_bits() == 9,""); + + using namespace dlib::quantum_gates; + const gate<1> h = hadamard(); + const gate<1> i = noop(); + + // Now apply the gates that constitute Shor's decoding to the input register + + (cnot<2,0>(),cnot<2,0>(),cnot<2,0>()).apply_gate_to(reg); + (cnot<1,0>(),i,cnot<1,0>(),i,cnot<1,0>(),i).apply_gate_to(reg); + + (toffoli<0,1,2>(),toffoli<0,1,2>(),toffoli<0,1,2>()).apply_gate_to(reg); + + (h,i,i,h,i,i,h,i,i).apply_gate_to(reg); + + (cnot<6,0>(),i,i).apply_gate_to(reg); + (cnot<3,0>(),i,i,i,i,i).apply_gate_to(reg); + (toffoli<0,3,6>(),i,i).apply_gate_to(reg); + + // Now that we have decoded the value we don't need the extra 8 bits any more so + // remove them from the register. 
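+    // (Measuring the ancilla bits is safe because, after a successful decode,
+    // the 8 extra qubits are no longer entangled with the data qubit, so
+    // measuring them cannot disturb the recovered state.)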
+ for (int i = 0; i < 8; ++i) + reg.measure_and_remove_bit(0,rnd); +} + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +// This is the function we will use in Grover's search algorithm. In this +// case the value we are searching for is 257. +bool is_key (unsigned long n) +{ + return n == 257; +} + +// ---------------------------------------------------------------------------------------- + +template <int bits> +class uf_gate; + +namespace dlib { +template <int bits> +struct gate_traits<uf_gate<bits> > +{ + static const long num_bits = bits; + static const long dims = dlib::qc_helpers::exp_2_n<num_bits>::value; +};} + +template <int bits> +class uf_gate : public gate_exp<uf_gate<bits> > +{ + /*! + This gate represents the black box function in Grover's search algorithm. + That is, it is the gate defined as follows: + Uf|x>|y> = |x>|y XOR is_key(x)> + + See the documentation for the gate_exp object for the details regarding + the compute_state_element() and operator() functions defined below. + !*/ +public: + uf_gate() : gate_exp<uf_gate>(*this) {} + + static const long num_bits = gate_traits<uf_gate>::num_bits; + static const long dims = gate_traits<uf_gate>::dims; + + const qc_scalar_type operator() (long r, long c) const + { + unsigned long output = c; + // if the input control bit is set + if (is_key(output>>1)) + { + output = output^0x1; + } + + if ((unsigned long)r == output) + return 1; + else + return 0; + } + + template <typename exp> + qc_scalar_type compute_state_element ( + const matrix_exp<exp>& reg, + long row_idx + ) const + { + unsigned long output = row_idx; + // if the input control bit is set + if (is_key(output>>1)) + { + output = output^0x1; + } + + return reg(output); + } +}; + +// ---------------------------------------------------------------------------------------- + +template <int bits> +class w_gate; + +namespace dlib { +template <int bits> +struct gate_traits<w_gate<bits> > +{ + static const long num_bits = bits; + static const long dims = dlib::qc_helpers::exp_2_n<num_bits>::value; +}; } + +template <int bits> +class w_gate : public gate_exp<w_gate<bits> > +{ + /*! 
+ This is the W gate from the Grover algorithm + !*/ +public: + + w_gate() : gate_exp<w_gate>(*this) {} + + static const long num_bits = gate_traits<w_gate>::num_bits; + static const long dims = gate_traits<w_gate>::dims; + + const qc_scalar_type operator() (long r, long c) const + { + qc_scalar_type res = 2.0/dims; + if (r != c) + return res; + else + return res - 1.0; + } + + template <typename exp> + qc_scalar_type compute_state_element ( + const matrix_exp<exp>& reg, + long row_idx + ) const + { + qc_scalar_type temp = sum(reg)*2.0/dims; + // compute this value: temp = temp - reg(row_idx)*2.0/dims + reg(row_idx)*(2.0/dims - 1.0) + temp = temp - reg(row_idx); + + return temp; + } +}; + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +int main() +{ + // seed the random number generator + rnd.set_seed(cast_to_string(time(0))); + + // Pick out some of the gates we will be using below + using namespace dlib::quantum_gates; + const gate<1> h = quantum_gates::hadamard(); + const gate<1> z = quantum_gates::z(); + const gate<1> x = quantum_gates::x(); + const gate<1> i = quantum_gates::noop(); + + quantum_register reg; + + // We will be doing the 12 qubit version of Grover's search algorithm. + const int bits=12; + reg.set_num_bits(bits); + + + // set the quantum register to its initial state + (i,i, i,i,i,i,i, i,i,i,i,x).apply_gate_to(reg); + + // Print out the starting bits + cout << "starting bits: "; + for (int i = reg.num_bits()-1; i >= 0; --i) + cout << reg.probability_of_bit(i); + cout << endl; + + + // Now apply the Hadamard gate to all the input bits + (h,h, h,h,h,h,h, h,h,h,h,h).apply_gate_to(reg); + + // Here we do the grover iteration + for (int j = 0; j < 35; ++j) + { + (uf_gate<bits>()).apply_gate_to(reg); + (w_gate<bits-1>(),i).apply_gate_to(reg); + + + cout << j << " probability: bit 1 = " << reg.probability_of_bit(1) << ", bit 9 = " << reg.probability_of_bit(9) << endl; + } + + cout << endl; + + // Print out the final probability of measuring a 1 for each of the bits + for (int i = reg.num_bits()-1; i >= 1; --i) + cout << "probability for bit " << i << " = " << reg.probability_of_bit(i) << endl; + cout << endl; + + cout << "The value we want grover's search to find is 257 which means we should measure a bit pattern of 00100000001" << endl; + cout << "Measured bits: "; + // finally, measure all the bits and print out what they are. 
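+    // (The 35 Grover iterations used above is about the optimal number: for a
+    // search space of N = 2^11 = 2048 states the success probability peaks
+    // after roughly (pi/4)*sqrt(N), which is about 35.5 iterations.)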
+    for (int i = reg.num_bits()-1; i >= 1; --i)
+        cout << reg.measure_bit(i,rnd);
+    cout << endl;
+
+
+
+
+
+    // Now let's test out the Shor 9-bit encoding
+    cout << "\n\n\n\nNow let's try playing around with Shor's 9-bit error correcting code" << endl;
+
+    // Reset the quantum register to contain a single bit
+    reg.set_num_bits(1);
+    // Set the state of this single qubit to some random mixture of the two computational bases
+    reg.state_vector()(0) = qc_scalar_type(rnd.get_random_double(),rnd.get_random_double());
+    reg.state_vector()(1) = qc_scalar_type(rnd.get_random_double(),rnd.get_random_double());
+    // Make sure the state of the quantum register is a unit vector
+    reg.state_vector() /= sqrt(sum(norm(reg.state_vector())));
+
+    cout << "state: " << trans(reg.state_vector());
+
+    shor_encode(reg);
+    cout << "x bit corruption on bit 8" << endl;
+    (x,i,i,i,i,i,i,i,i).apply_gate_to(reg); // mess up the high order bit
+    shor_decode(reg); // try to decode the register
+
+    cout << "state: " << trans(reg.state_vector());
+
+    shor_encode(reg);
+    cout << "x bit corruption on bit 1" << endl;
+    (i,i,i,i,i,i,i,x,i).apply_gate_to(reg);
+    shor_decode(reg);
+
+    cout << "state: " << trans(reg.state_vector());
+
+    shor_encode(reg);
+    cout << "z bit corruption on bit 8" << endl;
+    (z,i,i,i,i,i,i,i,i).apply_gate_to(reg);
+    shor_decode(reg);
+
+    cout << "state: " << trans(reg.state_vector());
+
+    cout << "\nThe state of the input qubit survived all the corruptions intact, so the code works." << endl;
+
+}
+
+
diff --git a/ml/dlib/examples/queue_ex.cpp b/ml/dlib/examples/queue_ex.cpp
new file mode 100644
index 00000000..c79bf25e
--- /dev/null
+++ b/ml/dlib/examples/queue_ex.cpp
@@ -0,0 +1,78 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example illustrating the use of the queue component (and
+    to some degree the general idea behind most of the other container
+    classes) from the dlib C++ Library.
+
+    It loads a queue with 20 random numbers.  Then it uses the enumerable
+    interface to print them all to the screen.  Then it sorts the numbers and
+    prints them to the screen.
+*/
+
+
+
+
+#include <dlib/queue.h>
+#include <iostream>
+#include <iomanip>
+#include <ctime>
+#include <cstdlib>
+
+
+// I'm picking the version of the queue that is kernel_2a extended by
+// the queue sorting extension.  This is just a normal queue but with the
+// added member function sort() which sorts the queue.
+typedef dlib::queue<int>::sort_1b_c queue_of_int;
+
+
+using namespace std;
+using namespace dlib;
+
+
+int main()
+{
+    queue_of_int q;
+
+    // initialize rand()
+    srand(time(0));
+
+    for (int i = 0; i < 20; ++i)
+    {
+        int a = rand()&0xFF;
+
+        // Note that adding a to the queue "consumes" the value of a because
+        // all container classes move values around by swapping them rather
+        // than copying them.  So a is swapped into the queue, which leaves a
+        // holding an initial value for its type (for int that is just some
+        // undefined value).
+        q.enqueue(a);
+
+    }
+
+
+    cout << "The contents of the queue are:\n";
+    while (q.move_next())
+        cout << q.element() << " ";
+
+    cout << "\n\nNow we sort the queue and its contents are:\n";
+    q.sort();  // Note that we don't have to call q.reset() to put the enumerator
+               // back at the start of the queue because calling sort() does
+               // that automatically for us.  (In general, modifying a container
+               // will reset the enumerator.)
+    while (q.move_next())
+        cout << q.element() << " ";
+
+
+    cout << "\n\nNow we remove the numbers from the queue:\n";
+    while (q.size() > 0)
+    {
+        int a;
+        q.dequeue(a);
+        cout << a << " ";
+    }
+
+
+    cout << endl;
+}
+
diff --git a/ml/dlib/examples/random_cropper_ex.cpp b/ml/dlib/examples/random_cropper_ex.cpp
new file mode 100644
index 00000000..6b020058
--- /dev/null
+++ b/ml/dlib/examples/random_cropper_ex.cpp
@@ -0,0 +1,99 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    When you are training a convolutional neural network using the loss_mmod loss
+    layer, you need to generate a bunch of identically sized training images.  The
+    random_cropper is a convenient tool to help you crop out a bunch of
+    identically sized images from a training dataset.
+
+    This example shows you what it does exactly and talks about some of its options.
+*/
+
+
+#include <iostream>
+#include <dlib/data_io.h>
+#include <dlib/gui_widgets.h>
+#include <dlib/image_transforms.h>
+
+using namespace std;
+using namespace dlib;
+
+// ----------------------------------------------------------------------------------------
+
+int main(int argc, char** argv) try
+{
+    if (argc != 2)
+    {
+        cout << "Give an image dataset XML file to run this program." << endl;
+        cout << "For example, if you are running from the examples folder then run this program by typing" << endl;
+        cout << "   ./random_cropper_ex faces/training.xml" << endl;
+        cout << endl;
+        return 0;
+    }
+
+    // First let's load a dataset
+    std::vector<matrix<rgb_pixel>> images;
+    std::vector<std::vector<mmod_rect>> boxes;
+    load_image_dataset(images, boxes, argv[1]);
+
+    // Here we make our random_cropper.  It has a number of options.
+    random_cropper cropper;
+    // We can tell it how big we want the cropped images to be.
+    cropper.set_chip_dims(400,400);
+    // Also, when doing cropping, it will map the object annotations from the
+    // dataset to the cropped image as well as perform random scale jittering.
+    // You can tell it how much scale jittering you would like by saying "please
+    // make the objects in the crops have a min and max size of such and such".
+    // You do that by calling these two functions.  Here we are saying we want the
+    // objects in our crops to be no more than 0.8*400 pixels in height and width.
+    cropper.set_max_object_size(0.8);
+    // And also that they shouldn't be too small.  Specifically, each object's smallest
+    // dimension (i.e. height or width) should be at least 60 pixels and at least one of
+    // the dimensions must be at least 80 pixels.  So the smallest objects the cropper will
+    // output will be either 80x60 or 60x80.
+    cropper.set_min_object_size(80,60);
+    // The cropper can also randomly mirror and rotate crops, which we ask it to
+    // perform as well.
+    cropper.set_randomly_flip(true);
+    cropper.set_max_rotation_degrees(50);
+    // This fraction of crops is taken from random parts of images, rather than being
+    // centered on some object.
+    cropper.set_background_crops_fraction(0.2);
+
+    // Now ask the cropper to generate a bunch of crops.  The output is stored in
+    // crops and crop_boxes.
+    std::vector<matrix<rgb_pixel>> crops;
+    std::vector<std::vector<mmod_rect>> crop_boxes;
+    // Make 1000 crops.
+    cropper(1000, images, boxes, crops, crop_boxes);
+
+    // Finally, let's look at the results
+    image_window win;
+    for (size_t i = 0; i < crops.size(); ++i)
+    {
+        win.clear_overlay();
+        win.set_image(crops[i]);
+        for (auto b : crop_boxes[i])
+        {
+            // Note that mmod_rect has an ignore field.  If an object was labeled
+            // ignore in boxes then it will still be labeled as ignore in
+            // crop_boxes.  Moreover, objects that are not well contained within
+            // the crop are also set to ignore.
+            if (b.ignore)
+                win.add_overlay(b.rect, rgb_pixel(255,255,0)); // draw ignored boxes as yellow
+            else
+                win.add_overlay(b.rect, rgb_pixel(255,0,0));   // draw other boxes as red
+        }
+        cout << "Hit enter to view the next random crop.";
+        cin.get();
+    }
+
+}
+catch(std::exception& e)
+{
+    cout << e.what() << endl;
+}
+
+
+
+
+
diff --git a/ml/dlib/examples/rank_features_ex.cpp b/ml/dlib/examples/rank_features_ex.cpp
new file mode 100644
index 00000000..548db4be
--- /dev/null
+++ b/ml/dlib/examples/rank_features_ex.cpp
@@ -0,0 +1,152 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example illustrating the use of the rank_features() function
+    from the dlib C++ Library.
+
+    This example creates a simple set of data and then shows
+    you how to use the rank_features() function to find a good
+    set of features (where "good" means the feature set will probably
+    work well with a classification algorithm).
+
+    The data used in this example will be 4 dimensional data and will
+    come from a distribution where points with a distance less than 15
+    from the origin are labeled +1 and all other points are labeled
+    as -1.  Note that this data is conceptually 2 dimensional but we
+    will add two extra features for the purpose of showing what
+    the rank_features() function does.
+*/
+
+
+#include <iostream>
+#include <dlib/svm.h>
+#include <dlib/rand.h>
+#include <vector>
+
+using namespace std;
+using namespace dlib;
+
+
+int main()
+{
+
+    // This first typedef declares a matrix with 4 rows and 1 column.  It will be the
+    // object that contains each of our 4 dimensional samples.
+    typedef matrix<double, 4, 1> sample_type;
+
+
+
+    // Now let's make some vector objects that can hold our samples
+    std::vector<sample_type> samples;
+    std::vector<double> labels;
+
+    dlib::rand rnd;
+
+    for (int x = -30; x <= 30; ++x)
+    {
+        for (int y = -30; y <= 30; ++y)
+        {
+            sample_type samp;
+
+            // The first two features are just the (x,y) position of our points and so
+            // we expect them to be good features since our two classes here are points
+            // close to the origin and points far away from the origin.
+            samp(0) = x;
+            samp(1) = y;
+
+            // This is a worthless feature since it is just random noise.  It should
+            // be indicated as worthless by the rank_features() function below.
+            samp(2) = rnd.get_random_double();
+
+            // This is a version of the y feature that is corrupted by random noise.  It
+            // should be ranked as less useful than features 0 and 1, but more useful
+            // than the above feature.
+            samp(3) = y*0.2 + (rnd.get_random_double()-0.5)*10;
+
+            // add this sample into our vector of samples.
+            samples.push_back(samp);
+
+            // If this point is less than 15 from the origin then label it as a +1 class point,
+            // otherwise it is a -1 class point.
+            if (sqrt((double)x*x + y*y) <= 15)
+                labels.push_back(+1);
+            else
+                labels.push_back(-1);
+        }
+    }
+
+
+    // Here we normalize all the samples by subtracting their mean and dividing by their standard deviation.
+    // This is generally a good idea since it often heads off numerical stability problems and also
+    // prevents one large feature from smothering others.
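+    // Concretely, if m is the mean vector and sd is the vector of standard
+    // deviations, each sample x becomes (x - m) divided elementwise by the
+    // standard deviations.  The code below implements the elementwise division
+    // as a pointwise multiply with reciprocal(stddev(...)).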
+    const sample_type m(mean(mat(samples)));  // compute a mean vector
+    const sample_type sd(reciprocal(stddev(mat(samples)))); // compute a standard deviation vector
+    // now normalize each sample
+    for (unsigned long i = 0; i < samples.size(); ++i)
+        samples[i] = pointwise_multiply(samples[i] - m, sd);
+
+    // This is another thing that is often good to do from a numerical stability point of view.
+    // However, in our case it doesn't really matter.  It's just here to show you how to do it.
+    randomize_samples(samples,labels);
+
+
+
+    // This is a typedef for the type of kernel we are going to use in this example.
+    // In this case I have selected the radial basis kernel that can operate on our
+    // 4D sample_type objects.  In general, I would suggest using the same kernel for
+    // classification and feature ranking.
+    typedef radial_basis_kernel<sample_type> kernel_type;
+
+    // The radial_basis_kernel has a parameter called gamma that we need to set.  Generally,
+    // you should try the same gamma that you are using for training.  But if you don't
+    // have a particular gamma in mind then you can use the following function to
+    // find a reasonable default gamma for your data.  Another reasonable way to pick a gamma
+    // is often to use 1.0/compute_mean_squared_distance(randomly_subsample(samples, 2000)).
+    // It computes the mean squared distance between 2000 randomly selected samples and often
+    // works quite well.
+    const double gamma = verbose_find_gamma_with_big_centroid_gap(samples, labels);
+
+    // Next we declare an instance of the kcentroid object.  It is used by rank_features()
+    // to represent the centroids of the two classes.  The kcentroid has 3 parameters
+    // you need to set.  The first argument to the constructor is the kernel we wish to
+    // use.  The second is a parameter that determines the numerical accuracy with which
+    // the object will perform part of the ranking algorithm.  Generally, smaller values
+    // give better results but cause the algorithm to attempt to use more dictionary vectors
+    // (and thus run slower and use more memory).  The third argument, however, is the
+    // maximum number of dictionary vectors a kcentroid is allowed to use.  So you can use
+    // it to put an upper limit on the runtime complexity.
+    kcentroid<kernel_type> kc(kernel_type(gamma), 0.001, 25);
+
+    // And finally we get to the feature ranking.  Here we call rank_features() with the
+    // kcentroid we just made and the samples and labels we made above.
+    cout << rank_features(kc, samples, labels) << endl;
+
+    // The output is:
+    /*
+        0 0.749265
+        1        1
+        3 0.933378
+        2 0.825179
+    */
+
+    // The first column is a list of the features in order of decreasing goodness.  So the rank_features() function
+    // is telling us that the samples[i](0) and samples[i](1) (i.e. the x and y) features are the best two.  Then
+    // after that the next best feature is the samples[i](3) (i.e. the y corrupted by noise) and finally the worst
+    // feature is the one that is just random noise.  So in this case rank_features did exactly what we would
+    // intuitively expect.
+
+
+    // The second column of the matrix is a number that indicates how much the features up to that point
+    // contribute to the separation of the two classes.  So bigger numbers are better since they
+    // indicate a larger separation.  The max value is always 1.  In the case below we see that the bad
+    // features actually make the class separation go down.
+
+    // So to break it down a little more.
+    //    0 0.749265   <-- class separation of feature 0 all by itself
+    //    1        1   <-- class separation of features 0 and 1
+    //    3 0.933378   <-- class separation of features 0, 1, and 3
+    //    2 0.825179   <-- class separation of features 0, 1, 3, and 2
+
+
+}
+
diff --git a/ml/dlib/examples/running_stats_ex.cpp b/ml/dlib/examples/running_stats_ex.cpp
new file mode 100644
index 00000000..d94faf35
--- /dev/null
+++ b/ml/dlib/examples/running_stats_ex.cpp
@@ -0,0 +1,58 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    This is an example illustrating the use of the running_stats object from the dlib C++
+    Library.  It is a simple tool for computing basic statistics on a stream of numbers.
+    In this example, we sample 100 points from the sinc function and then compute the
+    unbiased sample mean, variance, skewness, and excess kurtosis.
+
+*/
+#include <iostream>
+#include <vector>
+#include <dlib/statistics.h>
+
+using namespace std;
+using namespace dlib;
+
+// Here we define the sinc function so that we may generate sample data.  We compute the mean,
+// variance, skewness, and excess kurtosis of this sample data.
+
+double sinc(double x)
+{
+    if (x == 0)
+        return 1;
+    return sin(x)/x;
+}
+
+int main()
+{
+    running_stats<double> rs;
+
+    double tp1 = 0;
+    double tp2 = 0;
+
+    // We first generate the data and add it sequentially to our running_stats object.  We
+    // then print every fifth data point.
+    for (int x = 1; x <= 100; x++)
+    {
+        tp1 = x/100.0;
+        tp2 = sinc(pi*x/100.0);
+        rs.add(tp2);
+
+        if(x % 5 == 0)
+        {
+            cout << " x = " << tp1 << " sinc(x) = " << tp2 << endl;
+        }
+    }
+
+    // Finally, we compute and print the mean, variance, skewness, and excess kurtosis of
+    // our data.
+
+    cout << endl;
+    cout << "Mean:            " << rs.mean() << endl;
+    cout << "Variance:        " << rs.variance() << endl;
+    cout << "Skewness:        " << rs.skewness() << endl;
+    cout << "Excess Kurtosis: " << rs.ex_kurtosis() << endl;
+
+    return 0;
+}
+
diff --git a/ml/dlib/examples/rvm_ex.cpp b/ml/dlib/examples/rvm_ex.cpp
new file mode 100644
index 00000000..d1d5935e
--- /dev/null
+++ b/ml/dlib/examples/rvm_ex.cpp
@@ -0,0 +1,217 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example illustrating the use of the relevance vector machine
+    utilities from the dlib C++ Library.
+
+    This example creates a simple set of data to train on and then shows
+    you how to use the cross validation and rvm training functions
+    to find a good decision function that can classify examples in our
+    data set.
+
+
+    The data used in this example will be 2 dimensional data and will
+    come from a distribution where points with a distance less than 10
+    from the origin are labeled +1 and all other points are labeled
+    as -1.
+
+*/
+
+
+#include <iostream>
+#include <dlib/svm.h>
+
+using namespace std;
+using namespace dlib;
+
+
+int main()
+{
+    // The rvm functions use column vectors to contain a lot of the data on which they
+    // operate.  So the first thing we do here is declare a convenient typedef.
+
+    // This typedef declares a matrix with 2 rows and 1 column.  It will be the
+    // object that contains each of our 2 dimensional samples.  (Note that if you wanted
+    // more than 2 features in this vector you can simply change the 2 to something else.
+ // Or if you don't know how many features you want until runtime then you can put a 0 + // here and use the matrix.set_size() member function) + typedef matrix<double, 2, 1> sample_type; + + // This is a typedef for the type of kernel we are going to use in this example. + // In this case I have selected the radial basis kernel that can operate on our + // 2D sample_type objects + typedef radial_basis_kernel<sample_type> kernel_type; + + + // Now we make objects to contain our samples and their respective labels. + std::vector<sample_type> samples; + std::vector<double> labels; + + // Now let's put some data into our samples and labels objects. We do this + // by looping over a bunch of points and labeling them according to their + // distance from the origin. + for (int r = -20; r <= 20; ++r) + { + for (int c = -20; c <= 20; ++c) + { + sample_type samp; + samp(0) = r; + samp(1) = c; + samples.push_back(samp); + + // if this point is less than 10 from the origin + if (sqrt((double)r*r + c*c) <= 10) + labels.push_back(+1); + else + labels.push_back(-1); + + } + } + + + // Here we normalize all the samples by subtracting their mean and dividing by their standard deviation. + // This is generally a good idea since it often heads off numerical stability problems and also + // prevents one large feature from smothering others. Doing this doesn't matter much in this example + // so I'm just doing this here so you can see an easy way to accomplish this with + // the library. + vector_normalizer<sample_type> normalizer; + // let the normalizer learn the mean and standard deviation of the samples + normalizer.train(samples); + // now normalize each sample + for (unsigned long i = 0; i < samples.size(); ++i) + samples[i] = normalizer(samples[i]); + + + + + // Now that we have some data we want to train on it. However, there is a parameter to the + // training. This is the gamma parameter of the RBF kernel. Our choice for this parameter will + // influence how good the resulting decision function is. To test how good a particular choice of + // kernel parameters is we can use the cross_validate_trainer() function to perform n-fold cross + // validation on our training data. However, there is a problem with the way we have sampled + // our distribution. The problem is that there is a definite ordering to the samples. + // That is, the first half of the samples look like they are from a different distribution + // than the second half. This would screw up the cross validation process but we can + // fix it by randomizing the order of the samples with the following function call. + randomize_samples(samples, labels); + + + // here we make an instance of the rvm_trainer object that uses our kernel type. + rvm_trainer<kernel_type> trainer; + + // One thing you can do to reduce the RVM training time is to make its + // stopping epsilon bigger. However, this might make the outputs less + // reliable. But sometimes it works out well. 0.001 is the default. + trainer.set_epsilon(0.001); + // You can also set an explicit limit on the number of iterations used by the numeric + // solver. The default is 2000. + trainer.set_max_iterations(2000); + + // Now we loop over some different gamma values to see how good they are. Note + // that this is a very simple way to try out a few possible parameter choices. You + // should look at the model_selection_ex.cpp program for examples of more sophisticated + // strategies for determining good parameter choices. 
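+    // The loop below sweeps gamma over a log-scale grid: 0.000001*5^k for
+    // k = 0, 1, 2, ... for as long as the value stays <= 1 (roughly 1e-6 up to
+    // 0.39).  Note that the 0.08 used afterwards is essentially the
+    // best-performing grid point, 0.000001*5^7 = 0.078125, rounded.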
+ cout << "doing cross validation" << endl; + for (double gamma = 0.000001; gamma <= 1; gamma *= 5) + { + // tell the trainer the parameters we want to use + trainer.set_kernel(kernel_type(gamma)); + + cout << "gamma: " << gamma; + // Print out the cross validation accuracy for 3-fold cross validation using the current gamma. + // cross_validate_trainer() returns a row vector. The first element of the vector is the fraction + // of +1 training examples correctly classified and the second number is the fraction of -1 training + // examples correctly classified. + cout << " cross validation accuracy: " << cross_validate_trainer(trainer, samples, labels, 3); + } + + + // From looking at the output of the above loop it turns out that a good value for + // gamma for this problem is 0.08. So that is what we will use. + + // Now we train on the full set of data and obtain the resulting decision function. We use the + // value of 0.08 for gamma. The decision function will return values >= 0 for samples it predicts + // are in the +1 class and numbers < 0 for samples it predicts to be in the -1 class. + trainer.set_kernel(kernel_type(0.08)); + typedef decision_function<kernel_type> dec_funct_type; + typedef normalized_function<dec_funct_type> funct_type; + + + // Here we are making an instance of the normalized_function object. This object provides a convenient + // way to store the vector normalization information along with the decision function we are + // going to learn. + funct_type learned_function; + learned_function.normalizer = normalizer; // save normalization information + learned_function.function = trainer.train(samples, labels); // perform the actual RVM training and save the results + + // Print out the number of relevance vectors in the resulting decision function. + cout << "\nnumber of relevance vectors in our learned_function is " + << learned_function.function.basis_vectors.size() << endl; + + // Now let's try this decision_function on some samples we haven't seen before + sample_type sample; + + sample(0) = 3.123; + sample(1) = 2; + cout << "This is a +1 class example, the classifier output is " << learned_function(sample) << endl; + + sample(0) = 3.123; + sample(1) = 9.3545; + cout << "This is a +1 class example, the classifier output is " << learned_function(sample) << endl; + + sample(0) = 13.123; + sample(1) = 9.3545; + cout << "This is a -1 class example, the classifier output is " << learned_function(sample) << endl; + + sample(0) = 13.123; + sample(1) = 0; + cout << "This is a -1 class example, the classifier output is " << learned_function(sample) << endl; + + + // We can also train a decision function that reports a well conditioned probability + // instead of just a number > 0 for the +1 class and < 0 for the -1 class. An example + // of doing that follows: + typedef probabilistic_decision_function<kernel_type> probabilistic_funct_type; + typedef normalized_function<probabilistic_funct_type> pfunct_type; + + pfunct_type learned_pfunct; + learned_pfunct.normalizer = normalizer; + learned_pfunct.function = train_probabilistic_decision_function(trainer, samples, labels, 3); + // Now we have a function that returns the probability that a given sample is of the +1 class. + + // print out the number of relevance vectors in the resulting decision function. 
+ // (it should be the same as in the one above) + cout << "\nnumber of relevance vectors in our learned_pfunct is " + << learned_pfunct.function.decision_funct.basis_vectors.size() << endl; + + sample(0) = 3.123; + sample(1) = 2; + cout << "This +1 class example should have high probability. Its probability is: " + << learned_pfunct(sample) << endl; + + sample(0) = 3.123; + sample(1) = 9.3545; + cout << "This +1 class example should have high probability. Its probability is: " + << learned_pfunct(sample) << endl; + + sample(0) = 13.123; + sample(1) = 9.3545; + cout << "This -1 class example should have low probability. Its probability is: " + << learned_pfunct(sample) << endl; + + sample(0) = 13.123; + sample(1) = 0; + cout << "This -1 class example should have low probability. Its probability is: " + << learned_pfunct(sample) << endl; + + + + // Another thing that is worth knowing is that just about everything in dlib is serializable. + // So for example, you can save the learned_pfunct object to disk and recall it later like so: + serialize("saved_function.dat") << learned_pfunct; + + // Now let's open that file back up and load the function object it contains. + deserialize("saved_function.dat") >> learned_pfunct; + +} + diff --git a/ml/dlib/examples/rvm_regression_ex.cpp b/ml/dlib/examples/rvm_regression_ex.cpp new file mode 100644 index 00000000..d65cb520 --- /dev/null +++ b/ml/dlib/examples/rvm_regression_ex.cpp @@ -0,0 +1,101 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is an example illustrating the use of the RVM regression object + from the dlib C++ Library. + + This example will train on data from the sinc function. + +*/ + +#include <iostream> +#include <vector> + +#include <dlib/svm.h> + +using namespace std; +using namespace dlib; + +// Here is the sinc function we will be trying to learn with rvm regression +double sinc(double x) +{ + if (x == 0) + return 1; + return sin(x)/x; +} + +int main() +{ + // Here we declare that our samples will be 1 dimensional column vectors. + typedef matrix<double,1,1> sample_type; + + // Now sample some points from the sinc() function + sample_type m; + std::vector<sample_type> samples; + std::vector<double> labels; + for (double x = -10; x <= 4; x += 1) + { + m(0) = x; + samples.push_back(m); + labels.push_back(sinc(x)); + } + + // Now we are making a typedef for the kind of kernel we want to use. I picked the + // radial basis kernel because it only has one parameter and generally gives good + // results without much fiddling. + typedef radial_basis_kernel<sample_type> kernel_type; + + // Here we declare an instance of the rvm_regression_trainer object. This is the + // object that we will later use to do the training. + rvm_regression_trainer<kernel_type> trainer; + + // Here we set the kernel we want to use for training. The radial_basis_kernel + // has a parameter called gamma that we need to determine. As a rule of thumb, a good + // gamma to try is 1.0/(mean squared distance between your sample points). So + // below we are using a similar value. Note also that using an inappropriately large + // gamma will cause the RVM training algorithm to run extremely slowly. What + // "large" means is relative to how spread out your data is. So it is important + // to use a rule like this as a starting point for determining the gamma value + // if you want to use the RVM. It is also probably a good idea to normalize your + // samples as shown in the rvm_ex.cpp example program. 
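+    // A brief note on the line below: compute_mean_squared_distance() returns
+    // the mean squared distance between the sample points, so 2.0 divided by it
+    // is simply twice the 1.0/(mean squared distance) rule of thumb mentioned
+    // above.  Either choice just gets gamma onto a sensible order of magnitude
+    // for this data.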
+ const double gamma = 2.0/compute_mean_squared_distance(samples); + cout << "using gamma of " << gamma << endl; + trainer.set_kernel(kernel_type(gamma)); + + // One thing you can do to reduce the RVM training time is to make its + // stopping epsilon bigger. However, this might make the outputs less + // reliable. But sometimes it works out well. 0.001 is the default. + trainer.set_epsilon(0.001); + + // now train a function based on our sample points + decision_function<kernel_type> test = trainer.train(samples, labels); + + // now we output the value of the sinc function for a few test points as well as the + // value predicted by our regression. + m(0) = 2.5; cout << sinc(m(0)) << " " << test(m) << endl; + m(0) = 0.1; cout << sinc(m(0)) << " " << test(m) << endl; + m(0) = -4; cout << sinc(m(0)) << " " << test(m) << endl; + m(0) = 5.0; cout << sinc(m(0)) << " " << test(m) << endl; + + // The output is as follows: + //using gamma of 0.05 + //0.239389 0.240989 + //0.998334 0.999538 + //-0.189201 -0.188453 + //-0.191785 -0.226516 + + + // The first column is the true value of the sinc function and the second + // column is the output from the rvm estimate. + + + + // Another thing that is worth knowing is that just about everything in dlib is serializable. + // So for example, you can save the test object to disk and recall it later like so: + serialize("saved_function.dat") << test; + + // Now let's open that file back up and load the function object it contains. + deserialize("saved_function.dat") >> test; + +} + + diff --git a/ml/dlib/examples/sequence_labeler_ex.cpp b/ml/dlib/examples/sequence_labeler_ex.cpp new file mode 100644 index 00000000..bdb666a7 --- /dev/null +++ b/ml/dlib/examples/sequence_labeler_ex.cpp @@ -0,0 +1,392 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the machine learning + tools for sequence labeling in the dlib C++ Library. + + The general problem addressed by these tools is the following. + Suppose you have a set of sequences of some kind and you want to + learn to predict a label for each element of a sequence. So for + example, you might have a set of English sentences where each + word is labeled with its part of speech and you want to learn a + model which can predict the part of speech for each word in a new + sentence. + + Central to these tools is the sequence_labeler object. It is the + object which represents the label prediction model. In particular, + the model used by this object is the following. Given an input + sequence x, predict an output label sequence y such that: + y == argmax_y dot(weight_vector, PSI(x,y)) + where PSI() is supplied by the user and defines the form of the + model. In this example program we will define it such that we + obtain a simple Hidden Markov Model. However, it's possible to + define much more sophisticated models. You should take a look + at the following papers for a few examples: + - Hidden Markov Support Vector Machines by + Y. Altun, I. Tsochantaridis, T. Hofmann + - Shallow Parsing with Conditional Random Fields by + Fei Sha and Fernando Pereira + + + + In the remainder of this example program we will show how to + define your own PSI(), as well as how to learn the "weight_vector" + parameter. Once you have these two items you will be able to + use the sequence_labeler to predict the labels of new sequences. 
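+
+    For the HMM-style PSI() defined in this example, dot(weight_vector, PSI(x,y))
+    works out to a sum of per-position transition and emission scores.  It is
+    this additive structure that lets the argmax over label sequences be
+    computed efficiently with the Viterbi algorithm, as discussed in the
+    feature_extractor comments below.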
+*/
+
+
+#include <iostream>
+#include <dlib/svm_threaded.h>
+#include <dlib/rand.h>
+
+using namespace std;
+using namespace dlib;
+
+
+/*
+    In this example we will be working with a Hidden Markov Model where
+    the hidden nodes and observation nodes both take on 3 different states.
+    The task will be to take a sequence of observations and predict the state
+    of the corresponding hidden nodes.
+*/
+
+const unsigned long num_label_states = 3;
+const unsigned long num_sample_states = 3;
+
+// ----------------------------------------------------------------------------------------
+
+class feature_extractor
+{
+    /*
+        This object is where you define your PSI().  To ensure that the argmax_y
+        remains a tractable problem, the PSI(x,y) vector is actually a sum of vectors,
+        each derived from the entire input sequence x but only part of the label
+        sequence y.  This allows the argmax_y to be efficiently solved using the
+        well known Viterbi algorithm.
+    */
+
+public:
+    // This defines the type used to represent the observed sequence.  You can use
+    // any type here so long as it has a .size() which returns the number of things
+    // in the sequence.
+    typedef std::vector<unsigned long> sequence_type;
+
+    unsigned long num_features() const
+    /*!
+        ensures
+            - returns the dimensionality of the PSI() feature vector.
+    !*/
+    {
+        // Recall that we are defining an HMM.  So in this case the PSI() vector
+        // should have the same dimensionality as the number of parameters in the HMM.
+        return num_label_states*num_label_states + num_label_states*num_sample_states;
+    }
+
+    unsigned long order() const
+    /*!
+        ensures
+            - This object represents a Markov model on the output labels.
+              This parameter defines the order of the model.  That is, this
+              value controls how many previous label values get to be taken
+              into consideration when performing feature extraction for a
+              particular element of the input sequence.  Note that the runtime
+              of the algorithm is exponential in the order.  So don't make order
+              very large.
+    !*/
+    {
+        // In this case we are using an HMM model that only looks at the
+        // previous label.
+        return 1;
+    }
+
+    unsigned long num_labels() const
+    /*!
+        ensures
+            - returns the number of possible output labels.
+    !*/
+    {
+        return num_label_states;
+    }
+
+    template <typename feature_setter, typename EXP>
+    void get_features (
+        feature_setter& set_feature,
+        const sequence_type& x,
+        const matrix_exp<EXP>& y,
+        unsigned long position
+    ) const
+    /*!
+        requires
+            - EXP::type == unsigned long
+              (i.e. y contains unsigned longs)
+            - position < x.size()
+            - y.size() == min(position, order) + 1
+            - is_vector(y) == true
+            - max(y) < num_labels()
+            - set_feature is a function object which allows expressions of the form:
+                - set_feature((unsigned long)feature_index, (double)feature_value);
+                - set_feature((unsigned long)feature_index);
+        ensures
+            - for all valid i:
+                - interprets y(i) as the label corresponding to x[position-i]
+            - This function computes the part of PSI() corresponding to the x[position]
+              element of the input sequence.  Moreover, this part of PSI() is returned as
+              a sparse vector by invoking set_feature().  For example, to set the feature
+              with an index of 55 to the value of 1 this method would call:
+                set_feature(55);
+              Or equivalently:
+                set_feature(55,1);
+              Therefore, the first argument to set_feature is the index of the feature
+              to be set while the second argument is the value the feature should take.
+ Additionally, note that calling set_feature() multiple times with the same + feature index does NOT overwrite the old value, it adds to the previous + value. For example, if you call set_feature(55) 3 times then it will + result in feature 55 having a value of 3. + - This function only calls set_feature() with feature_index values < num_features() + !*/ + { + // Again, the features below only define a simple HMM. But in general, you can + // use a wide variety of sophisticated feature extraction methods here. + + // Pull out an indicator feature for the type of transition between the + // previous label and the current label. + if (y.size() > 1) + set_feature(y(1)*num_label_states + y(0)); + + // Pull out an indicator feature for the type of observed node given + // the current label. + set_feature(num_label_states*num_label_states + + y(0)*num_sample_states + x[position]); + } +}; + +// We need to define serialize() and deserialize() for our feature extractor if we want +// to be able to serialize and deserialize our learned models. In this case the +// implementation is empty since our feature_extractor doesn't have any state. But you +// might define more complex feature extractors which have state that needs to be saved. +void serialize(const feature_extractor&, std::ostream&) {} +void deserialize(feature_extractor&, std::istream&) {} + +// ---------------------------------------------------------------------------------------- + +void make_dataset ( + const matrix<double>& transition_probabilities, + const matrix<double>& emission_probabilities, + std::vector<std::vector<unsigned long> >& samples, + std::vector<std::vector<unsigned long> >& labels, + unsigned long dataset_size +); +/*! + requires + - transition_probabilities.nr() == transition_probabilities.nc() + - transition_probabilities.nr() == emission_probabilities.nr() + - The rows of transition_probabilities and emission_probabilities must sum to 1. + (i.e. sum_cols(transition_probabilities) and sum_cols(emission_probabilities) + must evaluate to vectors of all 1s.) + ensures + - This function randomly samples a bunch of sequences from the HMM defined by + transition_probabilities and emission_probabilities. + - The HMM is defined by: + - The probability of transitioning from hidden state H1 to H2 + is given by transition_probabilities(H1,H2). + - The probability of a hidden state H producing an observed state + O is given by emission_probabilities(H,O). + - #samples.size() == #labels.size() == dataset_size + - for all valid i: + - #labels[i] is a randomly sampled sequence of hidden states from the + given HMM. #samples[i] is its corresponding randomly sampled sequence + of observed states. +!*/ + +// ---------------------------------------------------------------------------------------- + +int main() +{ + // We need a dataset to test the machine learning algorithms. So we are going to + // define a HMM based on the following two matrices and then randomly sample a + // set of data from it. Then we will see if the machine learning method can + // recover the HMM model from the training data. 
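+    // In the two matrices below, row h describes hidden state h:
+    // transition_probabilities(h,j) is the probability of moving from state h
+    // to state j, and emission_probabilities(h,o) is the probability of state h
+    // emitting observation o.  Each row sums to 1, as make_dataset() requires.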
+ + + matrix<double> transition_probabilities(num_label_states, num_label_states); + transition_probabilities = 0.05, 0.90, 0.05, + 0.05, 0.05, 0.90, + 0.90, 0.05, 0.05; + + matrix<double> emission_probabilities(num_label_states,num_sample_states); + emission_probabilities = 0.5, 0.5, 0.0, + 0.0, 0.5, 0.5, + 0.5, 0.0, 0.5; + + std::vector<std::vector<unsigned long> > samples; + std::vector<std::vector<unsigned long> > labels; + // sample 1000 labeled sequences from the HMM. + make_dataset(transition_probabilities,emission_probabilities, + samples, labels, 1000); + + // print out some of the randomly sampled sequences + for (int i = 0; i < 10; ++i) + { + cout << "hidden states: " << trans(mat(labels[i])); + cout << "observed states: " << trans(mat(samples[i])); + cout << "******************************" << endl; + } + + // Next we use the structural_sequence_labeling_trainer to learn our + // prediction model based on just the samples and labels. + structural_sequence_labeling_trainer<feature_extractor> trainer; + // This is the common SVM C parameter. Larger values encourage the + // trainer to attempt to fit the data exactly but might overfit. + // In general, you determine this parameter by cross-validation. + trainer.set_c(4); + // This trainer can use multiple CPU cores to speed up the training. + // So set this to the number of available CPU cores. + trainer.set_num_threads(4); + + + // Learn to do sequence labeling from the dataset + sequence_labeler<feature_extractor> labeler = trainer.train(samples, labels); + + // Test the learned labeler on one of the training samples. In this + // case it will give the correct sequence of labels. + std::vector<unsigned long> predicted_labels = labeler(samples[0]); + cout << "true hidden states: "<< trans(mat(labels[0])); + cout << "predicted hidden states: "<< trans(mat(predicted_labels)); + + + + // We can also do cross-validation. The confusion_matrix is defined as: + // - confusion_matrix(T,P) == the number of times a sequence element with label T + // was predicted to have a label of P. + // So if all predictions are perfect then only diagonal elements of this matrix will + // be non-zero. + matrix<double> confusion_matrix; + confusion_matrix = cross_validate_sequence_labeler(trainer, samples, labels, 4); + cout << "\ncross-validation: " << endl; + cout << confusion_matrix; + cout << "label accuracy: "<< sum(diag(confusion_matrix))/sum(confusion_matrix) << endl; + + // In this case, the label accuracy is about 88%. At this point, we want to know if + // the machine learning method was able to recover the HMM model from the data. So + // to test this, we can load the true HMM model into another sequence_labeler and + // test it out on the data and compare the results. + + matrix<double,0,1> true_hmm_model_weights = log(join_cols(reshape_to_column_vector(transition_probabilities), + reshape_to_column_vector(emission_probabilities))); + // With this model, labeler_true will predict the most probable set of labels + // given an input sequence. That is, it will predict using the equation: + // y == argmax_y dot(true_hmm_model_weights, PSI(x,y)) + sequence_labeler<feature_extractor> labeler_true(true_hmm_model_weights); + + confusion_matrix = test_sequence_labeler(labeler_true, samples, labels); + cout << "\nTrue HMM model: " << endl; + cout << confusion_matrix; + cout << "label accuracy: "<< sum(diag(confusion_matrix))/sum(confusion_matrix) << endl; + + // Happily, we observe that the true model also obtains a label accuracy of 88%. 
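+    // (Note that the learned weight vector doesn't have to equal the true
+    // log-probability weights to match this accuracy.  Only the argmax over
+    // label sequences matters, and many different weight vectors induce the
+    // same ranking of sequences.)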
+ + + + + + + // Finally, the labeler can be serialized to disk just like most dlib objects. + serialize("labeler.dat") << labeler; + + // recall from disk + deserialize("labeler.dat") >> labeler; +} + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// Code for creating a bunch of random samples from our HMM. +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +void sample_hmm ( + dlib::rand& rnd, + const matrix<double>& transition_probabilities, + const matrix<double>& emission_probabilities, + unsigned long previous_label, + unsigned long& next_label, + unsigned long& next_sample +) +/*! + requires + - previous_label < transition_probabilities.nr() + - transition_probabilities.nr() == transition_probabilities.nc() + - transition_probabilities.nr() == emission_probabilities.nr() + - The rows of transition_probabilities and emission_probabilities must sum to 1. + (i.e. sum_cols(transition_probabilities) and sum_cols(emission_probabilities) + must evaluate to vectors of all 1s.) + ensures + - This function randomly samples the HMM defined by transition_probabilities + and emission_probabilities assuming that the previous hidden state + was previous_label. + - The HMM is defined by: + - P(next_label |previous_label) == transition_probabilities(previous_label, next_label) + - P(next_sample|next_label) == emission_probabilities (next_label, next_sample) + - #next_label == the sampled value of the hidden state + - #next_sample == the sampled value of the observed state +!*/ +{ + // sample next_label + double p = rnd.get_random_double(); + for (long c = 0; p >= 0 && c < transition_probabilities.nc(); ++c) + { + next_label = c; + p -= transition_probabilities(previous_label, c); + } + + // now sample next_sample + p = rnd.get_random_double(); + for (long c = 0; p >= 0 && c < emission_probabilities.nc(); ++c) + { + next_sample = c; + p -= emission_probabilities(next_label, c); + } +} + +// ---------------------------------------------------------------------------------------- + +void make_dataset ( + const matrix<double>& transition_probabilities, + const matrix<double>& emission_probabilities, + std::vector<std::vector<unsigned long> >& samples, + std::vector<std::vector<unsigned long> >& labels, + unsigned long dataset_size +) +{ + samples.clear(); + labels.clear(); + + dlib::rand rnd; + + // now randomly sample some labeled sequences from our Hidden Markov Model + for (unsigned long iter = 0; iter < dataset_size; ++iter) + { + const unsigned long sequence_size = rnd.get_random_32bit_number()%20+3; + std::vector<unsigned long> sample(sequence_size); + std::vector<unsigned long> label(sequence_size); + + unsigned long previous_label = rnd.get_random_32bit_number()%num_label_states; + for (unsigned long i = 0; i < sample.size(); ++i) + { + unsigned long next_label = 0, next_sample = 0; + sample_hmm(rnd, transition_probabilities, emission_probabilities, + previous_label, next_label, next_sample); + + label[i] = next_label; + sample[i] = next_sample; + + previous_label = next_label; + } + + samples.push_back(sample); + labels.push_back(label); + } +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/sequence_segmenter_ex.cpp 
b/ml/dlib/examples/sequence_segmenter_ex.cpp
new file mode 100644
index 00000000..3b0eb8cd
--- /dev/null
+++ b/ml/dlib/examples/sequence_segmenter_ex.cpp
@@ -0,0 +1,238 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This example shows how to use dlib to learn to do sequence segmentation.  In a sequence
+    segmentation task we are given a sequence of objects (e.g. words in a sentence) and we
+    are supposed to detect certain subsequences (e.g. the names of people).  Therefore, in
+    the code below we create some very simple training sequences and use them to learn a
+    sequence segmentation model.  In particular, our sequences will be sentences
+    represented as arrays of words and our task will be to learn to identify person names.
+    Once we have our segmentation model we can use it to find names in new sentences, as we
+    will show.
+
+*/
+
+
+#include <iostream>
+#include <cctype>
+#include <dlib/svm_threaded.h>
+#include <dlib/string.h>
+
+using namespace std;
+using namespace dlib;
+
+
+// ----------------------------------------------------------------------------------------
+
+class feature_extractor
+{
+    /*
+        The sequence segmentation models we work with in this example are chain structured
+        conditional random field style models.  Therefore, central to a sequence
+        segmentation model is a feature extractor object.  This object defines all the
+        properties of the model such as how many features it will use, and more importantly,
+        how they are calculated.
+    */
+
+public:
+    // This should be the type used to represent an input sequence.  It can be
+    // anything so long as it has a .size() which returns the length of the sequence.
+    typedef std::vector<std::string> sequence_type;
+
+    // The next four lines define high-level properties of the feature extraction model.
+    // See the documentation for the sequence_labeler object for an extended discussion of
+    // how they are used (note that the main body of the documentation is at the top of the
+    // file documenting the sequence_labeler).
+    const static bool use_BIO_model = true;
+    const static bool use_high_order_features = true;
+    const static bool allow_negative_weights = true;
+    unsigned long window_size() const { return 3; }
+
+    // This function defines the dimensionality of the vectors output by the get_features()
+    // function defined below.
+    unsigned long num_features() const { return 1; }
+
+    template <typename feature_setter>
+    void get_features (
+        feature_setter& set_feature,
+        const sequence_type& sentence,
+        unsigned long position
+    ) const
+    /*!
+        requires
+            - position < sentence.size()
+            - set_feature is a function object which allows expressions of the form:
+                - set_feature((unsigned long)feature_index, (double)feature_value);
+                - set_feature((unsigned long)feature_index);
+        ensures
+            - This function computes a feature vector which should capture the properties
+              of sentence[position] that are informative relative to the sequence
+              segmentation task you are trying to perform.
+            - The output feature vector is returned as a sparse vector by invoking set_feature().
+              For example, to set the feature with an index of 55 to the value of 1
+              this method would call:
+                set_feature(55);
+              Or equivalently:
+                set_feature(55,1);
+              Therefore, the first argument to set_feature is the index of the feature
+              to be set while the second argument is the value the feature should take.
+ Additionally, note that calling set_feature() multiple times with the + same feature index does NOT overwrite the old value, it adds to the + previous value. For example, if you call set_feature(55) 3 times then it + will result in feature 55 having a value of 3. + - This function only calls set_feature() with feature_index values < num_features() + !*/ + { + // The model in this example program is very simple. Our features only look at the + // capitalization pattern of the words. So we have a single feature which checks + // if the first letter is capitalized or not. + if (isupper(sentence[position][0])) + set_feature(0); + } +}; + +// We need to define serialize() and deserialize() for our feature extractor if we want +// to be able to serialize and deserialize our learned models. In this case the +// implementation is empty since our feature_extractor doesn't have any state. But you +// might define more complex feature extractors which have state that needs to be saved. +void serialize(const feature_extractor&, std::ostream&) {} +void deserialize(feature_extractor&, std::istream&) {} + +// ---------------------------------------------------------------------------------------- + +void make_training_examples ( + std::vector<std::vector<std::string> >& samples, + std::vector<std::vector<std::pair<unsigned long, unsigned long> > >& segments +) +/*! + ensures + - This function fills samples with example sentences and segments with the + locations of person names that should be segmented out. + - #samples.size() == #segments.size() +!*/ +{ + std::vector<std::pair<unsigned long, unsigned long> > names; + + + // Here we make our first training example. split() turns the string into an array of + // 10 words and then we store that into samples. + samples.push_back(split("The other day I saw a man named Jim Smith")); + // We want to detect person names. So we note that the name is located within the + // range [8, 10). Note that we use half open ranges to identify segments. So in this + // case, the segment identifies the string "Jim Smith". + names.push_back(make_pair(8, 10)); + segments.push_back(names); names.clear(); + + // Now we add a few more example sentences + + samples.push_back(split("Davis King is the main author of the dlib Library")); + names.push_back(make_pair(0, 2)); + segments.push_back(names); names.clear(); + + + samples.push_back(split("Bob Jones is a name and so is George Clinton")); + names.push_back(make_pair(0, 2)); + names.push_back(make_pair(8, 10)); + segments.push_back(names); names.clear(); + + + samples.push_back(split("My dog is named Bob Barker")); + names.push_back(make_pair(4, 6)); + segments.push_back(names); names.clear(); + + + samples.push_back(split("ABC is an acronym but John James Smith is a name")); + names.push_back(make_pair(5, 8)); + segments.push_back(names); names.clear(); + + + samples.push_back(split("No names in this sentence at all")); + segments.push_back(names); names.clear(); +} + +// ---------------------------------------------------------------------------------------- + +void print_segment ( + const std::vector<std::string>& sentence, + const std::pair<unsigned long,unsigned long>& segment +) +{ + // Recall that a segment is a half open range starting with .first and ending just + // before .second. 
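+    // For example, applied to the first training sentence above, the segment
+    // (8,10) would print "Jim Smith ".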
+ for (unsigned long i = segment.first; i < segment.second; ++i) + cout << sentence[i] << " "; + cout << endl; +} + +// ---------------------------------------------------------------------------------------- + +int main() +{ + // Finally we make it into the main program body. So the first thing we do is get our + // training data. + std::vector<std::vector<std::string> > samples; + std::vector<std::vector<std::pair<unsigned long, unsigned long> > > segments; + make_training_examples(samples, segments); + + + // Next we use the structural_sequence_segmentation_trainer to learn our segmentation + // model based on just the samples and segments. But first we setup some of its + // parameters. + structural_sequence_segmentation_trainer<feature_extractor> trainer; + // This is the common SVM C parameter. Larger values encourage the trainer to attempt + // to fit the data exactly but might overfit. In general, you determine this parameter + // by cross-validation. + trainer.set_c(10); + // This trainer can use multiple CPU cores to speed up the training. So set this to + // the number of available CPU cores. + trainer.set_num_threads(4); + + + // Learn to do sequence segmentation from the dataset + sequence_segmenter<feature_extractor> segmenter = trainer.train(samples, segments); + + + // Let's print out all the segments our segmenter detects. + for (unsigned long i = 0; i < samples.size(); ++i) + { + // get all the detected segments in samples[i] + std::vector<std::pair<unsigned long,unsigned long> > seg = segmenter(samples[i]); + // Print each of them + for (unsigned long j = 0; j < seg.size(); ++j) + { + print_segment(samples[i], seg[j]); + } + } + + + // Now let's test it on a new sentence and see what it detects. + std::vector<std::string> sentence(split("There once was a man from Nantucket whose name rhymed with Bob Bucket")); + std::vector<std::pair<unsigned long,unsigned long> > seg = segmenter(sentence); + for (unsigned long j = 0; j < seg.size(); ++j) + { + print_segment(sentence, seg[j]); + } + + + + // We can also test the accuracy of the segmenter on a dataset. This statement simply + // tests on the training data. In this case we will see that it predicts everything + // correctly. + cout << "\nprecision, recall, f1-score: " << test_sequence_segmenter(segmenter, samples, segments); + // Similarly, we can do 5-fold cross-validation and print the results. Just as before, + // we see everything is predicted correctly. + cout << "precision, recall, f1-score: " << cross_validate_sequence_segmenter(trainer, samples, segments, 5); + + + + + + // Finally, the segmenter can be serialized to disk just like most dlib objects. + serialize("segmenter.dat") << segmenter; + + // recall from disk + deserialize("segmenter.dat") >> segmenter; +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/server_http_ex.cpp b/ml/dlib/examples/server_http_ex.cpp new file mode 100644 index 00000000..99914c14 --- /dev/null +++ b/ml/dlib/examples/server_http_ex.cpp @@ -0,0 +1,108 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This example illustrates the use of the HTTP extension to the server object + from the dlib C++ Library. + It creates a server that always responds with a simple HTML form. 
+
+    To view the page this program displays you should go to http://localhost:5000
+
+*/
+
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <dlib/server.h>
+
+using namespace dlib;
+using namespace std;
+
+class web_server : public server_http
+{
+    const std::string on_request (
+        const incoming_things& incoming,
+        outgoing_things& outgoing
+    )
+    {
+        ostringstream sout;
+        // We are going to send back a page that contains an HTML form with two text input
+        // fields, one called user and one called pass.  The HTML form uses the post method
+        // but could also use the get method (just change method='post' to method='get').
+        sout << " <html> <body> "
+            << "<form action='/form_handler' method='post'> "
+            << "User Name: <input name='user' type='text'><br>  "
+            << "User password: <input name='pass' type='text'> <input type='submit'> "
+            << " </form>";
+
+        // Write out some of the inputs to this request so that they show up on the
+        // resulting web page.
+        sout << "<br>  path = "         << incoming.path << endl;
+        sout << "<br>  request_type = " << incoming.request_type << endl;
+        sout << "<br>  content_type = " << incoming.content_type << endl;
+        sout << "<br>  protocol = "     << incoming.protocol << endl;
+        sout << "<br>  foreign_ip = "   << incoming.foreign_ip << endl;
+        sout << "<br>  foreign_port = " << incoming.foreign_port << endl;
+        sout << "<br>  local_ip = "     << incoming.local_ip << endl;
+        sout << "<br>  local_port = "   << incoming.local_port << endl;
+        sout << "<br>  body = \""       << incoming.body << "\"" << endl;
+
+
+        // If this request is the result of the user submitting the form then echo back
+        // the submission.
+        if (incoming.path == "/form_handler")
+        {
+            sout << "<h2> Stuff from the query string </h2>" << endl;
+            sout << "<br>  user = " << incoming.queries["user"] << endl;
+            sout << "<br>  pass = " << incoming.queries["pass"] << endl;
+
+            // save these form submissions as cookies.
+            outgoing.cookies["user"] = incoming.queries["user"];
+            outgoing.cookies["pass"] = incoming.queries["pass"];
+        }
+
+
+        // Echo any cookies back to the client browser
+        sout << "<h2>Cookies the web browser sent to the server</h2>";
+        for ( key_value_map::const_iterator ci = incoming.cookies.begin(); ci != incoming.cookies.end(); ++ci )
+        {
+            sout << "<br/>" << ci->first << " = " << ci->second << endl;
+        }
+
+        sout << "<br/><br/>";
+
+        sout << "<h2>HTTP Headers the web browser sent to the server</h2>";
+        // Echo out all the HTTP headers we received from the client web browser
+        for ( key_value_map_ci::const_iterator ci = incoming.headers.begin(); ci != incoming.headers.end(); ++ci )
+        {
+            sout << "<br/>" << ci->first << ": " << ci->second << endl;
+        }
+
+        sout << "</body> </html>";
+
+        return sout.str();
+    }
+
+};
+
+int main()
+{
+    try
+    {
+        // create an instance of our web server
+        web_server our_web_server;
+
+        // make it listen on port 5000
+        our_web_server.set_listening_port(5000);
+        // Tell the server to begin accepting connections.
+        our_web_server.start_async();
+
+        cout << "Press enter to end this program" << endl;
+        cin.get();
+    }
+    catch (exception& e)
+    {
+        cout << e.what() << endl;
+    }
+}
+
+
diff --git a/ml/dlib/examples/server_iostream_ex.cpp b/ml/dlib/examples/server_iostream_ex.cpp
new file mode 100644
index 00000000..81fa3001
--- /dev/null
+++ b/ml/dlib/examples/server_iostream_ex.cpp
@@ -0,0 +1,84 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example illustrating the use of the server_iostream object from
+    the dlib C++ Library.
+ + This is a simple echo server. It listens on port 1234 for incoming + connections and just echos back any text it receives, but in upper case. So + basically it is the same as the sockets_ex.cpp example program except it + uses iostreams. + + To test it out you can just open a command prompt and type: + telnet localhost 1234 + + Then you can type away. + +*/ + + + + +#include <dlib/server.h> +#include <iostream> + +using namespace dlib; +using namespace std; + + + +class serv : public server_iostream +{ + + void on_connect ( + std::istream& in, + std::ostream& out, + const std::string& foreign_ip, + const std::string& local_ip, + unsigned short foreign_port, + unsigned short local_port, + uint64 connection_id + ) + { + // The details of the connection are contained in the last few arguments to + // on_connect(). For more information, see the documentation for the + // server_iostream. However, the main arguments of interest are the two streams. + // Here we also print the IP address of the remote machine. + cout << "Got a connection from " << foreign_ip << endl; + + // Loop until we hit the end of the stream. This happens when the connection + // terminates. + while (in.peek() != EOF) + { + // get the next character from the client + char ch = in.get(); + + // now echo it back to them + out << (char)toupper(ch); + } + } + +}; + + +int main() +{ + try + { + serv our_server; + + // set up the server object we have made + our_server.set_listening_port(1234); + // Tell the server to begin accepting connections. + our_server.start_async(); + + cout << "Press enter to end this program" << endl; + cin.get(); + } + catch (exception& e) + { + cout << e.what() << endl; + } +} + + diff --git a/ml/dlib/examples/sockets_ex.cpp b/ml/dlib/examples/sockets_ex.cpp new file mode 100644 index 00000000..5fd9ebe0 --- /dev/null +++ b/ml/dlib/examples/sockets_ex.cpp @@ -0,0 +1,63 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the sockets and + server components from the dlib C++ Library. + + This is a simple echo server. It listens on port 1234 for incoming + connections and just echos back any data it receives. + +*/ + + + + +#include <dlib/sockets.h> +#include <dlib/server.h> +#include <iostream> + +using namespace dlib; +using namespace std; + + + +class serv : public server +{ + void on_connect ( + connection& con + ) + { + char ch; + while (con.read(&ch,1) > 0) + { + // we are just reading one char at a time and writing it back + // to the connection. If there is some problem writing the char + // then we quit the loop. + if (con.write(&ch,1) != 1) + break; + } + } + +}; + + +int main() +{ + try + { + serv our_server; + + // set up the server object we have made + our_server.set_listening_port(1234); + // Tell the server to begin accepting connections. + our_server.start_async(); + + cout << "Press enter to end this program" << endl; + cin.get(); + } + catch (exception& e) + { + cout << e.what() << endl; + } +} + diff --git a/ml/dlib/examples/sockstreambuf_ex.cpp b/ml/dlib/examples/sockstreambuf_ex.cpp new file mode 100644 index 00000000..93200baa --- /dev/null +++ b/ml/dlib/examples/sockstreambuf_ex.cpp @@ -0,0 +1,92 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the sockets and sockstreambuf + components from the dlib C++ Library. 
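+    Stripped of error handling and commentary, the core of the program below
+    is just this sketch:
+
+        std::unique_ptr<connection> con(connect("www.google.com",80));
+        sockstreambuf buf(con);
+        iostream stream(&buf);
+        stream << "GET / HTTP/1.0\r\n\r\n";
+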
Note that there is also an
+    iosockstream object in dlib that is often simpler to use; see
+    iosockstream_ex.cpp for an example of its use.
+
+    This program simply connects to www.google.com at port 80 and requests the
+    main Google web page.  It then prints what it gets back from Google to the
+    screen.
+
+
+    For those of you curious about HTTP check out the excellent introduction at
+    http://www.jmarshall.com/easy/http/
+*/
+
+#include <iostream>
+#include <memory>
+
+#include <dlib/sockets.h>
+#include <dlib/sockstreambuf.h>
+
+using namespace std;
+using namespace dlib;
+
+int main()
+{
+    try
+    {
+        // Connect to Google's web server, which listens on port 80.  If this
+        // fails it will throw a dlib::socket_error exception.  Note that we
+        // are using a smart pointer here to contain the connection pointer
+        // returned from connect.  Doing this ensures that the connection
+        // is deleted even if someone throws an exception somewhere in your code.
+        std::unique_ptr<connection> con(connect("www.google.com",80));
+
+
+        {
+            // Create a stream buffer for our connection
+            sockstreambuf buf(con);
+            // Now stick that stream buffer into an iostream object
+            iostream stream(&buf);
+            // This command causes the iostream to flush its output buffers
+            // whenever someone makes a read request.
+            buf.flush_output_on_read();
+
+            // Now we make the HTTP GET request for the main Google page.
+            stream << "GET / HTTP/1.0\r\n\r\n";
+
+            // Here we print each character we get back one at a time.
+            int ch = stream.get();
+            while (ch != EOF)
+            {
+                cout << (char)ch;
+                ch = stream.get();
+            }
+
+            // At the end of this scope buf will be destructed and flush
+            // anything it still contains to the connection.  Thus putting
+            // this } here makes it safe to destroy the connection later on.
+            // If we just destroyed the connection before buf was destructed
+            // then buf might try to flush its data to a closed connection,
+            // which would be an error.
+        }
+
+        // Here we call close_gracefully().  It takes a connection and performs
+        // a proper TCP shutdown by sending a FIN packet to the other end of the
+        // connection and waiting half a second for the other end to close the
+        // connection as well.  If half a second goes by without the other end
+        // responding then the connection is forcefully shut down and deleted.
+        //
+        // You usually want to perform a graceful shutdown of your TCP connections
+        // because there might be some data you tried to send that is still buffered
+        // in the operating system's output buffers.  If you just killed the
+        // connection it might not be sent to the other side (although maybe you
+        // don't care, and in the case of this example it doesn't really matter;
+        // I'm only putting this here for the purpose of illustration :-).
+        // In any case, this function is provided to allow you to perform a graceful
+        // close if you so choose.
+        //
+        // Also note that the timeout can be changed by supplying an optional argument
+        // to this function.
+        close_gracefully(con);
+    }
+    catch (exception& e)
+    {
+        cout << e.what() << endl;
+    }
+}
+
+
diff --git a/ml/dlib/examples/sqlite_ex.cpp b/ml/dlib/examples/sqlite_ex.cpp
new file mode 100644
index 00000000..4f1e30a2
--- /dev/null
+++ b/ml/dlib/examples/sqlite_ex.cpp
@@ -0,0 +1,137 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+
+/*
+    This example gives a quick overview of dlib's C++ API for the popular SQLite library.
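+
+    For instance, after this program has run once, you can inspect the table it
+    creates with one-off queries built from the same query_int() helper shown
+    below (a hypothetical snippet):
+
+        database db("stuff.db");
+        cout << query_int(db, "select count(*) from people") << endl;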
+*/ + + +#include <iostream> +#include <dlib/sqlite.h> +#include <dlib/matrix.h> + +using namespace dlib; +using namespace std; + +// ---------------------------------------------------------------------------------------- + +bool table_exists ( + database& db, + const std::string& tablename +) +{ + // Sometimes you want to just run a query that returns one thing. In this case, we + // want to see how many tables are in our database with the given tablename. The only + // possible outcomes are 1 or 0 and we can do this by looking in the special + // sqlite_master table that records such database metadata. For these kinds of "one + // result" queries we can use the query_int() method which executes a SQL statement + // against a database and returns the result as an int. + return query_int(db, "select count(*) from sqlite_master where name = '"+tablename+"'")==1; +} + +// ---------------------------------------------------------------------------------------- + +int main() try +{ + // Open the SQLite database in the stuff.db file (or create an empty database in + // stuff.db if it doesn't exist). + database db("stuff.db"); + + // Create a people table that records a person's name, age, and their "data". + if (!table_exists(db,"people")) + db.exec("create table people (name, age, data)"); + + + // Now let's add some data to this table. We can do this by making a statement object + // as shown. Here we use the special ? character to indicate bindable arguments and + // below we will use st.bind() statements to populate those fields with values. + statement st(db, "insert into people VALUES(?,?,?)"); + + // The data for Davis + string name = "Davis"; + int age = 32; + matrix<double> m = randm(3,3); // some random "data" for Davis + + // You can bind any of the built in scalar types (e.g. int, float) or std::string and + // they will go into the table as the appropriate SQL types (e.g. INT, TEXT). If you + // try to bind any other object it will be saved as a binary blob if the type has an + // appropriate void serialize(const T&, std::ostream&) function defined for it. The + // matrix has such a serialize function (as do most dlib types) so the bind below saves + // the matrix as a binary blob. + st.bind(1, name); + st.bind(2, age); + st.bind(3, m); + st.exec(); // execute the SQL statement. This does the insert. + + + // We can reuse the statement to add more data to the database. In fact, if you have a + // bunch of statements to execute it is fastest if you reuse them in this manner. + name = "John"; + age = 82; + m = randm(2,3); + st.bind(1, name); + st.bind(2, age); + st.bind(3, m); + st.exec(); + + + + // Now lets print out all the rows in the people table. + statement st2(db, "select * from people"); + st2.exec(); + // Loop over all the rows obtained by executing the statement with .exec(). + while(st2.move_next()) + { + string name; + int age; + matrix<double> m; + // Analogously to bind, we can grab the columns straight into C++ types. Here the + // matrix is automatically deserialized by calling its deserialize() routine. + st2.get_column(0, name); + st2.get_column(1, age); + st2.get_column(2, m); + cout << name << " " << age << "\n" << m << endl << endl; + } + + + + // Finally, if you want to make a bunch of atomic changes to a database then you should + // do so inside a transaction. Here, either all the database modifications that occur + // between the creation of my_trans and the invocation of my_trans.commit() will appear + // in the database or none of them will. 
This way, if an exception or other error
+    // happens halfway through your transaction you won't be left with your database in an
+    // inconsistent state.
+    //
+    // Additionally, if you are going to do a large number of inserts or updates then it is
+    // much faster to group them into a transaction.
+    transaction my_trans(db);
+
+    name = "Dude";
+    age = 49;
+    m = randm(4,2);
+    st.bind(1, name);
+    st.bind(2, age);
+    st.bind(3, m);
+    st.exec();
+
+    name = "Bob";
+    age = 29;
+    m = randm(2,2);
+    st.bind(1, name);
+    st.bind(2, age);
+    st.bind(3, m);
+    st.exec();
+
+    // If you comment out this line then you will see that these inserts do not take place.
+    // Specifically, what happens is that when my_trans is destructed it rolls back the
+    // entire transaction unless commit() has been called.
+    my_trans.commit();
+
+}
+catch (std::exception& e)
+{
+    cout << e.what() << endl;
+}
+
+// ----------------------------------------------------------------------------------------
+
+
diff --git a/ml/dlib/examples/std_allocator_ex.cpp b/ml/dlib/examples/std_allocator_ex.cpp
new file mode 100644
index 00000000..0dc583fa
--- /dev/null
+++ b/ml/dlib/examples/std_allocator_ex.cpp
@@ -0,0 +1,57 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    This is an example illustrating the use of the dlib::std_allocator object.
+
+    In this example we will create the necessary typedefs to give the
+    dlib::std_allocator object to the standard string and vector objects
+    in the STL.  Thus we will create versions of std::string and std::vector
+    that perform all their memory allocations and deallocations via one of
+    the dlib memory manager objects.
+*/
+
+
+// include everything we need for this example
+#include <vector>
+#include <iostream>
+#include <string>
+#include <dlib/std_allocator.h>
+#include <dlib/memory_manager.h>
+#include <dlib/memory_manager_stateless.h>
+
+using namespace std;
+using namespace dlib;
+
+
+int main()
+{
+    // Make a typedef for an allocator that uses the thread-safe memory_manager_stateless
+    // object with a global memory pool.  This version of the memory_manager_stateless
+    // object keeps everything it allocates in a global memory pool and doesn't release
+    // any memory until the program terminates.
+    typedef std_allocator<char, memory_manager_stateless<char>::kernel_2_3a> alloc_char_with_global_memory_pool;
+
+    // Now make a typedef for a C++ standard string that uses our new allocator type
+    typedef std::basic_string<char, char_traits<char>, alloc_char_with_global_memory_pool > dstring;
+
+
+    // typedef another allocator for dstring objects
+    typedef std_allocator<dstring, memory_manager_stateless<char>::kernel_2_3a> alloc_dstring_with_global_memory_pool;
+
+    // Now make a typedef for a C++ standard vector that uses our new allocator type and
+    // also contains the new dstring
+    typedef std::vector<dstring, alloc_dstring_with_global_memory_pool > dvector;
+
+    // Now we can use the string and vector we have as we normally would.
So for example, I can make a + // dvector and add 4 strings into it like so: + dvector v; + v.push_back("one"); + v.push_back("two"); + v.push_back("three"); + v.push_back("four"); + + // And now we print out the contents of our vector + for (unsigned long i = 0; i < v.size(); ++i) + { + cout << v[i] << endl; + } + +} + diff --git a/ml/dlib/examples/surf_ex.cpp b/ml/dlib/examples/surf_ex.cpp new file mode 100644 index 00000000..70fe1900 --- /dev/null +++ b/ml/dlib/examples/surf_ex.cpp @@ -0,0 +1,82 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is a simple example illustrating the use of the get_surf_points() function. It + pulls out SURF points from an input image and displays them on the screen as an overlay + on the image. + + For a description of the SURF algorithm you should consult the following papers: + This is the original paper which introduced the algorithm: + SURF: Speeded Up Robust Features + By Herbert Bay, Tinne Tuytelaars, and Luc Van Gool + + This paper provides a nice detailed overview of how the algorithm works: + Notes on the OpenSURF Library by Christopher Evans + +*/ + + + +#include <dlib/image_keypoint/draw_surf_points.h> +#include <dlib/image_io.h> +#include <dlib/image_keypoint.h> +#include <fstream> + + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------- + +int main(int argc, char** argv) +{ + try + { + // make sure the user entered an argument to this program + if (argc != 2) + { + cout << "error, you have to enter a BMP file as an argument to this program" << endl; + return 1; + } + + // Here we declare an image object that can store rgb_pixels. Note that in dlib + // there is no explicit image object, just a 2D array and various pixel types. + array2d<rgb_pixel> img; + + // Now load the image file into our image. If something is wrong then load_image() + // will throw an exception. Also, if you linked with libpng and libjpeg then + // load_image() can load PNG and JPEG files in addition to BMP files. + load_image(img, argv[1]); + + // Get SURF points from the image. Note that get_surf_points() has some optional + // arguments that allow you to control the number of points you get back. Here we + // simply take the default. + std::vector<surf_point> sp = get_surf_points(img); + cout << "number of SURF points found: "<< sp.size() << endl; + + if (sp.size() > 0) + { + // A surf_point object contains a lot of information describing each point. + // The most important fields are shown below: + cout << "center of first SURF point: "<< sp[0].p.center << endl; + cout << "pyramid scale: " << sp[0].p.scale << endl; + cout << "SURF descriptor: \n" << sp[0].des << endl; + } + + // Create a window to display the input image and the SURF points. (Note that + // you can zoom into the window by holding CTRL and scrolling the mouse wheel) + image_window my_window(img); + draw_surf_points(my_window, sp); + + // wait until the user closes the window before we let the program + // terminate. + my_window.wait_until_closed(); + } + catch (exception& e) + { + cout << "exception thrown: " << e.what() << endl; + } +} + +// ---------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/svm_c_ex.cpp b/ml/dlib/examples/svm_c_ex.cpp new file mode 100644 index 00000000..b38d0e54 --- /dev/null +++ b/ml/dlib/examples/svm_c_ex.cpp @@ -0,0 +1,266 @@ +// The contents of this file are in the public domain. 
See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the support vector machine + utilities from the dlib C++ Library. In particular, we show how to use the + C parametrization of the SVM in this example. + + This example creates a simple set of data to train on and then shows + you how to use the cross validation and svm training functions + to find a good decision function that can classify examples in our + data set. + + + The data used in this example will be 2 dimensional data and will + come from a distribution where points with a distance less than 10 + from the origin are labeled +1 and all other points are labeled + as -1. + +*/ + + +#include <iostream> +#include <dlib/svm.h> + +using namespace std; +using namespace dlib; + + +int main() +{ + // The svm functions use column vectors to contain a lot of the data on + // which they operate. So the first thing we do here is declare a convenient + // typedef. + + // This typedef declares a matrix with 2 rows and 1 column. It will be the + // object that contains each of our 2 dimensional samples. (Note that if + // you wanted more than 2 features in this vector you can simply change the + // 2 to something else. Or if you don't know how many features you want + // until runtime then you can put a 0 here and use the matrix.set_size() + // member function) + typedef matrix<double, 2, 1> sample_type; + + // This is a typedef for the type of kernel we are going to use in this + // example. In this case I have selected the radial basis kernel that can + // operate on our 2D sample_type objects. You can use your own custom + // kernels with these tools as well, see custom_trainer_ex.cpp for an + // example. + typedef radial_basis_kernel<sample_type> kernel_type; + + + // Now we make objects to contain our samples and their respective labels. + std::vector<sample_type> samples; + std::vector<double> labels; + + // Now let's put some data into our samples and labels objects. We do this + // by looping over a bunch of points and labeling them according to their + // distance from the origin. + for (int r = -20; r <= 20; ++r) + { + for (int c = -20; c <= 20; ++c) + { + sample_type samp; + samp(0) = r; + samp(1) = c; + samples.push_back(samp); + + // if this point is less than 10 from the origin + if (sqrt((double)r*r + c*c) <= 10) + labels.push_back(+1); + else + labels.push_back(-1); + + } + } + + + // Here we normalize all the samples by subtracting their mean and dividing + // by their standard deviation. This is generally a good idea since it + // often heads off numerical stability problems and also prevents one large + // feature from smothering others. Doing this doesn't matter much in this + // example so I'm just doing this here so you can see an easy way to + // accomplish it. + vector_normalizer<sample_type> normalizer; + // Let the normalizer learn the mean and standard deviation of the samples. + normalizer.train(samples); + // now normalize each sample + for (unsigned long i = 0; i < samples.size(); ++i) + samples[i] = normalizer(samples[i]); + + + // Now that we have some data we want to train on it. However, there are + // two parameters to the training. These are the C and gamma parameters. + // Our choice for these parameters will influence how good the resulting + // decision function is. To test how good a particular choice of these + // parameters are we can use the cross_validate_trainer() function to perform + // n-fold cross validation on our training data. 
However, there is a + // problem with the way we have sampled our distribution above. The problem + // is that there is a definite ordering to the samples. That is, the first + // half of the samples look like they are from a different distribution than + // the second half. This would screw up the cross validation process but we + // can fix it by randomizing the order of the samples with the following + // function call. + randomize_samples(samples, labels); + + + // here we make an instance of the svm_c_trainer object that uses our kernel + // type. + svm_c_trainer<kernel_type> trainer; + + // Now we loop over some different C and gamma values to see how good they + // are. Note that this is a very simple way to try out a few possible + // parameter choices. You should look at the model_selection_ex.cpp program + // for examples of more sophisticated strategies for determining good + // parameter choices. + cout << "doing cross validation" << endl; + for (double gamma = 0.00001; gamma <= 1; gamma *= 5) + { + for (double C = 1; C < 100000; C *= 5) + { + // tell the trainer the parameters we want to use + trainer.set_kernel(kernel_type(gamma)); + trainer.set_c(C); + + cout << "gamma: " << gamma << " C: " << C; + // Print out the cross validation accuracy for 3-fold cross validation using + // the current gamma and C. cross_validate_trainer() returns a row vector. + // The first element of the vector is the fraction of +1 training examples + // correctly classified and the second number is the fraction of -1 training + // examples correctly classified. + cout << " cross validation accuracy: " + << cross_validate_trainer(trainer, samples, labels, 3); + } + } + + + // From looking at the output of the above loop it turns out that good + // values for C and gamma for this problem are 5 and 0.15625 respectively. + // So that is what we will use. + + // Now we train on the full set of data and obtain the resulting decision + // function. The decision function will return values >= 0 for samples it + // predicts are in the +1 class and numbers < 0 for samples it predicts to + // be in the -1 class. + trainer.set_kernel(kernel_type(0.15625)); + trainer.set_c(5); + typedef decision_function<kernel_type> dec_funct_type; + typedef normalized_function<dec_funct_type> funct_type; + + // Here we are making an instance of the normalized_function object. This + // object provides a convenient way to store the vector normalization + // information along with the decision function we are going to learn. + funct_type learned_function; + learned_function.normalizer = normalizer; // save normalization information + learned_function.function = trainer.train(samples, labels); // perform the actual SVM training and save the results + + // print out the number of support vectors in the resulting decision function + cout << "\nnumber of support vectors in our learned_function is " + << learned_function.function.basis_vectors.size() << endl; + + // Now let's try this decision_function on some samples we haven't seen before. 
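+    // (Because learned_function is a normalized_function, calling it runs each
+    // sample through the normalizer we trained above before evaluating the
+    // underlying SVM decision function, so we can feed it raw samples.)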
+ sample_type sample; + + sample(0) = 3.123; + sample(1) = 2; + cout << "This is a +1 class example, the classifier output is " << learned_function(sample) << endl; + + sample(0) = 3.123; + sample(1) = 9.3545; + cout << "This is a +1 class example, the classifier output is " << learned_function(sample) << endl; + + sample(0) = 13.123; + sample(1) = 9.3545; + cout << "This is a -1 class example, the classifier output is " << learned_function(sample) << endl; + + sample(0) = 13.123; + sample(1) = 0; + cout << "This is a -1 class example, the classifier output is " << learned_function(sample) << endl; + + + // We can also train a decision function that reports a well conditioned + // probability instead of just a number > 0 for the +1 class and < 0 for the + // -1 class. An example of doing that follows: + typedef probabilistic_decision_function<kernel_type> probabilistic_funct_type; + typedef normalized_function<probabilistic_funct_type> pfunct_type; + + pfunct_type learned_pfunct; + learned_pfunct.normalizer = normalizer; + learned_pfunct.function = train_probabilistic_decision_function(trainer, samples, labels, 3); + // Now we have a function that returns the probability that a given sample is of the +1 class. + + // print out the number of support vectors in the resulting decision function. + // (it should be the same as in the one above) + cout << "\nnumber of support vectors in our learned_pfunct is " + << learned_pfunct.function.decision_funct.basis_vectors.size() << endl; + + sample(0) = 3.123; + sample(1) = 2; + cout << "This +1 class example should have high probability. Its probability is: " + << learned_pfunct(sample) << endl; + + sample(0) = 3.123; + sample(1) = 9.3545; + cout << "This +1 class example should have high probability. Its probability is: " + << learned_pfunct(sample) << endl; + + sample(0) = 13.123; + sample(1) = 9.3545; + cout << "This -1 class example should have low probability. Its probability is: " + << learned_pfunct(sample) << endl; + + sample(0) = 13.123; + sample(1) = 0; + cout << "This -1 class example should have low probability. Its probability is: " + << learned_pfunct(sample) << endl; + + + + // Another thing that is worth knowing is that just about everything in dlib + // is serializable. So for example, you can save the learned_pfunct object + // to disk and recall it later like so: + serialize("saved_function.dat") << learned_pfunct; + + // Now let's open that file back up and load the function object it contains. + deserialize("saved_function.dat") >> learned_pfunct; + + // Note that there is also an example program that comes with dlib called + // the file_to_code_ex.cpp example. It is a simple program that takes a + // file and outputs a piece of C++ code that is able to fully reproduce the + // file's contents in the form of a std::string object. So you can use that + // along with the std::istringstream to save learned decision functions + // inside your actual C++ code files if you want. + + + + + // Lastly, note that the decision functions we trained above involved well + // over 200 basis vectors. Support vector machines in general tend to find + // decision functions that involve a lot of basis vectors. This is + // significant because the more basis vectors in a decision function, the + // longer it takes to classify new examples. So dlib provides the ability + // to find an approximation to the normal output of a trainer using fewer + // basis vectors. 
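+    // (Under the hood this is a reduced set style approximation: the returned
+    // trainer searches for a small set of basis vectors whose decision
+    // function closely matches the one the full trainer would produce.)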
+ + // Here we determine the cross validation accuracy when we approximate the + // output using only 10 basis vectors. To do this we use the reduced2() + // function. It takes a trainer object and the number of basis vectors to + // use and returns a new trainer object that applies the necessary post + // processing during the creation of decision function objects. + cout << "\ncross validation accuracy with only 10 support vectors: " + << cross_validate_trainer(reduced2(trainer,10), samples, labels, 3); + + // Let's print out the original cross validation score too for comparison. + cout << "cross validation accuracy with all the original support vectors: " + << cross_validate_trainer(trainer, samples, labels, 3); + + // When you run this program you should see that, for this problem, you can + // reduce the number of basis vectors down to 10 without hurting the cross + // validation accuracy. + + + // To get the reduced decision function out we would just do this: + learned_function.function = reduced2(trainer,10).train(samples, labels); + // And similarly for the probabilistic_decision_function: + learned_pfunct.function = train_probabilistic_decision_function(reduced2(trainer,10), samples, labels, 3); +} + diff --git a/ml/dlib/examples/svm_ex.cpp b/ml/dlib/examples/svm_ex.cpp new file mode 100644 index 00000000..3d5d0bb8 --- /dev/null +++ b/ml/dlib/examples/svm_ex.cpp @@ -0,0 +1,255 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the support vector machine + utilities from the dlib C++ Library. + + This example creates a simple set of data to train on and then shows + you how to use the cross validation and svm training functions + to find a good decision function that can classify examples in our + data set. + + + The data used in this example will be 2 dimensional data and will + come from a distribution where points with a distance less than 10 + from the origin are labeled +1 and all other points are labeled + as -1. + +*/ + + +#include <iostream> +#include <dlib/svm.h> + +using namespace std; +using namespace dlib; + + +int main() +{ + // The svm functions use column vectors to contain a lot of the data on which they + // operate. So the first thing we do here is declare a convenient typedef. + + // This typedef declares a matrix with 2 rows and 1 column. It will be the object that + // contains each of our 2 dimensional samples. (Note that if you wanted more than 2 + // features in this vector you can simply change the 2 to something else. Or if you + // don't know how many features you want until runtime then you can put a 0 here and + // use the matrix.set_size() member function) + typedef matrix<double, 2, 1> sample_type; + + // This is a typedef for the type of kernel we are going to use in this example. In + // this case I have selected the radial basis kernel that can operate on our 2D + // sample_type objects + typedef radial_basis_kernel<sample_type> kernel_type; + + + // Now we make objects to contain our samples and their respective labels. + std::vector<sample_type> samples; + std::vector<double> labels; + + // Now let's put some data into our samples and labels objects. We do this by looping + // over a bunch of points and labeling them according to their distance from the + // origin. 
+ for (int r = -20; r <= 20; ++r) + { + for (int c = -20; c <= 20; ++c) + { + sample_type samp; + samp(0) = r; + samp(1) = c; + samples.push_back(samp); + + // if this point is less than 10 from the origin + if (sqrt((double)r*r + c*c) <= 10) + labels.push_back(+1); + else + labels.push_back(-1); + + } + } + + + // Here we normalize all the samples by subtracting their mean and dividing by their + // standard deviation. This is generally a good idea since it often heads off + // numerical stability problems and also prevents one large feature from smothering + // others. Doing this doesn't matter much in this example so I'm just doing this here + // so you can see an easy way to accomplish this with the library. + vector_normalizer<sample_type> normalizer; + // let the normalizer learn the mean and standard deviation of the samples + normalizer.train(samples); + // now normalize each sample + for (unsigned long i = 0; i < samples.size(); ++i) + samples[i] = normalizer(samples[i]); + + + // Now that we have some data we want to train on it. However, there are two + // parameters to the training. These are the nu and gamma parameters. Our choice for + // these parameters will influence how good the resulting decision function is. To + // test how good a particular choice of these parameters is we can use the + // cross_validate_trainer() function to perform n-fold cross validation on our training + // data. However, there is a problem with the way we have sampled our distribution + // above. The problem is that there is a definite ordering to the samples. That is, + // the first half of the samples look like they are from a different distribution than + // the second half. This would screw up the cross validation process but we can fix it + // by randomizing the order of the samples with the following function call. + randomize_samples(samples, labels); + + + // The nu parameter has a maximum value that is dependent on the ratio of the +1 to -1 + // labels in the training data. This function finds that value. + const double max_nu = maximum_nu(labels); + + // here we make an instance of the svm_nu_trainer object that uses our kernel type. + svm_nu_trainer<kernel_type> trainer; + + // Now we loop over some different nu and gamma values to see how good they are. Note + // that this is a very simple way to try out a few possible parameter choices. You + // should look at the model_selection_ex.cpp program for examples of more sophisticated + // strategies for determining good parameter choices. + cout << "doing cross validation" << endl; + for (double gamma = 0.00001; gamma <= 1; gamma *= 5) + { + for (double nu = 0.00001; nu < max_nu; nu *= 5) + { + // tell the trainer the parameters we want to use + trainer.set_kernel(kernel_type(gamma)); + trainer.set_nu(nu); + + cout << "gamma: " << gamma << " nu: " << nu; + // Print out the cross validation accuracy for 3-fold cross validation using + // the current gamma and nu. cross_validate_trainer() returns a row vector. + // The first element of the vector is the fraction of +1 training examples + // correctly classified and the second number is the fraction of -1 training + // examples correctly classified. + cout << " cross validation accuracy: " << cross_validate_trainer(trainer, samples, labels, 3); + } + } + + + // From looking at the output of the above loop it turns out that a good value for nu + // and gamma for this problem is 0.15625 for both. So that is what we will use. 
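+    // (As an aside, nu has a fairly direct interpretation: roughly speaking it
+    // upper bounds the fraction of training examples allowed to become margin
+    // errors and lower bounds the fraction that become support vectors, so
+    // larger nu values yield smoother, more error-tolerant decision boundaries.)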
+ + // Now we train on the full set of data and obtain the resulting decision function. We + // use the value of 0.15625 for nu and gamma. The decision function will return values + // >= 0 for samples it predicts are in the +1 class and numbers < 0 for samples it + // predicts to be in the -1 class. + trainer.set_kernel(kernel_type(0.15625)); + trainer.set_nu(0.15625); + typedef decision_function<kernel_type> dec_funct_type; + typedef normalized_function<dec_funct_type> funct_type; + + // Here we are making an instance of the normalized_function object. This object + // provides a convenient way to store the vector normalization information along with + // the decision function we are going to learn. + funct_type learned_function; + learned_function.normalizer = normalizer; // save normalization information + learned_function.function = trainer.train(samples, labels); // perform the actual SVM training and save the results + + // print out the number of support vectors in the resulting decision function + cout << "\nnumber of support vectors in our learned_function is " + << learned_function.function.basis_vectors.size() << endl; + + // Now let's try this decision_function on some samples we haven't seen before. + sample_type sample; + + sample(0) = 3.123; + sample(1) = 2; + cout << "This is a +1 class example, the classifier output is " << learned_function(sample) << endl; + + sample(0) = 3.123; + sample(1) = 9.3545; + cout << "This is a +1 class example, the classifier output is " << learned_function(sample) << endl; + + sample(0) = 13.123; + sample(1) = 9.3545; + cout << "This is a -1 class example, the classifier output is " << learned_function(sample) << endl; + + sample(0) = 13.123; + sample(1) = 0; + cout << "This is a -1 class example, the classifier output is " << learned_function(sample) << endl; + + + // We can also train a decision function that reports a well conditioned probability + // instead of just a number > 0 for the +1 class and < 0 for the -1 class. An example + // of doing that follows: + typedef probabilistic_decision_function<kernel_type> probabilistic_funct_type; + typedef normalized_function<probabilistic_funct_type> pfunct_type; + + pfunct_type learned_pfunct; + learned_pfunct.normalizer = normalizer; + learned_pfunct.function = train_probabilistic_decision_function(trainer, samples, labels, 3); + // Now we have a function that returns the probability that a given sample is of the +1 class. + + // print out the number of support vectors in the resulting decision function. + // (it should be the same as in the one above) + cout << "\nnumber of support vectors in our learned_pfunct is " + << learned_pfunct.function.decision_funct.basis_vectors.size() << endl; + + sample(0) = 3.123; + sample(1) = 2; + cout << "This +1 class example should have high probability. Its probability is: " + << learned_pfunct(sample) << endl; + + sample(0) = 3.123; + sample(1) = 9.3545; + cout << "This +1 class example should have high probability. Its probability is: " + << learned_pfunct(sample) << endl; + + sample(0) = 13.123; + sample(1) = 9.3545; + cout << "This -1 class example should have low probability. Its probability is: " + << learned_pfunct(sample) << endl; + + sample(0) = 13.123; + sample(1) = 0; + cout << "This -1 class example should have low probability. Its probability is: " + << learned_pfunct(sample) << endl; + + + + // Another thing that is worth knowing is that just about everything in dlib is + // serializable. 
So for example, you can save the learned_pfunct object to disk and + // recall it later like so: + serialize("saved_function.dat") << learned_pfunct; + + // Now let's open that file back up and load the function object it contains. + deserialize("saved_function.dat") >> learned_pfunct; + + // Note that there is also an example program that comes with dlib called the + // file_to_code_ex.cpp example. It is a simple program that takes a file and outputs a + // piece of C++ code that is able to fully reproduce the file's contents in the form of + // a std::string object. So you can use that along with the std::istringstream to save + // learned decision functions inside your actual C++ code files if you want. + + + + + // Lastly, note that the decision functions we trained above involved well over 200 + // basis vectors. Support vector machines in general tend to find decision functions + // that involve a lot of basis vectors. This is significant because the more basis + // vectors in a decision function, the longer it takes to classify new examples. So + // dlib provides the ability to find an approximation to the normal output of a trainer + // using fewer basis vectors. + + // Here we determine the cross validation accuracy when we approximate the output using + // only 10 basis vectors. To do this we use the reduced2() function. It takes a + // trainer object and the number of basis vectors to use and returns a new trainer + // object that applies the necessary post processing during the creation of decision + // function objects. + cout << "\ncross validation accuracy with only 10 support vectors: " + << cross_validate_trainer(reduced2(trainer,10), samples, labels, 3); + + // Let's print out the original cross validation score too for comparison. + cout << "cross validation accuracy with all the original support vectors: " + << cross_validate_trainer(trainer, samples, labels, 3); + + // When you run this program you should see that, for this problem, you can reduce the + // number of basis vectors down to 10 without hurting the cross validation accuracy. + + + // To get the reduced decision function out we would just do this: + learned_function.function = reduced2(trainer,10).train(samples, labels); + // And similarly for the probabilistic_decision_function: + learned_pfunct.function = train_probabilistic_decision_function(reduced2(trainer,10), samples, labels, 3); +} + diff --git a/ml/dlib/examples/svm_pegasos_ex.cpp b/ml/dlib/examples/svm_pegasos_ex.cpp new file mode 100644 index 00000000..e69b485f --- /dev/null +++ b/ml/dlib/examples/svm_pegasos_ex.cpp @@ -0,0 +1,160 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the dlib C++ library's + implementation of the pegasos algorithm for online training of support + vector machines. + + This example creates a simple binary classification problem and shows + you how to train a support vector machine on that data. + + The data used in this example will be 2 dimensional data and will + come from a distribution where points with a distance less than 10 + from the origin are labeled +1 and all other points are labeled + as -1. + +*/ + + +#include <iostream> +#include <ctime> +#include <vector> +#include <dlib/svm.h> + +using namespace std; +using namespace dlib; + + +int main() +{ + // The svm functions use column vectors to contain a lot of the data on which they + // operate. So the first thing we do here is declare a convenient typedef. 
+
+    // This typedef declares a matrix with 2 rows and 1 column.  It will be the
+    // object that contains each of our 2 dimensional samples.  (Note that if you wanted
+    // more than 2 features in this vector you can simply change the 2 to something else.
+    // Or if you don't know how many features you want until runtime then you can put a 0
+    // here and use the matrix.set_size() member function)
+    typedef matrix<double, 2, 1> sample_type;
+
+
+    // This is a typedef for the type of kernel we are going to use in this example.
+    // In this case I have selected the radial basis kernel that can operate on our
+    // 2D sample_type objects
+    typedef radial_basis_kernel<sample_type> kernel_type;
+
+
+    // Here we create an instance of the pegasos svm trainer object we will be using.
+    svm_pegasos<kernel_type> trainer;
+    // Here we set up the parameters of this object.  See the dlib documentation for a
+    // description of what these parameters are.
+    trainer.set_lambda(0.00001);
+    trainer.set_kernel(kernel_type(0.005));
+
+    // Set the maximum number of support vectors we want the trainer object to use
+    // in representing the decision function it is going to learn.  In general,
+    // supplying a bigger number here will only ever give you a more accurate
+    // answer.  However, giving a smaller number will make the algorithm run
+    // faster and decision rules that involve fewer support vectors also take
+    // less time to evaluate.
+    trainer.set_max_num_sv(10);
+
+    std::vector<sample_type> samples;
+    std::vector<double> labels;
+
+    // make an instance of a sample matrix so we can use it below
+    sample_type sample, center;
+
+    center = 20, 20;
+
+    // Now let's go into a loop and randomly generate 10000 samples.
+    srand(time(0));
+    for (int i = 0; i < 10000; ++i)
+    {
+        // Make a random sample vector.
+        sample = randm(2,1)*40 - center;
+
+        // Now if that random vector is less than 10 units from the origin then it is in
+        // the +1 class.
+        if (length(sample) <= 10)
+        {
+            // let the svm_pegasos learn about this sample
+            trainer.train(sample,+1);
+
+            // save this sample so we can use it with the batch training examples below
+            samples.push_back(sample);
+            labels.push_back(+1);
+        }
+        else
+        {
+            // let the svm_pegasos learn about this sample
+            trainer.train(sample,-1);
+
+            // save this sample so we can use it with the batch training examples below
+            samples.push_back(sample);
+            labels.push_back(-1);
+        }
+    }
+
+    // Now we have trained our SVM.  Let's see how well it did.
+    // Each of these statements prints out the output of the SVM given a particular sample.
+    // The SVM outputs a number > 0 if a sample is predicted to be in the +1 class and < 0
+    // if a sample is predicted to be in the -1 class.
+
+    sample(0) = 3.123;
+    sample(1) = 4;
+    cout << "This is a +1 example, its SVM output is: " << trainer(sample) << endl;
+
+    sample(0) = 13.123;
+    sample(1) = 9.3545;
+    cout << "This is a -1 example, its SVM output is: " << trainer(sample) << endl;
+
+    sample(0) = 13.123;
+    sample(1) = 0;
+    cout << "This is a -1 example, its SVM output is: " << trainer(sample) << endl;
+
+
+
+
+
+    // The previous part of this example program showed you how to perform online training
+    // with the pegasos algorithm.  But it is often the case that you have a dataset and you
+    // just want to perform batch learning on that dataset and get the resulting decision
+    // function.  To support this the dlib library provides functions for converting an online
+    // training object like svm_pegasos into a batch training object.
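+    // (The "cached" in the batch_cached() function used below refers to a
+    // kernel cache: kernel evaluations are remembered and reused between
+    // iterations, which typically makes batch training much faster at the
+    // cost of some extra memory.)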
+ + // First let's clear out anything in the trainer object. + trainer.clear(); + + // Now to begin with, you might want to compute the cross validation score of a trainer object + // on your data. To do this you should use the batch_cached() function to convert the svm_pegasos object + // into a batch training object. Note that the second argument to batch_cached() is the minimum + // learning rate the trainer object must report for the batch_cached() function to consider training + // complete. So smaller values of this parameter cause training to take longer but may result + // in a more accurate solution. + // Here we perform 4-fold cross validation and print the results + cout << "cross validation: " << cross_validate_trainer(batch_cached(trainer,0.1), samples, labels, 4); + + // Here is an example of creating a decision function. Note that we have used the verbose_batch_cached() + // function instead of batch_cached() as above. They do the same things except verbose_batch_cached() will + // print status messages to standard output while training is under way. + decision_function<kernel_type> df = verbose_batch_cached(trainer,0.1).train(samples, labels); + + // At this point we have obtained a decision function from the above batch mode training. + // Now we can use it on some test samples exactly as we did above. + + sample(0) = 3.123; + sample(1) = 4; + cout << "This is a +1 example, its SVM output is: " << df(sample) << endl; + + sample(0) = 13.123; + sample(1) = 9.3545; + cout << "This is a -1 example, its SVM output is: " << df(sample) << endl; + + sample(0) = 13.123; + sample(1) = 0; + cout << "This is a -1 example, its SVM output is: " << df(sample) << endl; + + +} + diff --git a/ml/dlib/examples/svm_rank_ex.cpp b/ml/dlib/examples/svm_rank_ex.cpp new file mode 100644 index 00000000..e39b90a1 --- /dev/null +++ b/ml/dlib/examples/svm_rank_ex.cpp @@ -0,0 +1,151 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the SVM-Rank tool from the dlib + C++ Library. This is a tool useful for learning to rank objects. For + example, you might use it to learn to rank web pages in response to a + user's query. The idea being to rank the most relevant pages higher than + non-relevant pages. + + + In this example, we will create a simple test dataset and show how to learn + a ranking function from it. The purpose of the function will be to give + "relevant" objects higher scores than "non-relevant" objects. The idea is + that you use this score to order the objects so that the most relevant + objects come to the top of the ranked list. + + + + Note that we use dense vectors (i.e. dlib::matrix objects) in this example, + however, the ranking tools can also use sparse vectors as well. See + svm_sparse_ex.cpp for an example. +*/ + +#include <dlib/svm.h> +#include <iostream> + + +using namespace std; +using namespace dlib; + + +int main() +{ + try + { + // Make a typedef for the kind of object we will be ranking. In this + // example, we are ranking 2-dimensional vectors. + typedef matrix<double,2,1> sample_type; + + + // Now let's make some testing data. To make it really simple, let's + // suppose that vectors with positive values in the first dimension + // should rank higher than other vectors. So what we do is make + // examples of relevant (i.e. high ranking) and non-relevant (i.e. 
low
+        // ranking) vectors and store them into a ranking_pair object like so:
+        ranking_pair<sample_type> data;
+        sample_type samp;
+
+        // Make one relevant example.
+        samp = 1, 0;
+        data.relevant.push_back(samp);
+
+        // Now make a non-relevant example.
+        samp = 0, 1;
+        data.nonrelevant.push_back(samp);
+
+
+        // Now that we have some data, we can use a machine learning method to
+        // learn a function that will give high scores to the relevant vectors
+        // and low scores to the non-relevant vectors.
+
+        // The first thing we do is select the kernel we want to use.  For the
+        // svm_rank_trainer there are only two options: the linear_kernel and
+        // the sparse_linear_kernel.  The latter is used if you want to use sparse
+        // vectors to represent your objects.  Since we are using dense vectors
+        // (i.e. dlib::matrix objects) to represent the vectors, we use the
+        // linear_kernel.
+        typedef linear_kernel<sample_type> kernel_type;
+
+        // Now make a trainer and tell it to learn a ranking function based on
+        // our data.
+        svm_rank_trainer<kernel_type> trainer;
+        decision_function<kernel_type> rank = trainer.train(data);
+
+        // Now if you call rank on a vector it will output a ranking score.  In
+        // particular, the ranking score for relevant vectors should be larger
+        // than the score for non-relevant vectors.
+        cout << "ranking score for a relevant vector:     " << rank(data.relevant[0]) << endl;
+        cout << "ranking score for a non-relevant vector: " << rank(data.nonrelevant[0]) << endl;
+        // These output the following:
+        /*
+            ranking score for a relevant vector:     0.5
+            ranking score for a non-relevant vector: -0.5
+        */
+
+
+        // If we want an overall measure of ranking accuracy we can compute the
+        // ordering accuracy and mean average precision values by calling
+        // test_ranking_function().  In this case, the ordering accuracy tells
+        // us how often a relevant vector was correctly ranked ahead of a
+        // non-relevant vector.  This function will return a 1 by 2 matrix
+        // containing these measures.  In this case, it returns 1 1, indicating
+        // that the rank function outputs a perfect ranking.
+        cout << "testing (ordering accuracy, mean average precision): " << test_ranking_function(rank, data) << endl;
+
+        // We can also see the ranking weights:
+        cout << "learned ranking weights: \n" << rank.basis_vectors(0) << endl;
+        // In this case they are:
+        //  0.5
+        // -0.5
+
+
+
+
+
+        // In the above example, our data contains just two sets of objects:
+        // the relevant set and the non-relevant set.  The trainer is attempting
+        // to find a ranking function that gives every relevant vector a higher
+        // score than every non-relevant vector.  Sometimes what you want to do
+        // is a little more complex than this.
+        //
+        // For example, in the web page ranking example we have to rank pages
+        // based on a user's query.  In this case, each query will have its own
+        // set of relevant and non-relevant documents.  What might be relevant
+        // to one query may well be non-relevant to another.  So in this case
+        // we don't have a single global set of relevant web pages and another
+        // set of non-relevant web pages.
+        //
+        // To handle cases like this, we can simply give multiple ranking_pair
+        // instances to the trainer.  Then each ranking_pair would represent the
+        // relevant/non-relevant sets for a particular query.  An example is
+        // shown below (for simplicity, we reuse our data from above to make 4
+        // identical "queries").
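+        // (In a real application each ranking_pair would be built from the
+        // relevance judgments for one particular query rather than copied.
+        // For instance, a hypothetical second query could contribute its own
+        // pair like so:
+        //     ranking_pair<sample_type> data2;
+        //     samp = 2, 0;   data2.relevant.push_back(samp);
+        //     samp = 0, 3;   data2.nonrelevant.push_back(samp);
+        // and data2 would then be pushed onto the queries vector below.)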
+ + std::vector<ranking_pair<sample_type> > queries; + queries.push_back(data); + queries.push_back(data); + queries.push_back(data); + queries.push_back(data); + + // We train just as before. + rank = trainer.train(queries); + + + // Now that we have multiple ranking_pair instances, we can also use + // cross_validate_ranking_trainer(). This performs cross-validation by + // splitting the queries up into folds. That is, it lets the trainer + // train on a subset of ranking_pair instances and tests on the rest. + // It does this over 4 different splits and returns the overall ranking + // accuracy based on the held out data. Just like test_ranking_function(), + // it reports both the ordering accuracy and mean average precision. + cout << "cross-validation (ordering accuracy, mean average precision): " + << cross_validate_ranking_trainer(trainer, queries, 4) << endl; + + } + catch (std::exception& e) + { + cout << e.what() << endl; + } +} + diff --git a/ml/dlib/examples/svm_sparse_ex.cpp b/ml/dlib/examples/svm_sparse_ex.cpp new file mode 100644 index 00000000..5d68e4a2 --- /dev/null +++ b/ml/dlib/examples/svm_sparse_ex.cpp @@ -0,0 +1,120 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example showing how to use sparse feature vectors with + the dlib C++ library's machine learning tools. + + This example creates a simple binary classification problem and shows + you how to train a support vector machine on that data. + + The data used in this example will be 100 dimensional data and will + come from a simple linearly separable distribution. +*/ + + +#include <iostream> +#include <ctime> +#include <vector> +#include <dlib/svm.h> + +using namespace std; +using namespace dlib; + + +int main() +{ + // In this example program we will be dealing with feature vectors that are sparse (i.e. most + // of the values in each vector are zero). So rather than using a dlib::matrix we can use + // one of the containers from the STL to represent our sample vectors. In particular, we + // can use the std::map to represent sparse vectors. (Note that you don't have to use std::map. + // Any STL container of std::pair objects that is sorted can be used. So for example, you could + // use a std::vector<std::pair<unsigned long,double> > here so long as you took care to sort every vector) + typedef std::map<unsigned long,double> sample_type; + + + // This is a typedef for the type of kernel we are going to use in this example. + // Since our data is linearly separable I picked the linear kernel. Note that if you + // are using a sparse vector representation like std::map then you have to use a kernel + // meant to be used with that kind of data type. + typedef sparse_linear_kernel<sample_type> kernel_type; + + + // Here we create an instance of the pegasos svm trainer object we will be using. + svm_pegasos<kernel_type> trainer; + // Here we setup a parameter to this object. See the dlib documentation for a + // description of what this parameter does. + trainer.set_lambda(0.00001); + + // Let's also use the svm trainer specially optimized for the linear_kernel and + // sparse_linear_kernel. + svm_c_linear_trainer<kernel_type> linear_trainer; + // This trainer solves the "C" formulation of the SVM. See the documentation for + // details. 
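+    // As in the other SVM examples, larger values of C encourage the trainer to
+    // fit the training data more exactly, at the risk of overfitting.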
+ linear_trainer.set_c(10); + + std::vector<sample_type> samples; + std::vector<double> labels; + + // make an instance of a sample vector so we can use it below + sample_type sample; + + + // Now let's go into a loop and randomly generate 10000 samples. + srand(time(0)); + double label = +1; + for (int i = 0; i < 10000; ++i) + { + // flip this flag + label *= -1; + + sample.clear(); + + // now make a random sparse sample with at most 10 non-zero elements + for (int j = 0; j < 10; ++j) + { + int idx = std::rand()%100; + double value = static_cast<double>(std::rand())/RAND_MAX; + + sample[idx] = label*value; + } + + // let the svm_pegasos learn about this sample. + trainer.train(sample,label); + + // Also save the samples we are generating so we can let the svm_c_linear_trainer + // learn from them below. + samples.push_back(sample); + labels.push_back(label); + } + + // In addition to the rule we learned with the pegasos trainer, let's also use our + // linear_trainer to learn a decision rule. + decision_function<kernel_type> df = linear_trainer.train(samples, labels); + + // Now we have trained our SVMs. Let's test them out a bit. + // Each of these statements prints the output of the SVMs given a particular sample. + // Each SVM outputs a number > 0 if a sample is predicted to be in the +1 class and < 0 + // if a sample is predicted to be in the -1 class. + + + sample.clear(); + sample[4] = 0.3; + sample[10] = 0.9; + cout << "This is a +1 example, its SVM output is: " << trainer(sample) << endl; + cout << "df: " << df(sample) << endl; + + sample.clear(); + sample[83] = -0.3; + sample[26] = -0.9; + sample[58] = -0.7; + cout << "This is a -1 example, its SVM output is: " << trainer(sample) << endl; + cout << "df: " << df(sample) << endl; + + sample.clear(); + sample[0] = -0.2; + sample[9] = -0.8; + cout << "This is a -1 example, its SVM output is: " << trainer(sample) << endl; + cout << "df: " << df(sample) << endl; + +} + diff --git a/ml/dlib/examples/svm_struct_ex.cpp b/ml/dlib/examples/svm_struct_ex.cpp new file mode 100644 index 00000000..f79ae4d1 --- /dev/null +++ b/ml/dlib/examples/svm_struct_ex.cpp @@ -0,0 +1,414 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the structural SVM solver from the dlib C++ + Library. Therefore, this example teaches you the central ideas needed to setup a + structural SVM model for your machine learning problems. To illustrate the process, we + use dlib's structural SVM solver to learn the parameters of a simple multi-class + classifier. We first discuss the multi-class classifier model and then walk through + using the structural SVM tools to find the parameters of this classification model. + +*/ + + +#include <iostream> +#include <dlib/svm_threaded.h> + +using namespace std; +using namespace dlib; + + +// Before we start, we define three typedefs we will use throughout this program. The +// first is used to represent the parameter vector the structural SVM is learning, the +// second is used to represent the "sample type". In this example program it is just a +// vector but in general when using a structural SVM your sample type can be anything you +// want (e.g. a string or an image). The last typedef is the type used to represent the +// PSI vector which is part of the structural SVM model which we will explain in detail +// later on. But the important thing to note here is that you can use either a dense +// representation (i.e. 
a dlib::matrix object) or a sparse representation for the PSI +// vector. See svm_sparse_ex.cpp for an introduction to sparse vectors in dlib. Here we +// use the same type for each of these three things to keep the example program simple. +typedef matrix<double,0,1> column_vector; // Must be a dlib::matrix type. +typedef matrix<double,0,1> sample_type; // Can be anything you want. +typedef matrix<double,0,1> feature_vector_type; // Must be dlib::matrix or some kind of sparse vector. + +// ---------------------------------------------------------------------------------------- + +int predict_label (const column_vector& weights, const sample_type& sample); +column_vector train_three_class_classifier (const std::vector<sample_type>& samples, const std::vector<int>& labels); + +// ---------------------------------------------------------------------------------------- + +int main() +{ + // In this example, we have three types of samples: class 0, 1, or 2. That is, each of + // our sample vectors falls into one of three classes. To keep this example very + // simple, each sample vector is zero everywhere except at one place. The non-zero + // dimension of each vector determines the class of the vector. So for example, the + // first element of samples has a class of 1 because samples[0](1) is the only non-zero + // element of samples[0]. + sample_type samp(3); + std::vector<sample_type> samples; + samp = 0,2,0; samples.push_back(samp); + samp = 1,0,0; samples.push_back(samp); + samp = 0,4,0; samples.push_back(samp); + samp = 0,0,3; samples.push_back(samp); + // Since we want to use a machine learning method to learn a 3-class classifier we need + // to record the labels of our samples. Here samples[i] has a class label of labels[i]. + std::vector<int> labels; + labels.push_back(1); + labels.push_back(0); + labels.push_back(1); + labels.push_back(2); + + + // Now that we have some training data we can tell the structural SVM to learn the + // parameters of our 3-class classifier model. The details of this will be explained + // later. For now, just note that it finds the weights (i.e. a vector of real valued + // parameters) such that predict_label(weights, sample) always returns the correct + // label for a sample vector. + column_vector weights = train_three_class_classifier(samples, labels); + + // Print the weights and then evaluate predict_label() on each of our training samples. + // Note that the correct label is predicted for each sample. + cout << weights << endl; + for (unsigned long i = 0; i < samples.size(); ++i) + cout << "predicted label for sample["<<i<<"]: " << predict_label(weights, samples[i]) << endl; +} + +// ---------------------------------------------------------------------------------------- + +int predict_label ( + const column_vector& weights, + const sample_type& sample +) +/*! + requires + - weights.size() == 9 + - sample.size() == 3 + ensures + - Given the 9-dimensional weight vector which defines a 3 class classifier, this + function predicts the class of the given 3-dimensional sample vector. + Therefore, the output of this function is either 0, 1, or 2 (i.e. one of the + three possible labels). +!*/ +{ + // Our 3-class classifier model can be thought of as containing 3 separate linear + // classifiers. So to predict the class of a sample vector we evaluate each of these + // three classifiers and then whatever classifier has the largest output "wins" and + // predicts the label of the sample. This is the popular one-vs-all multi-class + // classifier model. 
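+    //
+    // As a concrete (hypothetical) illustration: if weights were
+    // [1,0,0, 0,1,0, 0,0,1], so that w0=(1,0,0), w1=(0,1,0), and w2=(0,0,1), then for
+    // the sample (0,2,0) the three scores would be 0, 2, and 0.  Classifier 1 wins, so
+    // the predicted label is 1, matching the labeling scheme used in main() above.
+    // (These particular weights are made up for illustration; the real weights are
+    // whatever train_three_class_classifier() finds.)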
+ // + // Keeping this in mind, the code below simply pulls the three separate weight vectors + // out of weights and then evaluates each against sample. The individual classifier + // scores are stored in scores and the highest scoring index is returned as the label. + column_vector w0, w1, w2; + w0 = rowm(weights, range(0,2)); + w1 = rowm(weights, range(3,5)); + w2 = rowm(weights, range(6,8)); + + column_vector scores(3); + scores = dot(w0, sample), dot(w1, sample), dot(w2, sample); + + return index_of_max(scores); +} + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +class three_class_classifier_problem : public structural_svm_problem_threaded<column_vector, feature_vector_type> +{ + /*! + Now we arrive at the meat of this example program. To use dlib's structural SVM + solver you need to define an object which tells the structural SVM solver what to + do for your problem. In this example, this is done by defining the three_class_classifier_problem + object which inherits from structural_svm_problem_threaded. Before we get into the + details, we first discuss some background information on structural SVMs. + + A structural SVM is a supervised machine learning method for learning to predict + complex outputs. This is contrasted with a binary classifier which makes only simple + yes/no predictions. A structural SVM, on the other hand, can learn to predict + complex outputs such as entire parse trees or DNA sequence alignments. To do this, + it learns a function F(x,y) which measures how well a particular data sample x + matches a label y, where a label is potentially a complex thing like a parse tree. + However, to keep this example program simple we use only a 3 category label output. + + At test time, the best label for a new x is given by the y which maximizes F(x,y). + To put this into the context of the current example, F(x,y) computes the score for + a given sample and class label. The predicted class label is therefore whatever + value of y which makes F(x,y) the biggest. This is exactly what predict_label() + does. That is, it computes F(x,0), F(x,1), and F(x,2) and then reports which label + has the biggest value. + + At a high level, a structural SVM can be thought of as searching the parameter space + of F(x,y) for the set of parameters that make the following inequality true as often + as possible: + F(x_i,y_i) > max{over all incorrect labels of x_i} F(x_i, y_incorrect) + That is, it seeks to find the parameter vector such that F(x,y) always gives the + highest score to the correct output. To define the structural SVM optimization + problem precisely, we first introduce some notation: + - let PSI(x,y) == the joint feature vector for input x and a label y. + - let F(x,y|w) == dot(w,PSI(x,y)). + (we use the | notation to emphasize that F() has the parameter vector of + weights called w) + - let LOSS(idx,y) == the loss incurred for predicting that the idx-th training + sample has a label of y. Note that LOSS() should always be >= 0 and should + become exactly 0 when y is the correct label for the idx-th sample. Moreover, + it should notionally indicate how bad it is to predict y for the idx'th sample. + - let x_i == the i-th training sample. + - let y_i == the correct label for the i-th training sample. + - The number of data samples is N. 
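+
+        As a concrete example of the LOSS() function: in this program the labels are
+        just 0, 1, or 2 and we will use the 0-1 loss.  That is, LOSS(idx,y) == 0 when
+        y is the correct label for the idx-th sample and LOSS(idx,y) == 1 otherwise.
+        You can see this implemented in separation_oracle() below.
+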
+ + Then the optimization problem solved by dlib's structural SVM solver is the following: + Minimize: h(w) == 0.5*dot(w,w) + C*R(w) + + Where R(w) == sum from i=1 to N: 1/N * sample_risk(i,w) + and sample_risk(i,w) == max over all Y: LOSS(i,Y) + F(x_i,Y|w) - F(x_i,y_i|w) + and C > 0 + + You can think of the sample_risk(i,w) as measuring the degree of error you would make + when predicting the label of the i-th sample using parameters w. That is, it is zero + only when the correct label would be predicted and grows larger the more "wrong" the + predicted output becomes. Therefore, the objective function is minimizing a balance + between making the weights small (typically this reduces overfitting) and fitting the + training data. The degree to which you try to fit the data is controlled by the C + parameter. + + For a more detailed introduction to structured support vector machines you should + consult the following paper: + Predicting Structured Objects with Support Vector Machines by + Thorsten Joachims, Thomas Hofmann, Yisong Yue, and Chun-nam Yu + + !*/ + +public: + + // Finally, we come back to the code. To use dlib's structural SVM solver you need to + // provide the things discussed above. This is the number of training samples, the + // dimensionality of PSI(), as well as methods for calculating the loss values and + // PSI() vectors. You will also need to write code that can compute: max over all Y: + // LOSS(i,Y) + F(x_i,Y|w). In particular, the three_class_classifier_problem class is + // required to implement the following four virtual functions: + // - get_num_dimensions() + // - get_num_samples() + // - get_truth_joint_feature_vector() + // - separation_oracle() + + + // But first, we declare a constructor so we can populate our three_class_classifier_problem + // object with the data we need to define our machine learning problem. All we do here + // is take in the training samples and their labels as well as a number indicating how + // many threads the structural SVM solver will use. You can declare this constructor + // any way you like since it is not used by any of the dlib tools. + three_class_classifier_problem ( + const std::vector<sample_type>& samples_, + const std::vector<int>& labels_, + const unsigned long num_threads + ) : + structural_svm_problem_threaded<column_vector, feature_vector_type>(num_threads), + samples(samples_), + labels(labels_) + {} + + feature_vector_type make_psi ( + const sample_type& x, + const int label + ) const + /*! + ensures + - returns the vector PSI(x,label) + !*/ + { + // All we are doing here is taking x, which is a 3 dimensional sample vector in this + // example program, and putting it into one of 3 places in a 9 dimensional PSI + // vector, which we then return. So this function returns PSI(x,label). To see why + // we setup PSI like this, recall how predict_label() works. It takes in a 9 + // dimensional weight vector and breaks the vector into 3 pieces. Each piece then + // defines a different classifier and we use them in a one-vs-all manner to predict + // the label. So now that we are in the structural SVM code we have to define the + // PSI vector to correspond to this usage. That is, we need to setup PSI so that + // argmax_y dot(weights,PSI(x,y)) == predict_label(weights,x). This is how we tell + // the structural SVM solver what kind of problem we are trying to solve. + // + // It's worth emphasizing that the single biggest step in using a structural SVM is + // deciding how you want to represent PSI(x,label). 
It is always a vector, but + // deciding what to put into it to solve your problem is often not a trivial task. + // Part of the difficulty is that you need an efficient method for finding the label + // that makes dot(w,PSI(x,label)) the biggest. Sometimes this is easy, but often + // finding the max scoring label turns into a difficult combinatorial optimization + // problem. So you need to pick a PSI that doesn't make the label maximization step + // intractable but also still well models your problem. + // + // Finally, note that make_psi() is a helper routine we define in this example. In + // general, you are not required to implement it. That is, all you must implement + // are the four virtual functions defined below. + + + // So let's make an empty 9-dimensional PSI vector + feature_vector_type psi(get_num_dimensions()); + psi = 0; // zero initialize it + + // Now put a copy of x into the right place in PSI according to its label. So for + // example, if label is 1 then psi would be: [0 0 0 x(0) x(1) x(2) 0 0 0] + if (label == 0) + set_rowm(psi,range(0,2)) = x; + else if (label == 1) + set_rowm(psi,range(3,5)) = x; + else // the label must be 2 + set_rowm(psi,range(6,8)) = x; + + return psi; + } + + // We need to declare the dimensionality of the PSI vector (this is also the + // dimensionality of the weight vector we are learning). Similarly, we need to declare + // the number of training samples. We do this by defining the following virtual + // functions. + virtual long get_num_dimensions () const { return samples[0].size() * 3; } + virtual long get_num_samples () const { return samples.size(); } + + // In get_truth_joint_feature_vector(), all you have to do is output the PSI() vector + // for the idx-th training sample when it has its true label. So here it outputs + // PSI(samples[idx], labels[idx]). + virtual void get_truth_joint_feature_vector ( + long idx, + feature_vector_type& psi + ) const + { + psi = make_psi(samples[idx], labels[idx]); + } + + // separation_oracle() is more interesting. dlib's structural SVM solver will call + // separation_oracle() many times during the optimization. Each time it will give it + // the current value of the parameter weights and separation_oracle() is supposed to + // find the label that most violates the structural SVM objective function for the + // idx-th sample. Then the separation oracle reports the corresponding PSI vector and + // loss value. To state this more precisely, the separation_oracle() member function + // has the following contract: + // requires + // - 0 <= idx < get_num_samples() + // - current_solution.size() == get_num_dimensions() + // ensures + // - runs the separation oracle on the idx-th sample. We define this as follows: + // - let X == the idx-th training sample. + // - let PSI(X,y) == the joint feature vector for input X and an arbitrary label y. + // - let F(X,y) == dot(current_solution,PSI(X,y)). + // - let LOSS(idx,y) == the loss incurred for predicting that the idx-th sample + // has a label of y. Note that LOSS() should always be >= 0 and should + // become exactly 0 when y is the correct label for the idx-th sample. + // + // Then the separation oracle finds a Y such that: + // Y = argmax over all y: LOSS(idx,y) + F(X,y) + // (i.e. It finds the label which maximizes the above expression.) 
+    //
+    //          Finally, we can define the outputs of this function as:
+    //              - #loss == LOSS(idx,Y)
+    //              - #psi == PSI(X,Y)
+    virtual void separation_oracle (
+        const long idx,
+        const column_vector& current_solution,
+        scalar_type& loss,
+        feature_vector_type& psi
+    ) const
+    {
+        // Note that the solver will use multiple threads to make concurrent calls to
+        // separation_oracle().  Therefore, you must implement it in a thread safe manner
+        // (or disable threading by inheriting from structural_svm_problem instead of
+        // structural_svm_problem_threaded).  However, if your separation oracle is not
+        // very fast to execute you can get a very significant speed boost by using the
+        // threaded solver.  In general, all you need to do to make your separation oracle
+        // thread safe is to make sure it does not modify any global variables or members
+        // of three_class_classifier_problem.  So it is usually easy to make it thread safe.
+
+        column_vector scores(3);
+
+        // compute scores for each of the three classifiers
+        scores = dot(rowm(current_solution, range(0,2)), samples[idx]),
+                 dot(rowm(current_solution, range(3,5)), samples[idx]),
+                 dot(rowm(current_solution, range(6,8)), samples[idx]);
+
+        // Add in the loss-augmentation.  Recall that we maximize LOSS(idx,y) + F(X,y) in
+        // the separation oracle, not just F(X,y) as we normally would in predict_label().
+        // Therefore, we must add in this extra amount to account for the loss-augmentation.
+        // For our simple multi-class classifier, we incur a loss of 1 if we don't predict
+        // the correct label and a loss of 0 if we get the right label.
+        if (labels[idx] != 0)
+            scores(0) += 1;
+        if (labels[idx] != 1)
+            scores(1) += 1;
+        if (labels[idx] != 2)
+            scores(2) += 1;
+
+        // Now figure out which classifier has the largest loss-augmented score.
+        const int max_scoring_label = index_of_max(scores);
+        // And finally record the loss that was associated with that predicted label.
+        // Again, the loss is 1 if the label is incorrect and 0 otherwise.
+        if (max_scoring_label == labels[idx])
+            loss = 0;
+        else
+            loss = 1;
+
+        // Finally, compute the PSI vector corresponding to the label we just found and
+        // store it into psi for output.
+        psi = make_psi(samples[idx], max_scoring_label);
+    }
+
+private:
+
+    // Here we hold onto the training data by reference.  You can hold it by value or by
+    // any other method you like.
+    const std::vector<sample_type>& samples;
+    const std::vector<int>& labels;
+};
+
+// ----------------------------------------------------------------------------------------
+
+// This function puts it all together.  In here we use the three_class_classifier_problem
+// along with dlib's oca cutting plane solver to find the optimal weights given our
+// training data.
+column_vector train_three_class_classifier (
+    const std::vector<sample_type>& samples,
+    const std::vector<int>& labels
+)
+{
+    const unsigned long num_threads = 4;
+    three_class_classifier_problem problem(samples, labels, num_threads);
+
+    // Before we run the solver we set up some general parameters.  First,
+    // you can set the C parameter of the structural SVM by calling set_c().
+    problem.set_c(1);
+
+    // The epsilon parameter controls the stopping tolerance.  The optimizer will run until
+    // R(w) is within epsilon of its optimal value.  If you don't set this then it defaults
+    // to 0.001.
+    problem.set_epsilon(0.0001);
+
+    // Uncomment this and the optimizer will print its progress to standard out.  You will
+    // be able to see things like the current risk gap.
The optimizer continues until the + // risk gap is below epsilon. + //problem.be_verbose(); + + // The optimizer uses an internal cache to avoid unnecessary calls to your + // separation_oracle() routine. This parameter controls the size of that cache. + // Bigger values use more RAM and might make the optimizer run faster. You can also + // disable it by setting it to 0 which is good to do when your separation_oracle is + // very fast. If you don't call this function it defaults to a value of 5. + //problem.set_max_cache_size(20); + + + column_vector weights; + // Finally, we create the solver and then run it. + oca solver; + solver(problem, weights); + + // Alternatively, if you wanted to require that the learned weights are all + // non-negative then you can call the solver as follows and it will put a constraint on + // the optimization problem which causes all elements of weights to be >= 0. + //solver(problem, weights, problem.get_num_dimensions()); + + return weights; +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/svr_ex.cpp b/ml/dlib/examples/svr_ex.cpp new file mode 100644 index 00000000..a18edf24 --- /dev/null +++ b/ml/dlib/examples/svr_ex.cpp @@ -0,0 +1,96 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This is an example illustrating the use of the epsilon-insensitive support vector + regression object from the dlib C++ Library. + + In this example we will draw some points from the sinc() function and do a + non-linear regression on them. +*/ + +#include <iostream> +#include <vector> + +#include <dlib/svm.h> + +using namespace std; +using namespace dlib; + +// Here is the sinc function we will be trying to learn with the svr_trainer +// object. +double sinc(double x) +{ + if (x == 0) + return 1; + return sin(x)/x; +} + +int main() +{ + // Here we declare that our samples will be 1 dimensional column vectors. + typedef matrix<double,1,1> sample_type; + + // Now we are making a typedef for the kind of kernel we want to use. I picked the + // radial basis kernel because it only has one parameter and generally gives good + // results without much fiddling. + typedef radial_basis_kernel<sample_type> kernel_type; + + + std::vector<sample_type> samples; + std::vector<double> targets; + + // The first thing we do is pick a few training points from the sinc() function. + sample_type m; + for (double x = -10; x <= 4; x += 1) + { + m(0) = x; + + samples.push_back(m); + targets.push_back(sinc(x)); + } + + // Now setup a SVR trainer object. It has three parameters, the kernel and + // two parameters specific to SVR. + svr_trainer<kernel_type> trainer; + trainer.set_kernel(kernel_type(0.1)); + + // This parameter is the usual regularization parameter. It determines the trade-off + // between trying to reduce the training error or allowing more errors but hopefully + // improving the generalization of the resulting function. Larger values encourage exact + // fitting while smaller values of C may encourage better generalization. + trainer.set_c(10); + + // Epsilon-insensitive regression means we do regression but stop trying to fit a data + // point once it is "close enough" to its target value. This parameter is the value that + // controls what we mean by "close enough". In this case, I'm saying I'm happy if the + // resulting regression function gets within 0.001 of the target value. 
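+    // (Roughly speaking, the loss on a training point is max(0, |f(x)-y| - eps), so
+    // errors smaller than eps cost nothing at all.  This is just a sketch of the
+    // idea; see the svr_trainer documentation for the exact objective.)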
+    trainer.set_epsilon_insensitivity(0.001);
+
+    // Now do the training and save the results
+    decision_function<kernel_type> df = trainer.train(samples, targets);
+
+    // now we output the value of the sinc function for a few test points as well as the
+    // value predicted by SVR.
+    m(0) = 2.5; cout << sinc(m(0)) << "   " << df(m) << endl;
+    m(0) = 0.1; cout << sinc(m(0)) << "   " << df(m) << endl;
+    m(0) = -4;  cout << sinc(m(0)) << "   " << df(m) << endl;
+    m(0) = 5.0; cout << sinc(m(0)) << "   " << df(m) << endl;
+
+    // The output is as follows:
+    //  0.239389    0.23905
+    //  0.998334    0.997331
+    // -0.189201   -0.187636
+    // -0.191785   -0.218924
+
+    // The first column is the true value of the sinc function and the second
+    // column is the output from the SVR estimate.
+
+    // We can also do 5-fold cross-validation and find the mean squared error and R-squared
+    // values.  Note that we need to randomly shuffle the samples first.  See svm_ex.cpp
+    // for a discussion of why this is important.
+    randomize_samples(samples, targets);
+    cout << "MSE and R-Squared: "<< cross_validate_regression_trainer(trainer, samples, targets, 5) << endl;
+    // The output is:
+    // MSE and R-Squared: 1.65984e-05    0.999901
+}
+
+
diff --git a/ml/dlib/examples/thread_function_ex.cpp b/ml/dlib/examples/thread_function_ex.cpp
new file mode 100644
index 00000000..91825ffe
--- /dev/null
+++ b/ml/dlib/examples/thread_function_ex.cpp
@@ -0,0 +1,71 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+    This is a very simple example that illustrates the use of the
+    thread_function object from the dlib C++ Library.
+
+    The output of the program should look like this:
+
+        45.6
+        9.999
+        I have no args!
+        val: 3
+*/
+
+
+#include <iostream>
+#include <dlib/threads.h>
+#include <dlib/ref.h>
+
+using namespace dlib;
+using namespace std;
+
+void thread_1(double a)
+{
+    cout << a << endl;
+}
+
+void thread_2 ()
+{
+    cout << "I have no args!" << endl;
+}
+
+void thread_increment(double& a)
+{
+    a += 1;
+}
+
+int main()
+{
+    // create a thread that will call thread_1(45.6)
+    thread_function t1(thread_1,45.6);
+    // wait for the t1 thread to end
+    t1.wait();
+
+
+    // create a thread that will call thread_1(9.999)
+    thread_function t2(thread_1,9.999);
+    // wait for the t2 thread to end
+    t2.wait();
+
+
+    // create a thread that will call thread_2()
+    thread_function t3(thread_2);
+
+
+    // Note that we can also use the ref() function to pass a variable
+    // to a thread by reference.  For example, the thread below adds
+    // one to val.
+    double val = 2;
+    thread_function t4(thread_increment, dlib::ref(val));
+    t4.wait(); // wait for t4 to finish before printing val.
+    // Print val.  It will now have a value of 3.
+    cout << "val: " << val << endl;
+
+
+
+    // At this point we will automatically wait for t3 to end because
+    // the destructor for thread_function objects always waits for its
+    // thread to terminate.
+}
+
+
diff --git a/ml/dlib/examples/thread_pool_ex.cpp b/ml/dlib/examples/thread_pool_ex.cpp
new file mode 100644
index 00000000..e0a566ef
--- /dev/null
+++ b/ml/dlib/examples/thread_pool_ex.cpp
@@ -0,0 +1,183 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example illustrating the use of the thread_pool
+    object from the dlib C++ Library.
+
+
+    In this example we will create a thread pool with 3 threads and then show a
+    few different ways to send tasks to the pool.
+*/ + + +#include <dlib/threads.h> +#include <dlib/misc_api.h> // for dlib::sleep +#include <dlib/logger.h> +#include <vector> + +using namespace dlib; + +// We will be using the dlib logger object to print messages in this example +// because its output is timestamped and labeled with the thread that the log +// message came from. This will make it easier to see what is going on in this +// example. Here we make an instance of the logger. See the logger +// documentation and examples for detailed information regarding its use. +logger dlog("main"); + + +// Here we make an instance of the thread pool object. You could also use the +// global dlib::default_thread_pool(), which automatically selects the number of +// threads based on your hardware. But here let's make our own. +thread_pool tp(3); + +// ---------------------------------------------------------------------------------------- + +class test +{ + /* + The thread_pool accepts "tasks" from the user and schedules them for + execution in one of its threads when one becomes available. Each task + is just a request to call a function. So here we create a class called + test with a few member functions, which we will have the thread pool call + as tasks. + */ +public: + + void mytask() + { + dlog << LINFO << "mytask start"; + + dlib::future<int> var; + + var = 1; + + // Here we ask the thread pool to call this->subtask() and this->subtask2(). + // Note that calls to add_task() will return immediately if there is an + // available thread. However, if there isn't a thread ready then + // add_task() blocks until there is such a thread. Also, note that if + // mytask() is executed within the thread pool then calls to add_task() + // will execute the requested task within the calling thread in cases + // where the thread pool is full. This means it is always safe to spawn + // subtasks from within another task, which is what we are doing here. + tp.add_task(*this,&test::subtask,var); // schedule call to this->subtask(var) + tp.add_task(*this,&test::subtask2); // schedule call to this->subtask2() + + // Since var is a future, this line will wait for the test::subtask task to + // finish before allowing us to access the contents of var. Then var will + // return the integer it contains. In this case result will be assigned + // the value 2 since var was incremented by subtask(). + int result = var; + dlog << LINFO << "var = " << result; + + // Wait for all the tasks we have started to finish. Note that + // wait_for_all_tasks() only waits for tasks which were started by the + // calling thread. So you don't have to worry about other unrelated + // parts of your application interfering. In this case it just waits + // for subtask2() to finish. + tp.wait_for_all_tasks(); + + dlog << LINFO << "mytask end" ; + } + + void subtask(int& a) + { + dlib::sleep(200); + a = a + 1; + dlog << LINFO << "subtask end "; + } + + void subtask2() + { + dlib::sleep(300); + dlog << LINFO << "subtask2 end "; + } + +}; + +// ---------------------------------------------------------------------------------------- + +int main() try +{ + // tell the logger to print out everything + dlog.set_level(LALL); + + + dlog << LINFO << "schedule a few tasks"; + + test taskobj; + // Schedule the thread pool to call taskobj.mytask(). Note that all forms of + // add_task() pass in the task object by reference. This means you must make sure, + // in this case, that taskobj isn't destructed until after the task has finished + // executing. 
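+    // (If keeping taskobj alive like this is ever inconvenient, one alternative is to
+    // copy the task into the pool instead.  A minimal sketch, assuming a task that
+    // takes no arguments, would be something like:
+    //     tp.add_task_by_value([](){ dlog << LINFO << "stand-alone lambda task"; });
+    // add_task_by_value() is discussed further below.)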
+    tp.add_task(taskobj, &test::mytask);
+
+    // This behavior of add_task() enables it to guarantee that no memory allocations
+    // occur after the thread_pool has been constructed, so long as the user doesn't
+    // call any of the add_task_by_value() routines.  The future object also doesn't
+    // perform any memory allocations or contain any system resources such as mutex
+    // objects.  If you don't care about memory allocations then you will likely find
+    // the add_task_by_value() interface more convenient to use, which is shown below.
+
+
+
+    // If we call add_task_by_value() we pass task objects to a thread pool by value.
+    // So in this case we don't have to worry about keeping our own instance of the
+    // task.  Here we create a lambda function and pass it right in and everything
+    // works like it should.
+    dlib::future<int> num = 3;
+    tp.add_task_by_value([](int& val){val += 7;}, num);  // adds 7 to num
+    int result = num.get();
+    dlog << LINFO << "result = " << result;   // prints result = 10
+
+
+    // dlib also contains dlib::async(), which is essentially identical to std::async()
+    // except that it launches tasks to a dlib::thread_pool (using add_task_by_value)
+    // rather than starting an unbounded number of threads.  As an example, here we
+    // make 10 different tasks, each of which computes a different value for an
+    // element of the vector vect.
+    std::vector<std::future<unsigned long>> vect(10);
+    for (unsigned long i = 0; i < vect.size(); ++i)
+        vect[i] = dlib::async(tp, [i]() { return i*i; });
+    // Print the results
+    for (unsigned long i = 0; i < vect.size(); ++i)
+        dlog << LINFO << "vect["<<i<<"]: " << vect[i].get();
+
+
+    // Finally, it's usually a good idea to wait for all your tasks to complete.
+    // Moreover, if any of your tasks threw an exception then waiting for the tasks
+    // will rethrow the exception in the calling context, allowing you to handle it in
+    // your local thread.  Also, if you don't wait for the tasks, one of them throws an
+    // exception, and you then allow the thread pool to be destructed, your program
+    // will be terminated.  So don't ignore exceptions :)
+    tp.wait_for_all_tasks();
+
+
+    /* A possible run of this program might produce the following output (the first
+       column is the time the log message occurred and the value in [] is the thread
+       id for the thread that generated the log message):
+
+        0 INFO  [0] main: schedule a few tasks
+        0 INFO  [1] main: mytask start
+        0 INFO  [0] main: result = 10
+      200 INFO  [2] main: subtask end
+      200 INFO  [1] main: var = 2
+      200 INFO  [0] main: vect[0]: 0
+      200 INFO  [0] main: vect[1]: 1
+      200 INFO  [0] main: vect[2]: 4
+      200 INFO  [0] main: vect[3]: 9
+      200 INFO  [0] main: vect[4]: 16
+      200 INFO  [0] main: vect[5]: 25
+      200 INFO  [0] main: vect[6]: 36
+      200 INFO  [0] main: vect[7]: 49
+      200 INFO  [0] main: vect[8]: 64
+      200 INFO  [0] main: vect[9]: 81
+      300 INFO  [3] main: subtask2 end
+      300 INFO  [1] main: mytask end
+    */
+}
+catch(std::exception& e)
+{
+    std::cout << e.what() << std::endl;
+}
+
+
diff --git a/ml/dlib/examples/threaded_object_ex.cpp b/ml/dlib/examples/threaded_object_ex.cpp
new file mode 100644
index 00000000..84fe1026
--- /dev/null
+++ b/ml/dlib/examples/threaded_object_ex.cpp
@@ -0,0 +1,79 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example illustrating the use of the threaded_object
+    from the dlib C++ Library.
+
+
+    This is a very simple example.  It creates a single thread that
+    just prints messages to the screen.
+*/ + + +#include <iostream> +#include <dlib/threads.h> +#include <dlib/misc_api.h> // for dlib::sleep + +using namespace std; +using namespace dlib; + +class my_object : public threaded_object +{ +public: + my_object() + { + // Start our thread going in the thread() function + start(); + } + + ~my_object() + { + // Tell the thread() function to stop. This will cause should_stop() to + // return true so the thread knows what to do. + stop(); + + // Wait for the thread to stop before letting this object destruct itself. + // Also note, you are *required* to wait for the thread to end before + // letting this object destruct itself. + wait(); + } + +private: + + void thread() + { + // This is our thread. It will loop until it is told that it should terminate. + while (should_stop() == false) + { + cout << "hurray threads!" << endl; + dlib::sleep(500); + } + } +}; + +int main() +{ + // Create an instance of our threaded object. + my_object t; + + dlib::sleep(4000); + + // Tell the threaded object to pause its thread. This causes the + // thread to block on its next call to should_stop(). + t.pause(); + + dlib::sleep(3000); + cout << "starting thread back up from paused state" << endl; + + // Tell the thread to unpause itself. This causes should_stop() to unblock + // and to let the thread continue. + t.start(); + + dlib::sleep(4000); + + // Let the program end. When t is destructed it will gracefully terminate your + // thread because we have set the destructor up to do so. +} + + + diff --git a/ml/dlib/examples/threads_ex.cpp b/ml/dlib/examples/threads_ex.cpp new file mode 100644 index 00000000..f0f1e914 --- /dev/null +++ b/ml/dlib/examples/threads_ex.cpp @@ -0,0 +1,93 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt + +/* + + This is an example illustrating the use of the threading api from the dlib + C++ Library. + + + This is a very simple example. It makes some threads and just waits for + them to terminate. It should be noted that this example shows how to use + the lowest level of the dlib threading API. Often, other higher level tools + are more appropriate. For examples of higher level tools see the + documentation on the pipe, thread_pool, thread_function, or + threaded_object. +*/ + + +#include <iostream> +#include <dlib/threads.h> +#include <dlib/misc_api.h> // for dlib::sleep + +using namespace std; +using namespace dlib; + +int thread_count = 10; +dlib::mutex count_mutex; // This is a mutex we will use to guard the thread_count variable. Note that the mutex doesn't know + // anything about the thread_count variable. Only our usage of a mutex determines what it guards. + // In this case we are going to make sure this mutex is always locked before we touch the + // thread_count variable. + +signaler count_signaler(count_mutex); // This is a signaler we will use to signal when + // the thread_count variable is changed. Note that it is + // associated with the count_mutex. This means that + // when you call count_signaler.wait() it will automatically + // unlock count_mutex for you. + + +void test_thread (void*) +{ + // just sleep for a second + dlib::sleep(1000); + + // Now signal that this thread is ending. First we should get a lock on the + // count_mutex so we can safely mess with thread_count. A convenient way to do this + // is to use an auto_mutex object. Its constructor takes a mutex object and locks + // it right away, it then unlocks the mutex when the auto_mutex object is destructed. 
+    // Note that this happens even if an exception is thrown.  So it ensures that you
+    // don't somehow quit your function without unlocking your mutex.
+    auto_mutex locker(count_mutex);
+    --thread_count;
+    // Now we signal this change.  This will cause one thread that is currently waiting
+    // on a call to count_signaler.wait() to unblock.
+    count_signaler.signal();
+
+    // At the end of this function locker goes out of scope and gets destructed, thus
+    // unlocking count_mutex for us.
+}
+
+int main()
+{
+
+    cout << "Create some threads" << endl;
+    for (int i = 0; i < thread_count; ++i)
+    {
+        // Create some threads.  This 0 we are passing in here is the argument that gets
+        // passed to the thread function (a void pointer) but we aren't using it in this
+        // example program so I'm just using 0.
+        create_new_thread(test_thread,0);
+    }
+    cout << "Done creating threads, now we wait for them to end" << endl;
+
+
+    // Again we use an auto_mutex to get a lock.  We don't have to do it this way
+    // but it is convenient.  Also note that we can name the auto_mutex object anything.
+    auto_mutex some_random_unused_name(count_mutex);
+
+    // Now we wait in a loop for thread_count to be 0.  Note that it is important to do this in a
+    // loop because it is possible to get spurious wakeups from calls to wait() on some
+    // platforms.  So this guards against that and it also makes the code easy to understand.
+    while (thread_count > 0)
+        count_signaler.wait();  // This puts this thread to sleep until we get a signal to look at the
+                                // thread_count variable.  It also unlocks the count_mutex before it
+                                // goes to sleep and then relocks it when it wakes back up.  Again,
+                                // note that it is possible for wait() to return even if no one signals you.
+                                // This is just weird junk you have to deal with on some platforms.  So
+                                // don't try to be clever and write code that depends on the number of
+                                // times wait() returns because it won't always work.
+
+
+    cout << "All threads done, ending program" << endl;
+}
+
+
diff --git a/ml/dlib/examples/timer_ex.cpp b/ml/dlib/examples/timer_ex.cpp
new file mode 100644
index 00000000..e1d55492
--- /dev/null
+++ b/ml/dlib/examples/timer_ex.cpp
@@ -0,0 +1,56 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+
+
+/*
+    This is an example illustrating the use of the timer object from the dlib C++ Library.
+
+    The timer is an object that calls some user specified member function at regular
+    intervals from another thread.
+*/
+
+
+#include <dlib/timer.h>
+#include <dlib/misc_api.h> // for dlib::sleep
+#include <iostream>
+
+using namespace dlib;
+using namespace std;
+
+// ----------------------------------------------------------------------------------------
+
+class timer_example
+{
+public:
+    void action_function()
+    {
+        // print out a message so we can see that this function is being triggered
+        cout << "action_function() called" << endl;
+    }
+};
+
+// ----------------------------------------------------------------------------------------
+
+int main()
+{
+    timer_example e;
+
+    // Here we construct our timer object.  It needs two things.  The second argument is
+    // the member function it is going to call at regular intervals and the first argument
+    // is the object instance it will call that member function on.
+    timer<timer_example> t(e, &timer_example::action_function);
+
+    // Set the timer object to trigger every second
+    t.set_delay_time(1000);
+
+    // Start the timer.
+    // It will start calling the action function 1 second from this call to start.
+    t.start();
+
+    // Sleep for 10 seconds before letting the program end.
+    dlib::sleep(10000);
+
+    // The timer will destruct itself properly and stop calling the action_function.
+}
+
+// ----------------------------------------------------------------------------------------
+
diff --git a/ml/dlib/examples/train_object_detector.cpp b/ml/dlib/examples/train_object_detector.cpp
new file mode 100644
index 00000000..9bc0977c
--- /dev/null
+++ b/ml/dlib/examples/train_object_detector.cpp
@@ -0,0 +1,422 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example showing how you might use dlib to create a reasonably
+    functional command line tool for object detection.  This example assumes
+    you are familiar with the contents of at least the following example
+    programs:
+        - fhog_object_detector_ex.cpp
+        - compress_stream_ex.cpp
+
+
+
+
+    This program is a command line tool for learning to detect objects in images.
+    Therefore, to create an object detector it requires a set of annotated training
+    images.  To create this annotated data you will need to use the imglab tool
+    included with dlib.  It is located in the tools/imglab folder and can be compiled
+    using the following commands.
+        cd tools/imglab
+        mkdir build
+        cd build
+        cmake ..
+        cmake --build . --config Release
+    Note that you may need to install CMake (www.cmake.org) for this to work.
+
+    Next, let's assume you have a folder of images called /tmp/images.  These images
+    should contain examples of the objects you want to learn to detect.  You will
+    use the imglab tool to label these objects.  Do this by typing the following
+        ./imglab -c mydataset.xml /tmp/images
+    This will create a file called mydataset.xml which simply lists the images in
+    /tmp/images.  To annotate them run
+        ./imglab mydataset.xml
+    A window will appear showing all the images.  You can use the up and down arrow
+    keys to cycle through the images and the mouse to label objects.  In particular,
+    holding the shift key, left clicking, and dragging the mouse will allow you to
+    draw boxes around the objects you wish to detect.  So next, label all the objects
+    with boxes.  Note that it is important to label all the objects since any object
+    not labeled is implicitly assumed to be not an object we should detect.  If there
+    are objects you are not sure about you should draw a box around them, then double
+    click the box and press i.  This will cross out the box and mark it as "ignore".
+    The training code in dlib will then simply ignore detections matching that box.
+
+
+    Once you finish labeling objects go to the file menu, click save, and then close
+    the program.  This will save the object boxes back to mydataset.xml.  You can verify
+    this by opening the tool again with
+        ./imglab mydataset.xml
+    and observing that the boxes are present.
+
+    Returning to the present example program, we can compile it using cmake just as we
+    did with the imglab tool.  Once compiled, we can issue the command
+        ./train_object_detector -tv mydataset.xml
+    which will train an object detection model based on our labeled data.  The model
+    will be saved to the file object_detector.svm.  Once this has finished we can use
+    the object detector to locate objects in new images with a command like
+        ./train_object_detector some_image.png
+    This command will display some_image.png in a window and any detected objects will
+    be indicated by a red box.
+
+    Finally, to make running this example easy dlib includes some training data in the
+    examples/faces folder.  Therefore, you can test this program out by running the
+    following sequence of commands:
+        ./train_object_detector -tv examples/faces/training.xml -u1 --flip
+        ./train_object_detector --test examples/faces/testing.xml -u1
+        ./train_object_detector examples/faces/*.jpg -u1
+    That will make a face detector that performs perfectly on the test images listed in
+    testing.xml and then it will show you the detections on all the images.
+*/
+
+
+#include <dlib/svm_threaded.h>
+#include <dlib/string.h>
+#include <dlib/gui_widgets.h>
+#include <dlib/image_processing.h>
+#include <dlib/data_io.h>
+#include <dlib/cmd_line_parser.h>
+
+
+#include <iostream>
+#include <fstream>
+
+
+using namespace std;
+using namespace dlib;
+
+// ----------------------------------------------------------------------------------------
+
+void pick_best_window_size (
+    const std::vector<std::vector<rectangle> >& boxes,
+    unsigned long& width,
+    unsigned long& height,
+    const unsigned long target_size
+)
+/*!
+    ensures
+        - Finds the average aspect ratio of the elements of boxes and outputs a width
+          and height such that the aspect ratio is equal to the average and also the
+          area is equal to target_size.  That is, the following will be approximately true:
+            - #width*#height == target_size
+            - #width/#height == the average aspect ratio of the elements of boxes.
+!*/
+{
+    // find the average width and height
+    running_stats<double> avg_width, avg_height;
+    for (unsigned long i = 0; i < boxes.size(); ++i)
+    {
+        for (unsigned long j = 0; j < boxes[i].size(); ++j)
+        {
+            avg_width.add(boxes[i][j].width());
+            avg_height.add(boxes[i][j].height());
+        }
+    }
+
+    // now adjust the box size so that it is about target_size pixels in area
+    double size = avg_width.mean()*avg_height.mean();
+    double scale = std::sqrt(target_size/size);
+
+    width = (unsigned long)(avg_width.mean()*scale+0.5);
+    height = (unsigned long)(avg_height.mean()*scale+0.5);
+    // make sure the width and height never round to zero.
+    if (width == 0)
+        width = 1;
+    if (height == 0)
+        height = 1;
+}
+
+// ----------------------------------------------------------------------------------------
+
+bool contains_any_boxes (
+    const std::vector<std::vector<rectangle> >& boxes
+)
+{
+    for (unsigned long i = 0; i < boxes.size(); ++i)
+    {
+        if (boxes[i].size() != 0)
+            return true;
+    }
+    return false;
+}
+
+// ----------------------------------------------------------------------------------------
+
+void throw_invalid_box_error_message (
+    const std::string& dataset_filename,
+    const std::vector<std::vector<rectangle> >& removed,
+    const unsigned long target_size
+)
+{
+    image_dataset_metadata::dataset data;
+    load_image_dataset_metadata(data, dataset_filename);
+
+    std::ostringstream sout;
+    sout << "Error! An impossible set of object boxes was given for training. ";
+    sout << "All the boxes need to have a similar aspect ratio and also not be ";
+    sout << "smaller than about " << target_size << " pixels in area. 
"; + sout << "The following images contain invalid boxes:\n"; + std::ostringstream sout2; + for (unsigned long i = 0; i < removed.size(); ++i) + { + if (removed[i].size() != 0) + { + const std::string imgname = data.images[i].filename; + sout2 << " " << imgname << "\n"; + } + } + throw error("\n"+wrap_string(sout.str()) + "\n" + sout2.str()); +} + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) +{ + try + { + command_line_parser parser; + parser.add_option("h","Display this help message."); + parser.add_option("t","Train an object detector and save the detector to disk."); + parser.add_option("cross-validate", + "Perform cross-validation on an image dataset and print the results."); + parser.add_option("test", "Test a trained detector on an image dataset and print the results."); + parser.add_option("u", "Upsample each input image <arg> times. Each upsampling quadruples the number of pixels in the image (default: 0).", 1); + + parser.set_group_name("training/cross-validation sub-options"); + parser.add_option("v","Be verbose."); + parser.add_option("folds","When doing cross-validation, do <arg> folds (default: 3).",1); + parser.add_option("c","Set the SVM C parameter to <arg> (default: 1.0).",1); + parser.add_option("threads", "Use <arg> threads for training (default: 4).",1); + parser.add_option("eps", "Set training epsilon to <arg> (default: 0.01).", 1); + parser.add_option("target-size", "Set size of the sliding window to about <arg> pixels in area (default: 80*80).", 1); + parser.add_option("flip", "Add left/right flipped copies of the images into the training dataset. Useful when the objects " + "you want to detect are left/right symmetric."); + + + parser.parse(argc, argv); + + // Now we do a little command line validation. Each of the following functions + // checks something and throws an exception if the test fails. + const char* one_time_opts[] = {"h", "v", "t", "cross-validate", "c", "threads", "target-size", + "folds", "test", "eps", "u", "flip"}; + parser.check_one_time_options(one_time_opts); // Can't give an option more than once + // Make sure the arguments to these options are within valid ranges if they are supplied by the user. + parser.check_option_arg_range("c", 1e-12, 1e12); + parser.check_option_arg_range("eps", 1e-5, 1e4); + parser.check_option_arg_range("threads", 1, 1000); + parser.check_option_arg_range("folds", 2, 100); + parser.check_option_arg_range("u", 0, 8); + parser.check_option_arg_range("target-size", 4*4, 10000*10000); + const char* incompatible[] = {"t", "cross-validate", "test"}; + parser.check_incompatible_options(incompatible); + // You are only allowed to give these training_sub_ops if you also give either -t or --cross-validate. + const char* training_ops[] = {"t", "cross-validate"}; + const char* training_sub_ops[] = {"v", "c", "threads", "target-size", "eps", "flip"}; + parser.check_sub_options(training_ops, training_sub_ops); + parser.check_sub_option("cross-validate", "folds"); + + + if (parser.option("h")) + { + cout << "Usage: train_object_detector [options] <image dataset file|image file>\n"; + parser.print_options(); + + return EXIT_SUCCESS; + } + + + typedef scan_fhog_pyramid<pyramid_down<6> > image_scanner_type; + // Get the upsample option from the user but use 0 if it wasn't given. 
+ const unsigned long upsample_amount = get_option(parser, "u", 0); + + if (parser.option("t") || parser.option("cross-validate")) + { + if (parser.number_of_arguments() != 1) + { + cout << "You must give an image dataset metadata XML file produced by the imglab tool." << endl; + cout << "\nTry the -h option for more information." << endl; + return EXIT_FAILURE; + } + + dlib::array<array2d<unsigned char> > images; + std::vector<std::vector<rectangle> > object_locations, ignore; + + cout << "Loading image dataset from metadata file " << parser[0] << endl; + ignore = load_image_dataset(images, object_locations, parser[0]); + cout << "Number of images loaded: " << images.size() << endl; + + // Get the options from the user, but use default values if they are not + // supplied. + const int threads = get_option(parser, "threads", 4); + const double C = get_option(parser, "c", 1.0); + const double eps = get_option(parser, "eps", 0.01); + unsigned int num_folds = get_option(parser, "folds", 3); + const unsigned long target_size = get_option(parser, "target-size", 80*80); + // You can't do more folds than there are images. + if (num_folds > images.size()) + num_folds = images.size(); + + // Upsample images if the user asked us to do that. + for (unsigned long i = 0; i < upsample_amount; ++i) + upsample_image_dataset<pyramid_down<2> >(images, object_locations, ignore); + + + image_scanner_type scanner; + unsigned long width, height; + pick_best_window_size(object_locations, width, height, target_size); + scanner.set_detection_window_size(width, height); + + structural_object_detection_trainer<image_scanner_type> trainer(scanner); + trainer.set_num_threads(threads); + if (parser.option("v")) + trainer.be_verbose(); + trainer.set_c(C); + trainer.set_epsilon(eps); + + // Now make sure all the boxes are obtainable by the scanner. + std::vector<std::vector<rectangle> > removed; + removed = remove_unobtainable_rectangles(trainer, images, object_locations); + // if we weren't able to get all the boxes to match then throw an error + if (contains_any_boxes(removed)) + { + unsigned long scale = upsample_amount+1; + scale = scale*scale; + throw_invalid_box_error_message(parser[0], removed, target_size/scale); + } + + if (parser.option("flip")) + add_image_left_right_flips(images, object_locations, ignore); + + if (parser.option("t")) + { + // Do the actual training and save the results into the detector object. + object_detector<image_scanner_type> detector = trainer.train(images, object_locations, ignore); + + cout << "Saving trained detector to object_detector.svm" << endl; + serialize("object_detector.svm") << detector; + + cout << "Testing detector on training data..." 
<< endl; + cout << "Test detector (precision,recall,AP): " << test_object_detection_function(detector, images, object_locations, ignore) << endl; + } + else + { + // shuffle the order of the training images + randomize_samples(images, object_locations); + + cout << num_folds << "-fold cross validation (precision,recall,AP): " + << cross_validate_object_detection_trainer(trainer, images, object_locations, ignore, num_folds) << endl; + } + + cout << "Parameters used: " << endl; + cout << " threads: "<< threads << endl; + cout << " C: "<< C << endl; + cout << " eps: "<< eps << endl; + cout << " target-size: "<< target_size << endl; + cout << " detection window width: "<< width << endl; + cout << " detection window height: "<< height << endl; + cout << " upsample this many times : "<< upsample_amount << endl; + if (parser.option("flip")) + cout << " trained using left/right flips." << endl; + if (parser.option("cross-validate")) + cout << " num_folds: "<< num_folds << endl; + cout << endl; + + return EXIT_SUCCESS; + } + + + + + + + + // The rest of the code is devoted to testing an already trained object detector. + + if (parser.number_of_arguments() == 0) + { + cout << "You must give an image or an image dataset metadata XML file produced by the imglab tool." << endl; + cout << "\nTry the -h option for more information." << endl; + return EXIT_FAILURE; + } + + // load a previously trained object detector and try it out on some data + ifstream fin("object_detector.svm", ios::binary); + if (!fin) + { + cout << "Can't find a trained object detector file object_detector.svm. " << endl; + cout << "You need to train one using the -t option." << endl; + cout << "\nTry the -h option for more information." << endl; + return EXIT_FAILURE; + + } + object_detector<image_scanner_type> detector; + deserialize(detector, fin); + + dlib::array<array2d<unsigned char> > images; + // Check if the command line argument is an XML file + if (tolower(right_substr(parser[0],".")) == "xml") + { + std::vector<std::vector<rectangle> > object_locations, ignore; + cout << "Loading image dataset from metadata file " << parser[0] << endl; + ignore = load_image_dataset(images, object_locations, parser[0]); + cout << "Number of images loaded: " << images.size() << endl; + + // Upsample images if the user asked us to do that. + for (unsigned long i = 0; i < upsample_amount; ++i) + upsample_image_dataset<pyramid_down<2> >(images, object_locations, ignore); + + if (parser.option("test")) + { + cout << "Testing detector on data..." << endl; + cout << "Results (precision,recall,AP): " << test_object_detection_function(detector, images, object_locations, ignore) << endl; + return EXIT_SUCCESS; + } + } + else + { + // In this case, the user should have given some image files. So just + // load them. + images.resize(parser.number_of_arguments()); + for (unsigned long i = 0; i < images.size(); ++i) + load_image(images[i], parser[i]); + + // Upsample images if the user asked us to do that. + for (unsigned long i = 0; i < upsample_amount; ++i) + { + for (unsigned long j = 0; j < images.size(); ++j) + pyramid_up(images[j]); + } + } + + + // Test the detector on the images we loaded and display the results + // in a window. + image_window win; + for (unsigned long i = 0; i < images.size(); ++i) + { + // Run the detector on images[i] + const std::vector<rectangle> rects = detector(images[i]); + cout << "Number of detections: "<< rects.size() << endl; + + // Put the image and detections into the window. 
+ win.clear_overlay(); + win.set_image(images[i]); + win.add_overlay(rects, rgb_pixel(255,0,0)); + + cout << "Hit enter to see the next image."; + cin.get(); + } + + + } + catch (exception& e) + { + cout << "\nexception thrown!" << endl; + cout << e.what() << endl; + cout << "\nTry the -h option for more information." << endl; + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/train_shape_predictor_ex.cpp b/ml/dlib/examples/train_shape_predictor_ex.cpp new file mode 100644 index 00000000..05eaf4b0 --- /dev/null +++ b/ml/dlib/examples/train_shape_predictor_ex.cpp @@ -0,0 +1,198 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This example program shows how to use dlib's implementation of the paper: + One Millisecond Face Alignment with an Ensemble of Regression Trees by + Vahid Kazemi and Josephine Sullivan, CVPR 2014 + + In particular, we will train a face landmarking model based on a small dataset + and then evaluate it. If you want to visualize the output of the trained + model on some images then you can run the face_landmark_detection_ex.cpp + example program with sp.dat as the input model. + + It should also be noted that this kind of model, while often used for face + landmarking, is quite general and can be used for a variety of shape + prediction tasks. But here we demonstrate it only on a simple face + landmarking task. +*/ + + +#include <dlib/image_processing.h> +#include <dlib/data_io.h> +#include <iostream> + +using namespace dlib; +using namespace std; + +// ---------------------------------------------------------------------------------------- + +std::vector<std::vector<double> > get_interocular_distances ( + const std::vector<std::vector<full_object_detection> >& objects +); +/*! + ensures + - returns an object D such that: + - D[i][j] == the distance, in pixels, between the eyes for the face represented + by objects[i][j]. +!*/ + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) +{ + try + { + // In this example we are going to train a shape_predictor based on the + // small faces dataset in the examples/faces directory. So the first + // thing we do is load that dataset. This means you need to supply the + // path to this faces folder as a command line argument so we will know + // where it is. + if (argc != 2) + { + cout << "Give the path to the examples/faces directory as the argument to this" << endl; + cout << "program. For example, if you are in the examples folder then execute " << endl; + cout << "this program by running: " << endl; + cout << " ./train_shape_predictor_ex faces" << endl; + cout << endl; + return 0; + } + const std::string faces_directory = argv[1]; + // The faces directory contains a training dataset and a separate + // testing dataset. The training data consists of 4 images, each + // annotated with rectangles that bound each human face along with 68 + // face landmarks on each face. The idea is to use this training data + // to learn to identify the position of landmarks on human faces in new + // images. + // + // Once you have trained a shape_predictor it is always important to + // test it on data it wasn't trained on. Therefore, we will also load + // a separate testing set of 5 images. 
Once we have a shape_predictor + // created from the training data we will see how well it works by + // running it on the testing images. + // + // So here we create the variables that will hold our dataset. + // images_train will hold the 4 training images and faces_train holds + // the locations and poses of each face in the training images. So for + // example, the image images_train[0] has the faces given by the + // full_object_detections in faces_train[0]. + dlib::array<array2d<unsigned char> > images_train, images_test; + std::vector<std::vector<full_object_detection> > faces_train, faces_test; + + // Now we load the data. These XML files list the images in each + // dataset and also contain the positions of the face boxes and + // landmarks (called parts in the XML file). Obviously you can use any + // kind of input format you like so long as you store the data into + // images_train and faces_train. But for convenience dlib comes with + // tools for creating and loading XML image dataset files. Here you see + // how to load the data. To create the XML files you can use the imglab + // tool which can be found in the tools/imglab folder. It is a simple + // graphical tool for labeling objects in images. To see how to use it + // read the tools/imglab/README.txt file. + load_image_dataset(images_train, faces_train, faces_directory+"/training_with_face_landmarks.xml"); + load_image_dataset(images_test, faces_test, faces_directory+"/testing_with_face_landmarks.xml"); + + // Now make the object responsible for training the model. + shape_predictor_trainer trainer; + // This algorithm has a bunch of parameters you can mess with. The + // documentation for the shape_predictor_trainer explains all of them. + // You should also read Kazemi's paper which explains all the parameters + // in great detail. However, here I'm just setting three of them + // differently than their default values. I'm doing this because we + // have a very small dataset. In particular, setting the oversampling + // to a high amount (300) effectively boosts the training set size, so + // that helps this example. + trainer.set_oversampling_amount(300); + // I'm also reducing the capacity of the model by explicitly increasing + // the regularization (making nu smaller) and by using trees with + // smaller depths. + trainer.set_nu(0.05); + trainer.set_tree_depth(2); + + // Some parts of the training process can be parallelized; the trainer + // will use up to this many threads when possible. + trainer.set_num_threads(2); + + // Tell the trainer to print status messages to the console so we can + // see how long the training will take. + trainer.be_verbose(); + + // Now finally generate the shape model + shape_predictor sp = trainer.train(images_train, faces_train); + + + // Now that we have a model we can test it. This function measures the + // average distance between a face landmark output by the + // shape_predictor and where it should be according to the truth data. + // Note that there is an optional 4th argument that lets us rescale the + // distances. Here we are causing the output to scale each face's + // distances by the interocular distance, as is customary when + // evaluating face landmarking systems. + cout << "mean training error: "<< + test_shape_predictor(sp, images_train, faces_train, get_interocular_distances(faces_train)) << endl; + + // The real test is to see how well it does on data it wasn't trained + // on.
We trained it on a very small dataset so the accuracy is not + // extremely high, but it's still doing quite well. Moreover, if you + // train it on one of the large face landmarking datasets you will + // obtain state-of-the-art results, as shown in the Kazemi paper. + cout << "mean testing error: "<< + test_shape_predictor(sp, images_test, faces_test, get_interocular_distances(faces_test)) << endl; + + // Finally, we save the model to disk so we can use it later. + serialize("sp.dat") << sp; + } + catch (exception& e) + { + cout << "\nexception thrown!" << endl; + cout << e.what() << endl; + } +} + +// ---------------------------------------------------------------------------------------- + +double interocular_distance ( + const full_object_detection& det +) +{ + dlib::vector<double,2> l, r; + double cnt = 0; + // Find the center of the left eye by averaging the points around + // the eye. + for (unsigned long i = 36; i <= 41; ++i) + { + l += det.part(i); + ++cnt; + } + l /= cnt; + + // Find the center of the right eye by averaging the points around + // the eye. + cnt = 0; + for (unsigned long i = 42; i <= 47; ++i) + { + r += det.part(i); + ++cnt; + } + r /= cnt; + + // Now return the distance between the centers of the eyes + return length(l-r); +} + +std::vector<std::vector<double> > get_interocular_distances ( + const std::vector<std::vector<full_object_detection> >& objects +) +{ + std::vector<std::vector<double> > temp(objects.size()); + for (unsigned long i = 0; i < objects.size(); ++i) + { + for (unsigned long j = 0; j < objects[i].size(); ++j) + { + temp[i].push_back(interocular_distance(objects[i][j])); + } + } + return temp; +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/examples/using_custom_kernels_ex.cpp b/ml/dlib/examples/using_custom_kernels_ex.cpp new file mode 100644 index 00000000..f0cac690 --- /dev/null +++ b/ml/dlib/examples/using_custom_kernels_ex.cpp @@ -0,0 +1,208 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example showing how to define custom kernel functions for use with + the machine learning tools in the dlib C++ Library. + + This example assumes you are somewhat familiar with the machine learning + tools in dlib. In particular, you should be familiar with the krr_trainer + and the matrix object. So you may want to read the krr_classification_ex.cpp + and matrix_ex.cpp example programs if you haven't already. +*/ + + +#include <iostream> +#include <dlib/svm.h> + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------------------- + +/* + Here we define our new kernel. It is the UKF kernel from + Facilitating the applications of support vector machine by using a new kernel + by Rui Zhang and Wenjian Wang. + + + + In the context of the dlib library a kernel function object is an object with + an interface with the following properties: + - a public typedef named sample_type + - a public typedef named scalar_type which should be a float, double, or + long double type. + - an overloaded operator() that operates on two items of sample_type + and returns a scalar_type.
+ - a public typedef named mem_manager_type that is an implementation of + dlib/memory_manager/memory_manager_kernel_abstract.h or + dlib/memory_manager_global/memory_manager_global_kernel_abstract.h or + dlib/memory_manager_stateless/memory_manager_stateless_kernel_abstract.h + - an overloaded == operator that tells you if two kernels are + identical or not. + + Below we define such a beast for the UKF kernel. In this case we are expecting the + sample type (i.e. the T type) to be a dlib::matrix. However, note that you can design + kernels which operate on any type you like so long as you meet the above requirements. +*/ + +template < typename T > +struct ukf_kernel +{ + typedef typename T::type scalar_type; + typedef T sample_type; + // If your sample type, the T, doesn't have a memory manager then + // you can use dlib::default_memory_manager here. + typedef typename T::mem_manager_type mem_manager_type; + + ukf_kernel(const scalar_type g) : sigma(g) {} + ukf_kernel() : sigma(0.1) {} + + scalar_type sigma; + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const + { + // This is the formula for the UKF kernel from the above referenced paper. + return 1/(length_squared(a-b) + sigma); + } + + bool operator== ( + const ukf_kernel& k + ) const + { + return sigma == k.sigma; + } +}; + +// ---------------------------------------------------------------------------------------- + +/* + Here we define serialize() and deserialize() functions for our new kernel. Defining + these functions is optional. However, if you don't define them you won't be able + to save your learned decision_function objects to disk. +*/ + +template < typename T > +void serialize ( const ukf_kernel<T>& item, std::ostream& out) +{ + // save the state of the kernel to the output stream + serialize(item.sigma, out); +} + +template < typename T > +void deserialize ( ukf_kernel<T>& item, std::istream& in ) +{ + deserialize(item.sigma, in); +} + +// ---------------------------------------------------------------------------------------- + +/* + This next thing, the kernel_derivative specialization is optional. You only need + to define it if you want to use the dlib::reduced2() or dlib::approximate_distance_function() + routines. If so, then you need to supply code for computing the derivative of your kernel as + shown below. Note also that you can only do this if your kernel operates on dlib::matrix + objects which represent column vectors. +*/ + +namespace dlib +{ + template < typename T > + struct kernel_derivative<ukf_kernel<T> > + { + typedef typename T::type scalar_type; + typedef T sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + kernel_derivative(const ukf_kernel<T>& k_) : k(k_){} + + sample_type operator() (const sample_type& x, const sample_type& y) const + { + // return the derivative of the ukf kernel with respect to the second argument (i.e. y) + return 2*(x-y)*std::pow(k(x,y),2); + } + + const ukf_kernel<T>& k; + }; +} + +// ---------------------------------------------------------------------------------------- + +int main() +{ + // We are going to be working with 2 dimensional samples and trying to perform + // binary classification on them using our new ukf_kernel. 
+ typedef matrix<double, 2, 1> sample_type; + + typedef ukf_kernel<sample_type> kernel_type; + + + // Now let's generate some training data + std::vector<sample_type> samples; + std::vector<double> labels; + for (double r = -20; r <= 20; r += 0.9) + { + for (double c = -20; c <= 20; c += 0.9) + { + sample_type samp; + samp(0) = r; + samp(1) = c; + samples.push_back(samp); + + // if this point is within 13 units of the origin + if (sqrt(r*r + c*c) <= 13) + labels.push_back(+1); + else + labels.push_back(-1); + + } + } + cout << "samples generated: " << samples.size() << endl; + cout << " number of +1 samples: " << sum(mat(labels) > 0) << endl; + cout << " number of -1 samples: " << sum(mat(labels) < 0) << endl; + + + // A valid kernel must always give rise to kernel matrices which are symmetric + // and positive semidefinite (i.e. have nonnegative eigenvalues). This next + // bit of code makes a kernel matrix and checks if it has these properties. + const matrix<double> K = kernel_matrix(kernel_type(0.1), randomly_subsample(samples, 500)); + cout << "\nIs it symmetric? (this value should be 0): "<< max(abs(K - trans(K))) << endl; + cout << "Smallest eigenvalue (should be >= 0): " << min(real_eigenvalues(K)) << endl; + + + // here we make an instance of the krr_trainer object that uses our new kernel. + krr_trainer<kernel_type> trainer; + trainer.use_classification_loss_for_loo_cv(); + + + // Finally, let's test how good our new kernel is by doing some leave-one-out cross-validation. + cout << "\ndoing leave-one-out cross-validation" << endl; + for (double sigma = 0.01; sigma <= 100; sigma *= 3) + { + // tell the trainer the parameters we want to use + trainer.set_kernel(kernel_type(sigma)); + + std::vector<double> loo_values; + trainer.train(samples, labels, loo_values); + + // Print sigma and the fraction of samples correctly classified during LOO cross-validation. + const double classification_accuracy = mean_sign_agreement(labels, loo_values); + cout << "sigma: " << sigma << " LOO accuracy: " << classification_accuracy << endl; + } + + + + + const kernel_type kern(10); + // Since it is very easy to make a mistake while coding a derivative it is a good idea + // to compare your derivative function against a numerical approximation and see if + // the results are similar. If they are very different then you probably made a + // mistake. So here we compare the results at a test point. + cout << "\nThese vectors should match, if they don't then we coded the kernel_derivative wrong!"
<< endl; + cout << "approximate derivative: \n" << derivative(kern)(samples[0],samples[100]) << endl; + cout << "exact derivative: \n" << kernel_derivative<kernel_type>(kern)(samples[0],samples[100]) << endl; + +} + diff --git a/ml/dlib/examples/video_frames/frame_000100.jpg b/ml/dlib/examples/video_frames/frame_000100.jpg Binary files differnew file mode 100644 index 00000000..938b04d5 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000100.jpg diff --git a/ml/dlib/examples/video_frames/frame_000101.jpg b/ml/dlib/examples/video_frames/frame_000101.jpg Binary files differnew file mode 100644 index 00000000..13f14928 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000101.jpg diff --git a/ml/dlib/examples/video_frames/frame_000102.jpg b/ml/dlib/examples/video_frames/frame_000102.jpg Binary files differnew file mode 100644 index 00000000..d656b8ed --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000102.jpg diff --git a/ml/dlib/examples/video_frames/frame_000103.jpg b/ml/dlib/examples/video_frames/frame_000103.jpg Binary files differnew file mode 100644 index 00000000..fc299023 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000103.jpg diff --git a/ml/dlib/examples/video_frames/frame_000104.jpg b/ml/dlib/examples/video_frames/frame_000104.jpg Binary files differnew file mode 100644 index 00000000..2e0c38a6 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000104.jpg diff --git a/ml/dlib/examples/video_frames/frame_000105.jpg b/ml/dlib/examples/video_frames/frame_000105.jpg Binary files differnew file mode 100644 index 00000000..e28b9089 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000105.jpg diff --git a/ml/dlib/examples/video_frames/frame_000106.jpg b/ml/dlib/examples/video_frames/frame_000106.jpg Binary files differnew file mode 100644 index 00000000..fa87399e --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000106.jpg diff --git a/ml/dlib/examples/video_frames/frame_000107.jpg b/ml/dlib/examples/video_frames/frame_000107.jpg Binary files differnew file mode 100644 index 00000000..d7c1e966 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000107.jpg diff --git a/ml/dlib/examples/video_frames/frame_000108.jpg b/ml/dlib/examples/video_frames/frame_000108.jpg Binary files differnew file mode 100644 index 00000000..0203b161 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000108.jpg diff --git a/ml/dlib/examples/video_frames/frame_000109.jpg b/ml/dlib/examples/video_frames/frame_000109.jpg Binary files differnew file mode 100644 index 00000000..e8d496d5 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000109.jpg diff --git a/ml/dlib/examples/video_frames/frame_000110.jpg b/ml/dlib/examples/video_frames/frame_000110.jpg Binary files differnew file mode 100644 index 00000000..46da463f --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000110.jpg diff --git a/ml/dlib/examples/video_frames/frame_000111.jpg b/ml/dlib/examples/video_frames/frame_000111.jpg Binary files differnew file mode 100644 index 00000000..696ea3f6 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000111.jpg diff --git a/ml/dlib/examples/video_frames/frame_000112.jpg b/ml/dlib/examples/video_frames/frame_000112.jpg Binary files differnew file mode 100644 index 00000000..b2aaedf9 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000112.jpg diff --git a/ml/dlib/examples/video_frames/frame_000113.jpg b/ml/dlib/examples/video_frames/frame_000113.jpg Binary files differnew file mode 100644 index 00000000..9beb88a0 --- /dev/null +++ 
b/ml/dlib/examples/video_frames/frame_000113.jpg diff --git a/ml/dlib/examples/video_frames/frame_000114.jpg b/ml/dlib/examples/video_frames/frame_000114.jpg Binary files differnew file mode 100644 index 00000000..ae8de40e --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000114.jpg diff --git a/ml/dlib/examples/video_frames/frame_000115.jpg b/ml/dlib/examples/video_frames/frame_000115.jpg Binary files differnew file mode 100644 index 00000000..7682a690 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000115.jpg diff --git a/ml/dlib/examples/video_frames/frame_000116.jpg b/ml/dlib/examples/video_frames/frame_000116.jpg Binary files differnew file mode 100644 index 00000000..90b1109a --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000116.jpg diff --git a/ml/dlib/examples/video_frames/frame_000117.jpg b/ml/dlib/examples/video_frames/frame_000117.jpg Binary files differnew file mode 100644 index 00000000..388c914c --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000117.jpg diff --git a/ml/dlib/examples/video_frames/frame_000118.jpg b/ml/dlib/examples/video_frames/frame_000118.jpg Binary files differnew file mode 100644 index 00000000..e77a1e55 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000118.jpg diff --git a/ml/dlib/examples/video_frames/frame_000119.jpg b/ml/dlib/examples/video_frames/frame_000119.jpg Binary files differnew file mode 100644 index 00000000..7a19fe43 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000119.jpg diff --git a/ml/dlib/examples/video_frames/frame_000120.jpg b/ml/dlib/examples/video_frames/frame_000120.jpg Binary files differnew file mode 100644 index 00000000..ab73738d --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000120.jpg diff --git a/ml/dlib/examples/video_frames/frame_000121.jpg b/ml/dlib/examples/video_frames/frame_000121.jpg Binary files differnew file mode 100644 index 00000000..03c4705b --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000121.jpg diff --git a/ml/dlib/examples/video_frames/frame_000122.jpg b/ml/dlib/examples/video_frames/frame_000122.jpg Binary files differnew file mode 100644 index 00000000..13c17dc5 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000122.jpg diff --git a/ml/dlib/examples/video_frames/frame_000123.jpg b/ml/dlib/examples/video_frames/frame_000123.jpg Binary files differnew file mode 100644 index 00000000..64ef5ea1 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000123.jpg diff --git a/ml/dlib/examples/video_frames/frame_000124.jpg b/ml/dlib/examples/video_frames/frame_000124.jpg Binary files differnew file mode 100644 index 00000000..a9f1cb61 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000124.jpg diff --git a/ml/dlib/examples/video_frames/frame_000125.jpg b/ml/dlib/examples/video_frames/frame_000125.jpg Binary files differnew file mode 100644 index 00000000..57fc8a24 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000125.jpg diff --git a/ml/dlib/examples/video_frames/frame_000126.jpg b/ml/dlib/examples/video_frames/frame_000126.jpg Binary files differnew file mode 100644 index 00000000..435b104f --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000126.jpg diff --git a/ml/dlib/examples/video_frames/frame_000127.jpg b/ml/dlib/examples/video_frames/frame_000127.jpg Binary files differnew file mode 100644 index 00000000..bb21d1a6 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000127.jpg diff --git a/ml/dlib/examples/video_frames/frame_000128.jpg b/ml/dlib/examples/video_frames/frame_000128.jpg Binary files differnew 
file mode 100644 index 00000000..bcd47fd0 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000128.jpg diff --git a/ml/dlib/examples/video_frames/frame_000129.jpg b/ml/dlib/examples/video_frames/frame_000129.jpg Binary files differnew file mode 100644 index 00000000..871250df --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000129.jpg diff --git a/ml/dlib/examples/video_frames/frame_000130.jpg b/ml/dlib/examples/video_frames/frame_000130.jpg Binary files differnew file mode 100644 index 00000000..84efad32 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000130.jpg diff --git a/ml/dlib/examples/video_frames/frame_000131.jpg b/ml/dlib/examples/video_frames/frame_000131.jpg Binary files differnew file mode 100644 index 00000000..04def55d --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000131.jpg diff --git a/ml/dlib/examples/video_frames/frame_000132.jpg b/ml/dlib/examples/video_frames/frame_000132.jpg Binary files differnew file mode 100644 index 00000000..ca2992c3 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000132.jpg diff --git a/ml/dlib/examples/video_frames/frame_000133.jpg b/ml/dlib/examples/video_frames/frame_000133.jpg Binary files differnew file mode 100644 index 00000000..094d5c67 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000133.jpg diff --git a/ml/dlib/examples/video_frames/frame_000134.jpg b/ml/dlib/examples/video_frames/frame_000134.jpg Binary files differnew file mode 100644 index 00000000..e8c33137 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000134.jpg diff --git a/ml/dlib/examples/video_frames/frame_000135.jpg b/ml/dlib/examples/video_frames/frame_000135.jpg Binary files differnew file mode 100644 index 00000000..6786acb9 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000135.jpg diff --git a/ml/dlib/examples/video_frames/frame_000136.jpg b/ml/dlib/examples/video_frames/frame_000136.jpg Binary files differnew file mode 100644 index 00000000..431db0c2 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000136.jpg diff --git a/ml/dlib/examples/video_frames/frame_000137.jpg b/ml/dlib/examples/video_frames/frame_000137.jpg Binary files differnew file mode 100644 index 00000000..e945be61 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000137.jpg diff --git a/ml/dlib/examples/video_frames/frame_000138.jpg b/ml/dlib/examples/video_frames/frame_000138.jpg Binary files differnew file mode 100644 index 00000000..41762ced --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000138.jpg diff --git a/ml/dlib/examples/video_frames/frame_000139.jpg b/ml/dlib/examples/video_frames/frame_000139.jpg Binary files differnew file mode 100644 index 00000000..5f8ca40e --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000139.jpg diff --git a/ml/dlib/examples/video_frames/frame_000140.jpg b/ml/dlib/examples/video_frames/frame_000140.jpg Binary files differnew file mode 100644 index 00000000..912ab03e --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000140.jpg diff --git a/ml/dlib/examples/video_frames/frame_000141.jpg b/ml/dlib/examples/video_frames/frame_000141.jpg Binary files differnew file mode 100644 index 00000000..7a247a2f --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000141.jpg diff --git a/ml/dlib/examples/video_frames/frame_000142.jpg b/ml/dlib/examples/video_frames/frame_000142.jpg Binary files differnew file mode 100644 index 00000000..a2c51b58 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000142.jpg diff --git a/ml/dlib/examples/video_frames/frame_000143.jpg 
b/ml/dlib/examples/video_frames/frame_000143.jpg Binary files differnew file mode 100644 index 00000000..11631fec --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000143.jpg diff --git a/ml/dlib/examples/video_frames/frame_000144.jpg b/ml/dlib/examples/video_frames/frame_000144.jpg Binary files differnew file mode 100644 index 00000000..d0366c12 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000144.jpg diff --git a/ml/dlib/examples/video_frames/frame_000145.jpg b/ml/dlib/examples/video_frames/frame_000145.jpg Binary files differnew file mode 100644 index 00000000..853fac3e --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000145.jpg diff --git a/ml/dlib/examples/video_frames/frame_000146.jpg b/ml/dlib/examples/video_frames/frame_000146.jpg Binary files differnew file mode 100644 index 00000000..fc9972df --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000146.jpg diff --git a/ml/dlib/examples/video_frames/frame_000147.jpg b/ml/dlib/examples/video_frames/frame_000147.jpg Binary files differnew file mode 100644 index 00000000..3937fae5 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000147.jpg diff --git a/ml/dlib/examples/video_frames/frame_000148.jpg b/ml/dlib/examples/video_frames/frame_000148.jpg Binary files differnew file mode 100644 index 00000000..ad088458 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000148.jpg diff --git a/ml/dlib/examples/video_frames/frame_000149.jpg b/ml/dlib/examples/video_frames/frame_000149.jpg Binary files differnew file mode 100644 index 00000000..2776e083 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000149.jpg diff --git a/ml/dlib/examples/video_frames/frame_000150.jpg b/ml/dlib/examples/video_frames/frame_000150.jpg Binary files differnew file mode 100644 index 00000000..c6f80b94 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000150.jpg diff --git a/ml/dlib/examples/video_frames/frame_000151.jpg b/ml/dlib/examples/video_frames/frame_000151.jpg Binary files differnew file mode 100644 index 00000000..aa7bbe27 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000151.jpg diff --git a/ml/dlib/examples/video_frames/frame_000152.jpg b/ml/dlib/examples/video_frames/frame_000152.jpg Binary files differnew file mode 100644 index 00000000..ad1259b3 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000152.jpg diff --git a/ml/dlib/examples/video_frames/frame_000153.jpg b/ml/dlib/examples/video_frames/frame_000153.jpg Binary files differnew file mode 100644 index 00000000..0b1b84f1 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000153.jpg diff --git a/ml/dlib/examples/video_frames/frame_000154.jpg b/ml/dlib/examples/video_frames/frame_000154.jpg Binary files differnew file mode 100644 index 00000000..9be6d25f --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000154.jpg diff --git a/ml/dlib/examples/video_frames/frame_000155.jpg b/ml/dlib/examples/video_frames/frame_000155.jpg Binary files differnew file mode 100644 index 00000000..7a891331 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000155.jpg diff --git a/ml/dlib/examples/video_frames/frame_000156.jpg b/ml/dlib/examples/video_frames/frame_000156.jpg Binary files differnew file mode 100644 index 00000000..584d7047 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000156.jpg diff --git a/ml/dlib/examples/video_frames/frame_000157.jpg b/ml/dlib/examples/video_frames/frame_000157.jpg Binary files differnew file mode 100644 index 00000000..057d8de5 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000157.jpg 
diff --git a/ml/dlib/examples/video_frames/frame_000158.jpg b/ml/dlib/examples/video_frames/frame_000158.jpg Binary files differnew file mode 100644 index 00000000..755d9a4e --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000158.jpg diff --git a/ml/dlib/examples/video_frames/frame_000159.jpg b/ml/dlib/examples/video_frames/frame_000159.jpg Binary files differnew file mode 100644 index 00000000..a19e4b67 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000159.jpg diff --git a/ml/dlib/examples/video_frames/frame_000160.jpg b/ml/dlib/examples/video_frames/frame_000160.jpg Binary files differnew file mode 100644 index 00000000..f577895b --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000160.jpg diff --git a/ml/dlib/examples/video_frames/frame_000161.jpg b/ml/dlib/examples/video_frames/frame_000161.jpg Binary files differnew file mode 100644 index 00000000..fe407f34 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000161.jpg diff --git a/ml/dlib/examples/video_frames/frame_000162.jpg b/ml/dlib/examples/video_frames/frame_000162.jpg Binary files differnew file mode 100644 index 00000000..fd611c5d --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000162.jpg diff --git a/ml/dlib/examples/video_frames/frame_000163.jpg b/ml/dlib/examples/video_frames/frame_000163.jpg Binary files differnew file mode 100644 index 00000000..054daa9e --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000163.jpg diff --git a/ml/dlib/examples/video_frames/frame_000164.jpg b/ml/dlib/examples/video_frames/frame_000164.jpg Binary files differnew file mode 100644 index 00000000..b7891a4f --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000164.jpg diff --git a/ml/dlib/examples/video_frames/frame_000165.jpg b/ml/dlib/examples/video_frames/frame_000165.jpg Binary files differnew file mode 100644 index 00000000..273d02a9 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000165.jpg diff --git a/ml/dlib/examples/video_frames/frame_000166.jpg b/ml/dlib/examples/video_frames/frame_000166.jpg Binary files differnew file mode 100644 index 00000000..53dc257a --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000166.jpg diff --git a/ml/dlib/examples/video_frames/frame_000167.jpg b/ml/dlib/examples/video_frames/frame_000167.jpg Binary files differnew file mode 100644 index 00000000..b1e52c94 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000167.jpg diff --git a/ml/dlib/examples/video_frames/frame_000168.jpg b/ml/dlib/examples/video_frames/frame_000168.jpg Binary files differnew file mode 100644 index 00000000..6650df13 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000168.jpg diff --git a/ml/dlib/examples/video_frames/frame_000169.jpg b/ml/dlib/examples/video_frames/frame_000169.jpg Binary files differnew file mode 100644 index 00000000..8811ac06 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000169.jpg diff --git a/ml/dlib/examples/video_frames/frame_000170.jpg b/ml/dlib/examples/video_frames/frame_000170.jpg Binary files differnew file mode 100644 index 00000000..e5db6d86 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000170.jpg diff --git a/ml/dlib/examples/video_frames/frame_000171.jpg b/ml/dlib/examples/video_frames/frame_000171.jpg Binary files differnew file mode 100644 index 00000000..01b78123 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000171.jpg diff --git a/ml/dlib/examples/video_frames/frame_000172.jpg b/ml/dlib/examples/video_frames/frame_000172.jpg Binary files differnew file mode 100644 index 00000000..dd423a43 --- 
/dev/null +++ b/ml/dlib/examples/video_frames/frame_000172.jpg diff --git a/ml/dlib/examples/video_frames/frame_000173.jpg b/ml/dlib/examples/video_frames/frame_000173.jpg Binary files differnew file mode 100644 index 00000000..e9921b5f --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000173.jpg diff --git a/ml/dlib/examples/video_frames/frame_000174.jpg b/ml/dlib/examples/video_frames/frame_000174.jpg Binary files differnew file mode 100644 index 00000000..c334e3f5 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000174.jpg diff --git a/ml/dlib/examples/video_frames/frame_000175.jpg b/ml/dlib/examples/video_frames/frame_000175.jpg Binary files differnew file mode 100644 index 00000000..b43abe8e --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000175.jpg diff --git a/ml/dlib/examples/video_frames/frame_000176.jpg b/ml/dlib/examples/video_frames/frame_000176.jpg Binary files differnew file mode 100644 index 00000000..a816a898 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000176.jpg diff --git a/ml/dlib/examples/video_frames/frame_000177.jpg b/ml/dlib/examples/video_frames/frame_000177.jpg Binary files differnew file mode 100644 index 00000000..f8542da3 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000177.jpg diff --git a/ml/dlib/examples/video_frames/frame_000178.jpg b/ml/dlib/examples/video_frames/frame_000178.jpg Binary files differnew file mode 100644 index 00000000..ab6c4d73 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000178.jpg diff --git a/ml/dlib/examples/video_frames/frame_000179.jpg b/ml/dlib/examples/video_frames/frame_000179.jpg Binary files differnew file mode 100644 index 00000000..c07be3ad --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000179.jpg diff --git a/ml/dlib/examples/video_frames/frame_000180.jpg b/ml/dlib/examples/video_frames/frame_000180.jpg Binary files differnew file mode 100644 index 00000000..9e6a503b --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000180.jpg diff --git a/ml/dlib/examples/video_frames/frame_000181.jpg b/ml/dlib/examples/video_frames/frame_000181.jpg Binary files differnew file mode 100644 index 00000000..cc03f8cb --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000181.jpg diff --git a/ml/dlib/examples/video_frames/frame_000182.jpg b/ml/dlib/examples/video_frames/frame_000182.jpg Binary files differnew file mode 100644 index 00000000..fe93728c --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000182.jpg diff --git a/ml/dlib/examples/video_frames/frame_000183.jpg b/ml/dlib/examples/video_frames/frame_000183.jpg Binary files differnew file mode 100644 index 00000000..96ba792f --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000183.jpg diff --git a/ml/dlib/examples/video_frames/frame_000184.jpg b/ml/dlib/examples/video_frames/frame_000184.jpg Binary files differnew file mode 100644 index 00000000..b43db9ea --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000184.jpg diff --git a/ml/dlib/examples/video_frames/frame_000185.jpg b/ml/dlib/examples/video_frames/frame_000185.jpg Binary files differnew file mode 100644 index 00000000..0b31bb23 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000185.jpg diff --git a/ml/dlib/examples/video_frames/frame_000186.jpg b/ml/dlib/examples/video_frames/frame_000186.jpg Binary files differnew file mode 100644 index 00000000..44156dd7 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000186.jpg diff --git a/ml/dlib/examples/video_frames/frame_000187.jpg b/ml/dlib/examples/video_frames/frame_000187.jpg Binary 
files differnew file mode 100644 index 00000000..2f972be3 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000187.jpg diff --git a/ml/dlib/examples/video_frames/frame_000188.jpg b/ml/dlib/examples/video_frames/frame_000188.jpg Binary files differnew file mode 100644 index 00000000..fc4c2fbe --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000188.jpg diff --git a/ml/dlib/examples/video_frames/frame_000189.jpg b/ml/dlib/examples/video_frames/frame_000189.jpg Binary files differnew file mode 100644 index 00000000..1ef51a23 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000189.jpg diff --git a/ml/dlib/examples/video_frames/frame_000190.jpg b/ml/dlib/examples/video_frames/frame_000190.jpg Binary files differnew file mode 100644 index 00000000..d5f65c1f --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000190.jpg diff --git a/ml/dlib/examples/video_frames/frame_000191.jpg b/ml/dlib/examples/video_frames/frame_000191.jpg Binary files differnew file mode 100644 index 00000000..00f7e8dc --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000191.jpg diff --git a/ml/dlib/examples/video_frames/frame_000192.jpg b/ml/dlib/examples/video_frames/frame_000192.jpg Binary files differnew file mode 100644 index 00000000..6a8de8bf --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000192.jpg diff --git a/ml/dlib/examples/video_frames/frame_000193.jpg b/ml/dlib/examples/video_frames/frame_000193.jpg Binary files differnew file mode 100644 index 00000000..7a8e5b49 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000193.jpg diff --git a/ml/dlib/examples/video_frames/frame_000194.jpg b/ml/dlib/examples/video_frames/frame_000194.jpg Binary files differnew file mode 100644 index 00000000..d338701d --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000194.jpg diff --git a/ml/dlib/examples/video_frames/frame_000195.jpg b/ml/dlib/examples/video_frames/frame_000195.jpg Binary files differnew file mode 100644 index 00000000..85b758f3 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000195.jpg diff --git a/ml/dlib/examples/video_frames/frame_000196.jpg b/ml/dlib/examples/video_frames/frame_000196.jpg Binary files differnew file mode 100644 index 00000000..624fc138 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000196.jpg diff --git a/ml/dlib/examples/video_frames/frame_000197.jpg b/ml/dlib/examples/video_frames/frame_000197.jpg Binary files differnew file mode 100644 index 00000000..733cf4e3 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000197.jpg diff --git a/ml/dlib/examples/video_frames/frame_000198.jpg b/ml/dlib/examples/video_frames/frame_000198.jpg Binary files differnew file mode 100644 index 00000000..fc42278f --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000198.jpg diff --git a/ml/dlib/examples/video_frames/frame_000199.jpg b/ml/dlib/examples/video_frames/frame_000199.jpg Binary files differnew file mode 100644 index 00000000..079c3211 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000199.jpg diff --git a/ml/dlib/examples/video_frames/frame_000200.jpg b/ml/dlib/examples/video_frames/frame_000200.jpg Binary files differnew file mode 100644 index 00000000..5eeb398c --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000200.jpg diff --git a/ml/dlib/examples/video_frames/frame_000201.jpg b/ml/dlib/examples/video_frames/frame_000201.jpg Binary files differnew file mode 100644 index 00000000..053858a8 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000201.jpg diff --git 
a/ml/dlib/examples/video_frames/frame_000202.jpg b/ml/dlib/examples/video_frames/frame_000202.jpg Binary files differnew file mode 100644 index 00000000..366c06d3 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000202.jpg diff --git a/ml/dlib/examples/video_frames/frame_000203.jpg b/ml/dlib/examples/video_frames/frame_000203.jpg Binary files differnew file mode 100644 index 00000000..f789ff5b --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000203.jpg diff --git a/ml/dlib/examples/video_frames/frame_000204.jpg b/ml/dlib/examples/video_frames/frame_000204.jpg Binary files differnew file mode 100644 index 00000000..c0616ac1 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000204.jpg diff --git a/ml/dlib/examples/video_frames/frame_000205.jpg b/ml/dlib/examples/video_frames/frame_000205.jpg Binary files differnew file mode 100644 index 00000000..4994bc6b --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000205.jpg diff --git a/ml/dlib/examples/video_frames/frame_000206.jpg b/ml/dlib/examples/video_frames/frame_000206.jpg Binary files differnew file mode 100644 index 00000000..8ca39e3d --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000206.jpg diff --git a/ml/dlib/examples/video_frames/frame_000207.jpg b/ml/dlib/examples/video_frames/frame_000207.jpg Binary files differnew file mode 100644 index 00000000..1cc9f95a --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000207.jpg diff --git a/ml/dlib/examples/video_frames/frame_000208.jpg b/ml/dlib/examples/video_frames/frame_000208.jpg Binary files differnew file mode 100644 index 00000000..3d4e82cd --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000208.jpg diff --git a/ml/dlib/examples/video_frames/frame_000209.jpg b/ml/dlib/examples/video_frames/frame_000209.jpg Binary files differnew file mode 100644 index 00000000..2a965250 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000209.jpg diff --git a/ml/dlib/examples/video_frames/frame_000210.jpg b/ml/dlib/examples/video_frames/frame_000210.jpg Binary files differnew file mode 100644 index 00000000..4b1f034a --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000210.jpg diff --git a/ml/dlib/examples/video_frames/frame_000211.jpg b/ml/dlib/examples/video_frames/frame_000211.jpg Binary files differnew file mode 100644 index 00000000..67f6d13b --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000211.jpg diff --git a/ml/dlib/examples/video_frames/frame_000212.jpg b/ml/dlib/examples/video_frames/frame_000212.jpg Binary files differnew file mode 100644 index 00000000..cae3ac6e --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000212.jpg diff --git a/ml/dlib/examples/video_frames/frame_000213.jpg b/ml/dlib/examples/video_frames/frame_000213.jpg Binary files differnew file mode 100644 index 00000000..21bb9ab4 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000213.jpg diff --git a/ml/dlib/examples/video_frames/frame_000214.jpg b/ml/dlib/examples/video_frames/frame_000214.jpg Binary files differnew file mode 100644 index 00000000..881cef7e --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000214.jpg diff --git a/ml/dlib/examples/video_frames/frame_000215.jpg b/ml/dlib/examples/video_frames/frame_000215.jpg Binary files differnew file mode 100644 index 00000000..9a7ff9e8 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000215.jpg diff --git a/ml/dlib/examples/video_frames/frame_000216.jpg b/ml/dlib/examples/video_frames/frame_000216.jpg Binary files differnew file mode 100644 index 00000000..a7335739 --- /dev/null +++ 
b/ml/dlib/examples/video_frames/frame_000216.jpg diff --git a/ml/dlib/examples/video_frames/frame_000217.jpg b/ml/dlib/examples/video_frames/frame_000217.jpg Binary files differnew file mode 100644 index 00000000..f590c51c --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000217.jpg diff --git a/ml/dlib/examples/video_frames/frame_000218.jpg b/ml/dlib/examples/video_frames/frame_000218.jpg Binary files differnew file mode 100644 index 00000000..66c3c251 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000218.jpg diff --git a/ml/dlib/examples/video_frames/frame_000219.jpg b/ml/dlib/examples/video_frames/frame_000219.jpg Binary files differnew file mode 100644 index 00000000..c637d61a --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000219.jpg diff --git a/ml/dlib/examples/video_frames/frame_000220.jpg b/ml/dlib/examples/video_frames/frame_000220.jpg Binary files differnew file mode 100644 index 00000000..3e4fc3d1 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000220.jpg diff --git a/ml/dlib/examples/video_frames/frame_000221.jpg b/ml/dlib/examples/video_frames/frame_000221.jpg Binary files differnew file mode 100644 index 00000000..1e173636 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000221.jpg diff --git a/ml/dlib/examples/video_frames/frame_000222.jpg b/ml/dlib/examples/video_frames/frame_000222.jpg Binary files differnew file mode 100644 index 00000000..ff77b346 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000222.jpg diff --git a/ml/dlib/examples/video_frames/frame_000223.jpg b/ml/dlib/examples/video_frames/frame_000223.jpg Binary files differnew file mode 100644 index 00000000..6c107288 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000223.jpg diff --git a/ml/dlib/examples/video_frames/frame_000224.jpg b/ml/dlib/examples/video_frames/frame_000224.jpg Binary files differnew file mode 100644 index 00000000..3c0a0e9b --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000224.jpg diff --git a/ml/dlib/examples/video_frames/frame_000225.jpg b/ml/dlib/examples/video_frames/frame_000225.jpg Binary files differnew file mode 100644 index 00000000..2c07fffb --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000225.jpg diff --git a/ml/dlib/examples/video_frames/frame_000226.jpg b/ml/dlib/examples/video_frames/frame_000226.jpg Binary files differnew file mode 100644 index 00000000..4695b5cd --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000226.jpg diff --git a/ml/dlib/examples/video_frames/frame_000227.jpg b/ml/dlib/examples/video_frames/frame_000227.jpg Binary files differnew file mode 100644 index 00000000..920faeea --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000227.jpg diff --git a/ml/dlib/examples/video_frames/frame_000228.jpg b/ml/dlib/examples/video_frames/frame_000228.jpg Binary files differnew file mode 100644 index 00000000..9d3de28f --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000228.jpg diff --git a/ml/dlib/examples/video_frames/frame_000229.jpg b/ml/dlib/examples/video_frames/frame_000229.jpg Binary files differnew file mode 100644 index 00000000..e8c0444c --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000229.jpg diff --git a/ml/dlib/examples/video_frames/frame_000230.jpg b/ml/dlib/examples/video_frames/frame_000230.jpg Binary files differnew file mode 100644 index 00000000..1e65a4db --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000230.jpg diff --git a/ml/dlib/examples/video_frames/frame_000231.jpg b/ml/dlib/examples/video_frames/frame_000231.jpg Binary files differnew 
file mode 100644 index 00000000..7a86ac53 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000231.jpg diff --git a/ml/dlib/examples/video_frames/frame_000232.jpg b/ml/dlib/examples/video_frames/frame_000232.jpg Binary files differnew file mode 100644 index 00000000..2f38dba4 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000232.jpg diff --git a/ml/dlib/examples/video_frames/frame_000233.jpg b/ml/dlib/examples/video_frames/frame_000233.jpg Binary files differnew file mode 100644 index 00000000..c72735c2 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000233.jpg diff --git a/ml/dlib/examples/video_frames/frame_000234.jpg b/ml/dlib/examples/video_frames/frame_000234.jpg Binary files differnew file mode 100644 index 00000000..bec85f21 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000234.jpg diff --git a/ml/dlib/examples/video_frames/frame_000235.jpg b/ml/dlib/examples/video_frames/frame_000235.jpg Binary files differnew file mode 100644 index 00000000..425bf3f3 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000235.jpg diff --git a/ml/dlib/examples/video_frames/frame_000236.jpg b/ml/dlib/examples/video_frames/frame_000236.jpg Binary files differnew file mode 100644 index 00000000..0ca67ddc --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000236.jpg diff --git a/ml/dlib/examples/video_frames/frame_000237.jpg b/ml/dlib/examples/video_frames/frame_000237.jpg Binary files differnew file mode 100644 index 00000000..8d6581b9 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000237.jpg diff --git a/ml/dlib/examples/video_frames/frame_000238.jpg b/ml/dlib/examples/video_frames/frame_000238.jpg Binary files differnew file mode 100644 index 00000000..22cf36f7 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000238.jpg diff --git a/ml/dlib/examples/video_frames/frame_000239.jpg b/ml/dlib/examples/video_frames/frame_000239.jpg Binary files differnew file mode 100644 index 00000000..bb368be6 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000239.jpg diff --git a/ml/dlib/examples/video_frames/frame_000240.jpg b/ml/dlib/examples/video_frames/frame_000240.jpg Binary files differnew file mode 100644 index 00000000..4073cddd --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000240.jpg diff --git a/ml/dlib/examples/video_frames/frame_000241.jpg b/ml/dlib/examples/video_frames/frame_000241.jpg Binary files differnew file mode 100644 index 00000000..d3347b20 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000241.jpg diff --git a/ml/dlib/examples/video_frames/frame_000242.jpg b/ml/dlib/examples/video_frames/frame_000242.jpg Binary files differnew file mode 100644 index 00000000..6df093f2 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000242.jpg diff --git a/ml/dlib/examples/video_frames/frame_000243.jpg b/ml/dlib/examples/video_frames/frame_000243.jpg Binary files differnew file mode 100644 index 00000000..c418887a --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000243.jpg diff --git a/ml/dlib/examples/video_frames/frame_000244.jpg b/ml/dlib/examples/video_frames/frame_000244.jpg Binary files differnew file mode 100644 index 00000000..d60fd8ca --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000244.jpg diff --git a/ml/dlib/examples/video_frames/frame_000245.jpg b/ml/dlib/examples/video_frames/frame_000245.jpg Binary files differnew file mode 100644 index 00000000..8e5ff86f --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000245.jpg diff --git a/ml/dlib/examples/video_frames/frame_000246.jpg 
b/ml/dlib/examples/video_frames/frame_000246.jpg Binary files differnew file mode 100644 index 00000000..795ba1c5 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000246.jpg diff --git a/ml/dlib/examples/video_frames/frame_000247.jpg b/ml/dlib/examples/video_frames/frame_000247.jpg Binary files differnew file mode 100644 index 00000000..5842e61c --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000247.jpg diff --git a/ml/dlib/examples/video_frames/frame_000248.jpg b/ml/dlib/examples/video_frames/frame_000248.jpg Binary files differnew file mode 100644 index 00000000..8d64586d --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000248.jpg diff --git a/ml/dlib/examples/video_frames/frame_000249.jpg b/ml/dlib/examples/video_frames/frame_000249.jpg Binary files differnew file mode 100644 index 00000000..6a314e4b --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000249.jpg diff --git a/ml/dlib/examples/video_frames/frame_000250.jpg b/ml/dlib/examples/video_frames/frame_000250.jpg Binary files differnew file mode 100644 index 00000000..d9ef51b2 --- /dev/null +++ b/ml/dlib/examples/video_frames/frame_000250.jpg diff --git a/ml/dlib/examples/video_frames/license.txt b/ml/dlib/examples/video_frames/license.txt new file mode 100644 index 00000000..d3b6ac69 --- /dev/null +++ b/ml/dlib/examples/video_frames/license.txt @@ -0,0 +1,6 @@ +Please read terms of use for the content of this zip file at this websites: +English: http://creativecommons.org/licenses/by-sa/3.0/de/deed.en +German: http://creativecommons.org/licenses/by-sa/3.0/de/ + + +Note that this video is from the BoBoT dataset (see http://www.iai.uni-bonn.de/~kleind/tracking/) but has been compressed a lot, cropped, and converted to grayscale to make the dlib archive file as small as possible. diff --git a/ml/dlib/examples/video_tracking_ex.cpp b/ml/dlib/examples/video_tracking_ex.cpp new file mode 100644 index 00000000..464baaf9 --- /dev/null +++ b/ml/dlib/examples/video_tracking_ex.cpp @@ -0,0 +1,72 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This example shows how to use the correlation_tracker from the dlib C++ library. This + object lets you track the position of an object as it moves from frame to frame in a + video sequence. To use it, you give the correlation_tracker the bounding box of the + object you want to track in the current video frame. Then it will identify the + location of the object in subsequent frames. + + In this particular example, we are going to run on the video sequence that comes with + dlib, which can be found in the examples/video_frames folder. This video shows a juice + box sitting on a table and someone is waving the camera around. The task is to track the + position of the juice box as the camera moves around. +*/ + +#include <dlib/image_processing.h> +#include <dlib/gui_widgets.h> +#include <dlib/image_io.h> +#include <dlib/dir_nav.h> + + +using namespace dlib; +using namespace std; + +int main(int argc, char** argv) try +{ + if (argc != 2) + { + cout << "Call this program like this: " << endl; + cout << "./video_tracking_ex ../video_frames" << endl; + return 1; + } + + // Get the list of video frames. + std::vector<file> files = get_files_in_directory_tree(argv[1], match_ending(".jpg")); + std::sort(files.begin(), files.end()); + if (files.size() == 0) + { + cout << "No images found in " << argv[1] << endl; + return 1; + } + + // Load the first frame. 
+ array2d<unsigned char> img; + load_image(img, files[0]); + // Now create a tracker and start a track on the juice box. If you look at the first + // frame you will see that the juice box is centered at pixel point(93,110) and is 38 + // pixels wide and 86 pixels tall. + correlation_tracker tracker; + tracker.start_track(img, centered_rect(point(93,110), 38, 86)); + + // Now run the tracker. All we have to do is call tracker.update() and it will keep + // track of the juice box! + image_window win; + for (unsigned long i = 1; i < files.size(); ++i) + { + load_image(img, files[i]); + tracker.update(img); + + win.set_image(img); + win.clear_overlay(); + win.add_overlay(tracker.get_position()); + + cout << "hit enter to process next frame" << endl; + cin.get(); + } +} +catch (std::exception& e) +{ + cout << e.what() << endl; +} + diff --git a/ml/dlib/examples/webcam_face_pose_ex.cpp b/ml/dlib/examples/webcam_face_pose_ex.cpp new file mode 100644 index 00000000..e3b00d0f --- /dev/null +++ b/ml/dlib/examples/webcam_face_pose_ex.cpp @@ -0,0 +1,100 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This example program shows how to find frontal human faces in an image and + estimate their pose. The pose takes the form of 68 landmarks. These are + points on the face such as the corners of the mouth, along the eyebrows, on + the eyes, and so forth. + + + This example is essentially just a version of the face_landmark_detection_ex.cpp + example modified to use OpenCV's VideoCapture object to read from a camera instead + of files. + + + Finally, note that the face detector is fastest when compiled with at least + SSE2 instructions enabled. So if you are using a PC with an Intel or AMD + chip then you should enable at least SSE2 instructions. If you are using + cmake to compile this program you can enable them by using one of the + following commands when you create the build project: + cmake path_to_dlib_root/examples -DUSE_SSE2_INSTRUCTIONS=ON + cmake path_to_dlib_root/examples -DUSE_SSE4_INSTRUCTIONS=ON + cmake path_to_dlib_root/examples -DUSE_AVX_INSTRUCTIONS=ON + This will set the appropriate compiler options for GCC, clang, Visual + Studio, or the Intel compiler. If you are using another compiler then you + need to consult your compiler's manual to determine how to enable these + instructions. Note that AVX is the fastest but requires a CPU from at least + 2011. SSE4 is the next fastest and is supported by most current machines. +*/ + +#include <dlib/opencv.h> +#include <opencv2/highgui/highgui.hpp> +#include <dlib/image_processing/frontal_face_detector.h> +#include <dlib/image_processing/render_face_detections.h> +#include <dlib/image_processing.h> +#include <dlib/gui_widgets.h> + +using namespace dlib; +using namespace std; + +int main() +{ + try + { + cv::VideoCapture cap(0); + if (!cap.isOpened()) + { + cerr << "Unable to connect to camera" << endl; + return 1; + } + + image_window win; + + // Load face detection and pose estimation models. + frontal_face_detector detector = get_frontal_face_detector(); + shape_predictor pose_model; + deserialize("shape_predictor_68_face_landmarks.dat") >> pose_model; + + // Grab and process frames until the main window is closed by the user. + while(!win.is_closed()) + { + // Grab a frame + cv::Mat temp; + if (!cap.read(temp)) + { + break; + } + // Turn OpenCV's Mat into something dlib can deal with. Note that this just + // wraps the Mat object; it doesn't copy anything.
So cimg is only valid as + // long as temp is valid. Also don't do anything to temp that would cause it + // to reallocate the memory which stores the image as that will make cimg + // contain dangling pointers. This basically means you shouldn't modify temp + // while using cimg. + cv_image<bgr_pixel> cimg(temp); + + // Detect faces + std::vector<rectangle> faces = detector(cimg); + // Find the pose of each face. + std::vector<full_object_detection> shapes; + for (unsigned long i = 0; i < faces.size(); ++i) + shapes.push_back(pose_model(cimg, faces[i])); + + // Display it all on the screen + win.clear_overlay(); + win.set_image(cimg); + win.add_overlay(render_face_detections(shapes)); + } + } + catch(serialization_error& e) + { + cout << "You need dlib's default face landmarking model file to run this example." << endl; + cout << "You can get it from the following URL: " << endl; + cout << " http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2" << endl; + cout << endl << e.what() << endl; + } + catch(exception& e) + { + cout << e.what() << endl; + } +} + diff --git a/ml/dlib/examples/xml_parser_ex.cpp b/ml/dlib/examples/xml_parser_ex.cpp new file mode 100644 index 00000000..0d213959 --- /dev/null +++ b/ml/dlib/examples/xml_parser_ex.cpp @@ -0,0 +1,115 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + + This is an example illustrating the use of the xml_parser component in + the dlib C++ Library. + + This example simply reads in an xml file and prints the parsing events + to the screen. +*/ + + + + +#include <dlib/xml_parser.h> +#include <iostream> +#include <fstream> + + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------------------- + +class doc_handler : public document_handler +{ + /* + As the parser runs it generates events when it encounters tags and + data in an XML file. To be able to receive these events all you have to + do is make a class that inherits from dlib::document_handler and + implements its virtual methods. Then you simply associate an + instance of your class with the xml_parser. + + So this class is a simple example document handler that just prints + all the events to the screen. 
+ */ +public: + + virtual void start_document ( + ) + { + cout << "parsing begins" << endl; + } + + virtual void end_document ( + ) + { + cout << "Parsing done" << endl; + } + + virtual void start_element ( + const unsigned long line_number, + const std::string& name, + const dlib::attribute_list& atts + ) + { + cout << "on line " << line_number << " we hit the <" << name << "> tag" << endl; + + // print all the tag's attributes + atts.reset(); + while (atts.move_next()) + { + cout << "\tattribute: " << atts.element().key() << " = " << atts.element().value() << endl; + } + } + + virtual void end_element ( + const unsigned long line_number, + const std::string& name + ) + { + cout << "on line " << line_number << " we hit the closing tag </" << name << ">" << endl; + } + + virtual void characters ( + const std::string& data + ) + { + cout << "Got some data between tags and it is:\n" << data << endl; + } + + virtual void processing_instruction ( + const unsigned long line_number, + const std::string& target, + const std::string& data + ) + { + cout << "on line " << line_number << " we hit a processing instruction with a target of '" + << target << "' and data '" << data << "'" << endl; + } +}; + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) +{ + try + { + // Check if the user entered an argument to this application. + if (argc != 2) + { + cout << "Please enter an xml file to parse on the command line" << endl; + return 1; + } + + doc_handler dh; + // Now run the parser and tell it to call our doc_handler for each of the parsing + // events. + parse_xml(argv[1], dh); + } + catch (std::exception& e) + { + cout << e.what() << endl; + } +}
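
For a quick end-to-end check of the parser, here is a minimal companion sketch. It is not part of the dlib sources: the driver itself, the file name "example.xml", and its contents are made up for illustration, and it assumes the doc_handler class defined in the example above is in scope. It writes a tiny XML document to disk and then runs dlib's parse_xml() on it, so you can watch the parsing events fire in order.

    // Hypothetical driver (not from the dlib distribution). Assumes the
    // doc_handler class from xml_parser_ex.cpp above is visible here.
    #include <dlib/xml_parser.h>
    #include <fstream>

    int main()
    {
        // Write a small test document. The name "example.xml" is an
        // arbitrary choice for this sketch. The scope block closes the
        // file before we hand it to the parser.
        {
            std::ofstream fout("example.xml");
            fout << "<dataset name=\"faces\">\n"
                 << "    <image file=\"img1.jpg\">front view</image>\n"
                 << "</dataset>\n";
        }

        // Parse it with the handler defined above. You should see a
        // start_document event, start/end element events for <dataset>
        // and <image> (with their attributes printed), characters events
        // for the text between the tags, and an end_document event.
        doc_handler dh;
        dlib::parse_xml("example.xml", dh);
    }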