From 310edf444908b09ea6d00c03baceb7925f3bb7a2 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Thu, 21 Mar 2024 18:19:04 +0100
Subject: Merging upstream version 1.45.0.

Signed-off-by: Daniel Baumann
---
 ml/dlib/examples/empirical_kernel_map_ex.cpp | 355 ---------------------------
 1 file changed, 355 deletions(-)
 delete mode 100644 ml/dlib/examples/empirical_kernel_map_ex.cpp

diff --git a/ml/dlib/examples/empirical_kernel_map_ex.cpp b/ml/dlib/examples/empirical_kernel_map_ex.cpp
deleted file mode 100644
index 9f7b1a579..000000000
--- a/ml/dlib/examples/empirical_kernel_map_ex.cpp
+++ /dev/null
@@ -1,355 +0,0 @@
-// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
-/*
-
-    This is an example illustrating the use of the empirical_kernel_map
-    from the dlib C++ Library.
-
-    This example program assumes you are familiar with some general elements of
-    the library.  In particular, you should have at least read the svm_ex.cpp
-    and matrix_ex.cpp examples.
-
-
-    Most of the machine learning algorithms in dlib are some flavor of "kernel
-    machine".  This means they are all simple linear algorithms that have been
-    formulated such that the only way they look at the data given by a user is
-    via dot products between the data samples.  These algorithms are made more
-    useful via the application of the so-called kernel trick.  This trick is to
-    replace the dot product with a user supplied function which takes two
-    samples and returns a real number.  This function is the kernel that is
-    required by so many algorithms.  The most basic kernel is the
-    linear_kernel, which is simply a normal dot product.  More interesting,
-    however, are kernels which first apply some nonlinear transformation to the
-    user's data samples and then compute a dot product.  In this way, a simple
-    algorithm that finds a linear plane to separate data (e.g. the SVM
-    algorithm) can be made to solve complex nonlinear learning problems.
-
-    An important element of the kernel trick is that these kernel functions
-    perform the nonlinear transformation implicitly.  That is, if you look at
-    the implementations of these kernel functions you won't see code that
-    transforms two input vectors in some way and then computes their dot
-    product.  Instead you will see a simple function that takes two input
-    vectors and just computes a single real number via some simple process.
-    You can basically think of this as an optimization.  Imagine that
-    originally we wrote out the entire procedure to perform the nonlinear
-    transformation and then compute the dot product, but then noticed we could
-    cancel a few terms here and there and simplify the whole thing down into a
-    more compact and easily evaluated form.  The result is a nice function that
-    computes what we want, but we no longer get to see what those nonlinearly
-    transformed input vectors are.
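-
-    As a concrete illustration of this (a standard textbook example, not
-    something used later in this program), consider the polynomial kernel
-    k(x,y) = (dot(x,y))^2 applied to 2-dimensional vectors.  It equals an
-    ordinary dot product taken after the explicit nonlinear map
-        phi(x) = (x1*x1, sqrt(2)*x1*x2, x2*x2)
-    since
-        dot(phi(x),phi(y)) = x1^2*y1^2 + 2*x1*x2*y1*y2 + x2^2*y2^2 = (dot(x,y))^2.
-    The kernel function never materializes phi(x); it just computes the number
-    on the right directly.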
-
-    The empirical_kernel_map is a tool that undoes this.  It allows you to
-    obtain these nonlinearly transformed vectors.  It does this by taking a set
-    of data samples from the user (referred to as basis samples), applying the
-    nonlinear transformation to all of them, and then constructing a set of
-    orthonormal basis vectors which span the space occupied by those
-    transformed input samples.  Then, if we wish to obtain the nonlinear
-    version of any data sample, we can simply project it onto this orthonormal
-    basis and we obtain a regular vector of real numbers which represents the
-    nonlinearly transformed version of the data sample.  The
-    empirical_kernel_map has been formulated to use only dot products between
-    data samples, so it is capable of performing this service for any user
-    supplied kernel function.
-
-    The empirical_kernel_map is useful because it is often difficult to
-    formulate an algorithm in a way that uses only dot products.  So the
-    empirical_kernel_map lets us easily kernelize any algorithm we like by
-    using this object during a preprocessing step.  However, it should be noted
-    that the algorithm is only practical when used with at most a few thousand
-    basis samples.  Fortunately, most datasets live in subspaces that are
-    relatively low dimensional.  So for these datasets, using the
-    empirical_kernel_map is practical, assuming an appropriate set of basis
-    samples can be selected by the user.  To help with this, dlib supplies the
-    linearly_independent_subset_finder.  I also often find that just picking a
-    random subset of the data as a basis works well.
-
-
-    In what follows, we walk through the process of creating an
-    empirical_kernel_map, projecting data to obtain the nonlinearly transformed
-    vectors, and then doing a few interesting things with the data.
-*/
-
-
-
-
-#include <dlib/svm.h>
-#include <dlib/rand.h>
-#include <vector>
-#include <iostream>
-
-
-using namespace std;
-using namespace dlib;
-
-// ----------------------------------------------------------------------------------------
-
-// First let's make a typedef for the kind of samples we will be using.
-typedef matrix<double, 0, 1> sample_type;
-
-// We will be using the radial_basis_kernel in this example program.
-typedef radial_basis_kernel<sample_type> kernel_type;
-
-// ----------------------------------------------------------------------------------------
-
-void generate_concentric_circles (
-    std::vector<sample_type>& samples,
-    std::vector<double>& labels,
-    const int num_points
-);
-/*!
-    requires
-        - num_points > 0
-    ensures
-        - generates two circles centered at the point (0,0), one of radius 1 and
-          the other of radius 5.  These points are stored into samples.  labels
-          will tell you if a given sample is from the smaller circle (its label
-          will be 1) or from the larger circle (its label will be 2).
-        - each circle will be made up of num_points points
-!*/
-
-// ----------------------------------------------------------------------------------------
-
-void test_empirical_kernel_map (
-    const std::vector<sample_type>& samples,
-    const std::vector<double>& labels,
-    const empirical_kernel_map<kernel_type>& ekm
-);
-/*!
-    This function computes various interesting things with the
-    empirical_kernel_map.  See its implementation below for details.
-!*/
-
-// ----------------------------------------------------------------------------------------
-
-int main()
-{
-    std::vector<sample_type> samples;
-    std::vector<double> labels;
-
-    // Declare an instance of the kernel we will be using.
-    const kernel_type kern(0.1);
-
-    // Create a dataset with two concentric circles.  There will be 100 points on each circle.
-    generate_concentric_circles(samples, labels, 100);
-
-    empirical_kernel_map<kernel_type> ekm;
-
-
-    // Here we create an empirical_kernel_map using all of our data samples as basis samples.
-    cout << "\n\nBuilding an empirical_kernel_map with " << samples.size() << " basis samples." << endl;
-    ekm.load(kern, samples);
-    cout << "Test the empirical_kernel_map when loaded with every sample." << endl;
-    test_empirical_kernel_map(samples, labels, ekm);
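-
-    // As mentioned in the introduction, just picking a random subset of the
-    // data as the basis also tends to work well.  A minimal sketch of that
-    // alternative follows; it assumes dlib's randomly_subsample() utility and
-    // is not part of the original example:
-    //
-    //    ekm.load(kern, randomly_subsample(samples, 40));
-    //    test_empirical_kernel_map(samples, labels, ekm);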
-
-
-
-
-
-    // Create a new dataset with two concentric circles.  There will be 1000 points on each circle.
-    generate_concentric_circles(samples, labels, 1000);
-    // Rather than use all 2000 samples as basis samples we are going to use the
-    // linearly_independent_subset_finder to pick out a good basis set.  The
-    // idea behind this object is to try and find the 40 or so samples that
-    // best span the subspace containing all the data.
-    linearly_independent_subset_finder<kernel_type> lisf(kern, 40);
-    // Populate lisf with samples.  We have configured it to allow at most 40
-    // samples, but this function may determine that fewer samples are
-    // necessary to form a good basis.  In this example program it will select
-    // only 26.
-    fill_lisf(lisf, samples);
-
-    // Now reload the empirical_kernel_map but this time using only our small
-    // basis selected using the linearly_independent_subset_finder.
-    cout << "\n\nBuilding an empirical_kernel_map with " << lisf.size() << " basis samples." << endl;
-    ekm.load(lisf);
-    cout << "Test the empirical_kernel_map when loaded with samples from the lisf object." << endl;
-    test_empirical_kernel_map(samples, labels, ekm);
-
-
-    cout << endl;
-}
-
-// ----------------------------------------------------------------------------------------
-
-void test_empirical_kernel_map (
-    const std::vector<sample_type>& samples,
-    const std::vector<double>& labels,
-    const empirical_kernel_map<kernel_type>& ekm
-)
-{
-
-    std::vector<sample_type> projected_samples;
-
-    // The first thing we do is compute the nonlinearly projected vectors using
-    // the empirical_kernel_map.
-    for (unsigned long i = 0; i < samples.size(); ++i)
-    {
-        projected_samples.push_back(ekm.project(samples[i]));
-    }
-
-    // Note that a kernel matrix is just a matrix M such that
-    // M(i,j) == kernel(samples[i],samples[j]).  So below we are computing the
-    // normal kernel matrix as given by the radial_basis_kernel and the input
-    // samples.  We also compute the kernel matrix for all the
-    // projected_samples as given by the linear_kernel.  Note that the
-    // linear_kernel just computes normal dot products.  So what we want to see
-    // is that the dot products between all the projected_samples are the same
-    // as the outputs of the kernel function for their respective untransformed
-    // input samples.  If they match then we know that the empirical_kernel_map
-    // is working properly.
-    const matrix<double> normal_kernel_matrix = kernel_matrix(ekm.get_kernel(), samples);
-    const matrix<double> new_kernel_matrix = kernel_matrix(linear_kernel<sample_type>(), projected_samples);
-
-    cout << "Max kernel matrix error: " << max(abs(normal_kernel_matrix - new_kernel_matrix)) << endl;
-    cout << "Mean kernel matrix error: " << mean(abs(normal_kernel_matrix - new_kernel_matrix)) << endl;
-    /*
-        Example outputs from these cout statements.
-        For the case where we use all samples as basis samples:
-            Max kernel matrix error: 7.32747e-15
-            Mean kernel matrix error: 7.47789e-16
-
-        For the case where we use only 26 samples as basis samples:
-            Max kernel matrix error: 0.000953573
-            Mean kernel matrix error: 2.26008e-05
-
-
-        Note that if we use enough basis samples we can perfectly span the
-        space of input samples.  In that case we get errors that are
-        essentially just rounding noise.  (Moreover, using all the samples is
-        always enough since they are always within their own span.)  Once we
-        start to use fewer basis samples we may begin to get approximation
-        error.  In the second case we used 26, and we can see that the data
-        doesn't really lie exactly in a 26 dimensional subspace.  But it is
-        pretty close.
-    */
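-
-    // If you want to measure this approximation error on a per-sample basis,
-    // project() has a two-argument overload that reports it (see the
-    // empirical_kernel_map documentation).  A minimal sketch, not part of the
-    // original example:
-    //
-    //    double projection_error;
-    //    const sample_type p = ekm.project(samples[0], projection_error);
-    //    cout << "projection error for sample 0: " << projection_error << endl;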
-
-
-    // Now let's do something more interesting.  The following loop finds the
-    // centroids of the two classes of data.
-    sample_type class1_center;
-    sample_type class2_center;
-    for (unsigned long i = 0; i < projected_samples.size(); ++i)
-    {
-        if (labels[i] == 1)
-            class1_center += projected_samples[i];
-        else
-            class2_center += projected_samples[i];
-    }
-
-    const int points_per_class = samples.size()/2;
-    class1_center /= points_per_class;
-    class2_center /= points_per_class;
-
-
-    // Now classify points by which center they are nearest.  Recall that the
-    // data is made up of two concentric circles.  Normally you can't separate
-    // two concentric circles by checking which center a point is nearest to,
-    // since both circles have the same center.  However, the kernel trick
-    // makes the data separable and the loop below will perfectly classify each
-    // data point.
-    for (unsigned long i = 0; i < projected_samples.size(); ++i)
-    {
-        double distance_to_class1 = length(projected_samples[i] - class1_center);
-        double distance_to_class2 = length(projected_samples[i] - class2_center);
-
-        bool predicted_as_class_1 = (distance_to_class1 < distance_to_class2);
-
-        // Now print a message for any misclassified points.
-        if (predicted_as_class_1 == true && labels[i] != 1)
-            cout << "A point was misclassified" << endl;
-
-        if (predicted_as_class_1 == false && labels[i] != 2)
-            cout << "A point was misclassified" << endl;
-    }
-
-
-
-    // Next, note that classifying a point based on its distances to two other
-    // points is the same thing as using the plane that lies between those two
-    // points as a decision boundary.  So let's compute that decision plane and
-    // use it to classify all the points.
-
-    sample_type plane_normal_vector = class1_center - class2_center;
-    // The point right in the center of our two classes should be on the
-    // deciding plane, not on one side or the other.  This consideration brings
-    // us to the formula for the bias.
-    double bias = dot((class1_center+class2_center)/2, plane_normal_vector);
-
-    // Now classify points by which side of the plane they are on.
-    for (unsigned long i = 0; i < projected_samples.size(); ++i)
-    {
-        double side = dot(plane_normal_vector, projected_samples[i]) - bias;
-
-        bool predicted_as_class_1 = (side > 0);
-
-        // Now print a message for any misclassified points.
-        if (predicted_as_class_1 == true && labels[i] != 1)
-            cout << "A point was misclassified" << endl;
-
-        if (predicted_as_class_1 == false && labels[i] != 2)
-            cout << "A point was misclassified" << endl;
-    }
-
-
-    // It would be nice to convert this decision rule into a normal
-    // decision_function object and dispense with the empirical_kernel_map.
-    // Happily, it is possible to do so.  Consider the following example code:
-    decision_function<kernel_type> dec_funct = ekm.convert_to_decision_function(plane_normal_vector);
-    // The dec_funct now computes dot products between plane_normal_vector and
-    // the projection of any sample point given to it.  All that remains is to
-    // account for the bias.
-    dec_funct.b = bias;
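-
-    // A decision_function obtained this way can be saved and reloaded like any
-    // other dlib decision function.  A minimal sketch, assuming dlib's
-    // serialize()/deserialize() stream API (not part of the original example):
-    //
-    //    serialize("plane_classifier.dat") << dec_funct;
-    //    deserialize("plane_classifier.dat") >> dec_funct;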
-
-    // Now classify points by which side of the plane they are on.
-    for (unsigned long i = 0; i < samples.size(); ++i)
-    {
-        double side = dec_funct(samples[i]);
-
-        // And let's just check that dec_funct really does compute the same
-        // thing as the previous equation.
-        double side_alternate_equation = dot(plane_normal_vector, projected_samples[i]) - bias;
-        if (abs(side-side_alternate_equation) > 1e-14)
-            cout << "dec_funct error: " << abs(side-side_alternate_equation) << endl;
-
-        bool predicted_as_class_1 = (side > 0);
-
-        // Now print a message for any misclassified points.
-        if (predicted_as_class_1 == true && labels[i] != 1)
-            cout << "A point was misclassified" << endl;
-
-        if (predicted_as_class_1 == false && labels[i] != 2)
-            cout << "A point was misclassified" << endl;
-    }
-
-}
-
-// ----------------------------------------------------------------------------------------
-
-void generate_concentric_circles (
-    std::vector<sample_type>& samples,
-    std::vector<double>& labels,
-    const int num
-)
-{
-    sample_type m(2,1);
-    samples.clear();
-    labels.clear();
-
-    dlib::rand rnd;
-
-    // Make some samples near the origin.
-    double radius = 1.0;
-    for (long i = 0; i < num; ++i)
-    {
-        double sign = 1;
-        if (rnd.get_random_double() < 0.5)
-            sign = -1;
-        m(0) = 2*radius*rnd.get_random_double()-radius;
-        m(1) = sign*sqrt(radius*radius - m(0)*m(0));
-
-        samples.push_back(m);
-        labels.push_back(1);
-    }
-
-    // Make some samples in a circle around the origin but far away.
-    radius = 5.0;
-    for (long i = 0; i < num; ++i)
-    {
-        double sign = 1;
-        if (rnd.get_random_double() < 0.5)
-            sign = -1;
-        m(0) = 2*radius*rnd.get_random_double()-radius;
-        m(1) = sign*sqrt(radius*radius - m(0)*m(0));
-
-        samples.push_back(m);
-        labels.push_back(2);
-    }
-}
-
-// ----------------------------------------------------------------------------------------
-