Diffstat (limited to 'ml/dlib/examples/empirical_kernel_map_ex.cpp')
-rw-r--r-- ml/dlib/examples/empirical_kernel_map_ex.cpp | 355
1 file changed, 0 insertions(+), 355 deletions(-)
diff --git a/ml/dlib/examples/empirical_kernel_map_ex.cpp b/ml/dlib/examples/empirical_kernel_map_ex.cpp
deleted file mode 100644
index 9f7b1a579..000000000
--- a/ml/dlib/examples/empirical_kernel_map_ex.cpp
+++ /dev/null
@@ -1,355 +0,0 @@
-// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
-/*
-
- This is an example illustrating the use of the empirical_kernel_map
- from the dlib C++ Library.
-
- This example program assumes you are familiar with some general elements of
- the library. In particular, you should have at least read the svm_ex.cpp
- and matrix_ex.cpp examples.
-
-
- Most of the machine learning algorithms in dlib are some flavor of "kernel machine".
- This means they are all simple linear algorithms that have been formulated such
- that the only way they look at the data given by a user is via dot products between
- the data samples. These algorithms are made more useful via the application of the
- so-called kernel trick. This trick is to replace the dot product with a user
- supplied function which takes two samples and returns a real number. This function
- is the kernel that is required by so many algorithms. The most basic kernel is the
- linear_kernel which is simply a normal dot product. More interesting, however,
- are kernels which first apply some nonlinear transformation to the user's data samples
- and then compute a dot product. In this way, a simple algorithm that finds a linear
- plane to separate data (e.g. the SVM algorithm) can be made to solve complex
- nonlinear learning problems.
-
- An important element of the kernel trick is that these kernel functions perform
- the nonlinear transformation implicitly. That is, if you look at the implementations
- of these kernel functions you won't see code that transforms two input vectors in
- some way and then computes their dot product. Instead you will see a simple function
- that takes two input vectors and just computes a single real number via some simple
- process. You can basically think of this as an optimization. Imagine that originally
- we wrote out the entire procedure to perform the nonlinear transformation and then
- compute the dot product but then noticed we could cancel a few terms here and there
- and simplify the whole thing down into a more compact and easily evaluated form.
- The result is a nice function that computes what we want but we no longer get to see
- what those nonlinearly transformed input vectors are.
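-
- For a concrete example of this simplification, take the degree 2 polynomial
- kernel on 2-D vectors, k(x,y) = dot(x,y)^2. Expanding the square shows that
- k(x,y) = dot(phi(x), phi(y)) where phi maps a vector (a,b) to the 3-D vector
- (a^2, sqrt(2)*a*b, b^2). So the kernel evaluates a dot product in the
- transformed space without ever forming phi(x) or phi(y) explicitly.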
-
- The empirical_kernel_map is a tool that undoes this. It allows you to obtain these
- nonlinearly transformed vectors. It does this by taking a set of data samples from
- the user (referred to as basis samples), applying the nonlinear transformation to all
- of them, and then constructing a set of orthonormal basis vectors which spans the space
- occupied by those transformed input samples. Then if we wish to obtain the nonlinear
- version of any data sample we can simply project it onto this orthonormal basis and
- we obtain a regular vector of real numbers which represents the nonlinearly transformed
- version of the data sample. The empirical_kernel_map has been formulated to use only
- dot products between data samples so it is capable of performing this service for any
- user supplied kernel function.
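-
- In other words, for any two samples x and y, dot(ekm.project(x), ekm.project(y))
- approximately equals kernel(x,y), with the approximation being exact whenever x
- and y lie in the span of the basis samples. This is the property verified below
- in test_empirical_kernel_map().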
-
- The empirical_kernel_map is useful because it is often difficult to formulate an
- algorithm in a way that uses only dot products. So the empirical_kernel_map lets
- us easily kernelize any algorithm we like by using this object during a preprocessing
- step. However, it should be noted that the algorithm is only practical when used
- with at most a few thousand basis samples. Fortunately, most datasets live in
- subspaces that are relatively low dimensional. So for these datasets, using the
- empirical_kernel_map is practical assuming an appropriate set of basis samples can be
- selected by the user. To help with this, dlib supplies the linearly_independent_subset_finder.
- I also often find that just picking a random subset of the data as a basis works well.
-
-
-
- In what follows, we walk through the process of creating an empirical_kernel_map,
- projecting data to obtain the nonlinearly transformed vectors, and then doing a
- few interesting things with the data.
-*/
-
-
-
-
-#include <dlib/svm.h>
-#include <dlib/rand.h>
-#include <iostream>
-#include <vector>
-
-
-using namespace std;
-using namespace dlib;
-
-// ----------------------------------------------------------------------------------------
-
-// First let's make a typedef for the kind of samples we will be using.
-typedef matrix<double, 0, 1> sample_type;
-
-// We will be using the radial_basis_kernel in this example program.
-typedef radial_basis_kernel<sample_type> kernel_type;
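-// For reference, dlib's radial_basis_kernel computes kern(a,b) == exp(-gamma*||a-b||^2),
-// where gamma is the constructor argument (main() below uses 0.1).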
-
-// ----------------------------------------------------------------------------------------
-
-void generate_concentric_circles (
- std::vector<sample_type>& samples,
- std::vector<double>& labels,
- const int num_points
-);
-/*!
- requires
- - num_points > 0
- ensures
- - generates two circles centered at the point (0,0), one of radius 1 and
- the other of radius 5. These points are stored into samples. labels will
- tell you if a given sample is from the smaller circle (its label will be 1)
- or from the larger circle (its label will be 2).
- each circle will be made up of num_points points
-!*/
-
-// ----------------------------------------------------------------------------------------
-
-void test_empirical_kernel_map (
- const std::vector<sample_type>& samples,
- const std::vector<double>& labels,
- const empirical_kernel_map<kernel_type>& ekm
-);
-/*!
- This function computes various interesting things with the empirical_kernel_map.
- See its implementation below for details.
-!*/
-
-// ----------------------------------------------------------------------------------------
-
-int main()
-{
- std::vector<sample_type> samples;
- std::vector<double> labels;
-
- // Declare an instance of the kernel we will be using.
- const kernel_type kern(0.1);
-
- // create a dataset with two concentric circles. There will be 100 points on each circle.
- generate_concentric_circles(samples, labels, 100);
-
- empirical_kernel_map<kernel_type> ekm;
-
-
- // Here we create an empirical_kernel_map using all of our data samples as basis samples.
- cout << "\n\nBuilding an empirical_kernel_map with " << samples.size() << " basis samples." << endl;
- ekm.load(kern, samples);
- cout << "Test the empirical_kernel_map when loaded with every sample." << endl;
- test_empirical_kernel_map(samples, labels, ekm);
-
-
-
-
-
-
- // create a new dataset with two concentric circles. There will be 1000 points on each circle.
- generate_concentric_circles(samples, labels, 1000);
- // Rather than use all 2000 samples as basis samples we are going to use the
- // linearly_independent_subset_finder to pick out a good basis set. The idea behind this
- object is to try and find the 40 or so samples that best span the subspace containing all the
- // data.
- linearly_independent_subset_finder<kernel_type> lisf(kern, 40);
- // populate lisf with samples. We have configured it to allow at most 40 samples but this function
- // may determine that fewer samples are necessary to form a good basis. In this example program
- // it will select only 26.
- fill_lisf(lisf, samples);
-
- // Now reload the empirical_kernel_map but this time using only our small basis
- // selected using the linearly_independent_subset_finder.
- cout << "\n\nBuilding an empirical_kernel_map with " << lisf.size() << " basis samples." << endl;
- ekm.load(lisf);
- cout << "Test the empirical_kernel_map when loaded with samples from the lisf object." << endl;
- test_empirical_kernel_map(samples, labels, ekm);
-
-
- cout << endl;
-}
-
-// ----------------------------------------------------------------------------------------
-
-void test_empirical_kernel_map (
- const std::vector<sample_type>& samples,
- const std::vector<double>& labels,
- const empirical_kernel_map<kernel_type>& ekm
-)
-{
-
- std::vector<sample_type> projected_samples;
-
- // The first thing we do is compute the nonlinearly projected vectors using the
- // empirical_kernel_map.
- for (unsigned long i = 0; i < samples.size(); ++i)
- {
- projected_samples.push_back(ekm.project(samples[i]));
- }
-
- // Note that a kernel matrix is just a matrix M such that M(i,j) == kernel(samples[i],samples[j]).
- // So below we are computing the normal kernel matrix as given by the radial_basis_kernel and the
- // input samples. We also compute the kernel matrix for all the projected_samples as given by the
- // linear_kernel. Note that the linear_kernel just computes normal dot products. So what we want to
- see is that the dot products between all the projected_samples are the same as the outputs
- // of the kernel function for their respective untransformed input samples. If they match then
- // we know that the empirical_kernel_map is working properly.
- const matrix<double> normal_kernel_matrix = kernel_matrix(ekm.get_kernel(), samples);
- const matrix<double> new_kernel_matrix = kernel_matrix(linear_kernel<sample_type>(), projected_samples);
-
- cout << "Max kernel matrix error: " << max(abs(normal_kernel_matrix - new_kernel_matrix)) << endl;
- cout << "Mean kernel matrix error: " << mean(abs(normal_kernel_matrix - new_kernel_matrix)) << endl;
- /*
- Example outputs from these cout statements.
- For the case where we use all samples as basis samples:
- Max kernel matrix error: 7.32747e-15
- Mean kernel matrix error: 7.47789e-16
-
- For the case where we use only 26 samples as basis samples:
- Max kernel matrix error: 0.000953573
- Mean kernel matrix error: 2.26008e-05
-
-
- Note that if we use enough basis samples we can perfectly span the space of input samples.
- In that case we get errors that are essentially just rounding noise (Moreover, using all the
- samples is always enough since they are always within their own span). Once we start
- to use fewer basis samples we may begin to get approximation error. In the second case we
- used 26 and we can see that the data doesn't really lie exactly in a 26 dimensional subspace.
- But it is pretty close.
- */
-
-
-
- // Now let's do something more interesting. The following loop finds the centroids
- // of the two classes of data.
- sample_type class1_center;
- sample_type class2_center;
- for (unsigned long i = 0; i < projected_samples.size(); ++i)
- {
- if (labels[i] == 1)
- class1_center += projected_samples[i];
- else
- class2_center += projected_samples[i];
- }
-
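- // generate_concentric_circles() puts the same number of points on each circle,
- // so each class accounts for exactly half of the samples.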
- const int points_per_class = samples.size()/2;
- class1_center /= points_per_class;
- class2_center /= points_per_class;
-
-
- // Now classify points by which center they are nearest to. Recall that the data
- // is made up of two concentric circles. Normally you can't separate two concentric
- // circles by checking which center a point is nearest to, since both circles have
- // the same center. However, the kernel trick makes the data separable and the loop below will
- // perfectly classify each data point.
- for (unsigned long i = 0; i < projected_samples.size(); ++i)
- {
- double distance_to_class1 = length(projected_samples[i] - class1_center);
- double distance_to_class2 = length(projected_samples[i] - class2_center);
-
- bool predicted_as_class_1 = (distance_to_class1 < distance_to_class2);
-
- // Now print a message for any misclassified points.
- if (predicted_as_class_1 == true && labels[i] != 1)
- cout << "A point was misclassified" << endl;
-
- if (predicted_as_class_1 == false && labels[i] != 2)
- cout << "A point was misclassified" << endl;
- }
-
-
-
- // Next, note that classifying a point based on its distances to two other
- // points is the same thing as using the plane that lies halfway between those two points
- // as a decision boundary. So let's compute that decision plane and use it to classify
- // all the points.
-
- sample_type plane_normal_vector = class1_center - class2_center;
- // The point right in the center of our two classes should be on the decision plane, not
- // on one side or the other. This consideration brings us to the formula for the bias.
- double bias = dot((class1_center+class2_center)/2, plane_normal_vector);
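- // To see why, write c1 and c2 for the two centers. Then length(x-c1) < length(x-c2)
- // is equivalent (square both sides and expand) to dot(c1-c2, x) > dot(c1-c2, (c1+c2)/2),
- // and the right hand side is exactly the bias computed above.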
-
- // Now classify points by which side of the plane they are on.
- for (unsigned long i = 0; i < projected_samples.size(); ++i)
- {
- double side = dot(plane_normal_vector, projected_samples[i]) - bias;
-
- bool predicted_as_class_1 = (side > 0);
-
- // Now print a message for any misclassified points.
- if (predicted_as_class_1 == true && labels[i] != 1)
- cout << "A point was misclassified" << endl;
-
- if (predicted_as_class_1 == false && labels[i] != 2)
- cout << "A point was misclassified" << endl;
- }
-
-
- // It would be nice to convert this decision rule into a normal decision_function object and
- // dispense with the empirical_kernel_map. Happily, it is possible to do so. Consider the
- // following example code:
- decision_function<kernel_type> dec_funct = ekm.convert_to_decision_function(plane_normal_vector);
- // The dec_funct now computes dot products between plane_normal_vector and the projection
- // of any sample point given to it. All that remains is to account for the bias.
- dec_funct.b = bias;
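-
- // As an aside, since dec_funct is now an ordinary dlib decision_function it can
- // be saved to disk just like the one in svm_ex.cpp. A sketch (the file name is
- // just an example, and you would also need to #include <fstream>):
- //     ofstream fout("dec_funct.dat", ios::binary);
- //     serialize(dec_funct, fout);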
-
- // Now classify points by which side of the plane they are on.
- for (unsigned long i = 0; i < samples.size(); ++i)
- {
- double side = dec_funct(samples[i]);
-
- // And let's just check that the dec_funct really does compute the same thing as the previous equation.
- double side_alternate_equation = dot(plane_normal_vector, projected_samples[i]) - bias;
- if (abs(side-side_alternate_equation) > 1e-14)
- cout << "dec_funct error: " << abs(side-side_alternate_equation) << endl;
-
- bool predicted_as_class_1 = (side > 0);
-
- // Now print a message for any misclassified points.
- if (predicted_as_class_1 == true && labels[i] != 1)
- cout << "A point was misclassified" << endl;
-
- if (predicted_as_class_1 == false && labels[i] != 2)
- cout << "A point was misclassified" << endl;
- }
-
-}
-
-// ----------------------------------------------------------------------------------------
-
-void generate_concentric_circles (
- std::vector<sample_type>& samples,
- std::vector<double>& labels,
- const int num_points
-)
-{
- sample_type m(2,1);
- samples.clear();
- labels.clear();
-
- dlib::rand rnd;
-
- // make some samples near the origin
- double radius = 1.0;
- for (long i = 0; i < num_points; ++i)
- {
- double sign = 1;
- if (rnd.get_random_double() < 0.5)
- sign = -1;
- m(0) = 2*radius*rnd.get_random_double()-radius;
- m(1) = sign*sqrt(radius*radius - m(0)*m(0));
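- // m now lies exactly on the circle of the current radius: its x coordinate is
- // uniform on [-radius,radius] and y is chosen so that x^2 + y^2 == radius^2.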
-
- samples.push_back(m);
- labels.push_back(1);
- }
-
- // make some samples in a circle around the origin but far away
- radius = 5.0;
- for (long i = 0; i < num_points; ++i)
- {
- double sign = 1;
- if (rnd.get_random_double() < 0.5)
- sign = -1;
- m(0) = 2*radius*rnd.get_random_double()-radius;
- m(1) = sign*sqrt(radius*radius - m(0)*m(0));
-
- samples.push_back(m);
- labels.push_back(2);
- }
-}
-
-// ----------------------------------------------------------------------------------------
-