summary | refs | log | tree | commit | diff | stats
path: root/ml/dlib/examples/one_class_classifiers_ex.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/examples/one_class_classifiers_ex.cpp')
-rw-r--r-- ml/dlib/examples/one_class_classifiers_ex.cpp 245
1 files changed, 245 insertions, 0 deletions
diff --git a/ml/dlib/examples/one_class_classifiers_ex.cpp b/ml/dlib/examples/one_class_classifiers_ex.cpp
new file mode 100644
index 00000000..3394ee76
--- /dev/null
+++ b/ml/dlib/examples/one_class_classifiers_ex.cpp
@@ -0,0 +1,245 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+ This is an example illustrating the use of the tools in dlib for doing distribution
+ estimation or detecting anomalies using one-class support vector machines.
+
+ Unlike regular classifiers, these tools take unlabeled points and try to learn what
+ parts of the feature space normally contain data samples and which do not. Typically
+ you use these tools when you are interested in finding outliers or otherwise
+ identifying "unusual" data samples.
+
+ In this example, we will sample points from the sinc() function to generate our set of
+ "typical looking" points. Then we will train some one-class classifiers and use them
+ to predict if new points are unusual or not. In this case, unusual means a point is
+ not from the sinc() curve.
+*/
+
+#include <iostream>
+#include <vector>
+#include <dlib/svm.h>
+#include <dlib/gui_widgets.h>
+#include <dlib/array2d.h>
+#include <dlib/image_transforms.h>
+
+using namespace std;
+using namespace dlib;
+
+// Here is the sinc function (scaled by 2, i.e. 2*sin(x)/x) that we will be trying to learn with the one-class SVMs
+double sinc(double x) // returns 2*sin(x)/x, with the value 2 at x == 0
+{
+ if (x == 0) // the general formula below would divide 0 by 0 here
+ return 2; // 2 is the limit of 2*sin(x)/x as x approaches 0
+ return 2*sin(x)/x;
+}
+
+int main()
+{
+ // We will use column vectors to store our points. Here we make a convenient typedef
+ // for the kind of vector we will use.
+ typedef matrix<double,0,1> sample_type;
+
+ // Then we select the kernel we want to use. For our present problem the radial basis
+ // kernel is quite effective.
+ typedef radial_basis_kernel<sample_type> kernel_type;
+
+ // Now make the object responsible for training one-class SVMs.
+ svm_one_class_trainer<kernel_type> trainer;
+ // Here we set the parameter of the radial basis kernel to 4.0. Larger values make the
+ // kernel narrower and give the radial basis kernel more resolution. If you play with
+ // the value and observe the program output you will get a more intuitive feel for what
+ // that means.
+ trainer.set_kernel(kernel_type(4.0));
+
+ // Now sample some 2D points. The points will be located on the curve defined by the
+ // sinc() function, i.e. each sample is the pair (x, sinc(x)).
+ std::vector<sample_type> samples;
+ sample_type m(2);
+ for (double x = -15; x <= 8; x += 0.3)
+ {
+ m(0) = x;
+ m(1) = sinc(x);
+ samples.push_back(m);
+ }
+
+ // Now train a one-class SVM. The result is a function, df(), that outputs large
+ // values for points from the sinc() curve and smaller values for points that are
+ // anomalous (i.e. not on the sinc() curve in our case).
+ decision_function<kernel_type> df = trainer.train(samples);
+
+ // So for example, let's look at the output from some points on the sinc() curve.
+ cout << "Points that are on the sinc function:\n";
+ m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << df(m) << endl;
+ m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << df(m) << endl;
+ m(0) = -0; m(1) = sinc(m(0)); cout << " " << df(m) << endl;
+ m(0) = -0.5; m(1) = sinc(m(0)); cout << " " << df(m) << endl;
+ m(0) = -4.1; m(1) = sinc(m(0)); cout << " " << df(m) << endl;
+ m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << df(m) << endl;
+ m(0) = -0.5; m(1) = sinc(m(0)); cout << " " << df(m) << endl;
+
+ cout << endl;
+ // Now look at some outputs for points not on the sinc() curve. You will see that
+ // these values are all notably smaller. Each point is moved off the curve by
+ // offsetting or negating its second coordinate.
+ cout << "Points that are NOT on the sinc function:\n";
+ m(0) = -1.5; m(1) = sinc(m(0))+4; cout << " " << df(m) << endl;
+ m(0) = -1.5; m(1) = sinc(m(0))+3; cout << " " << df(m) << endl;
+ m(0) = -0; m(1) = -sinc(m(0)); cout << " " << df(m) << endl;
+ m(0) = -0.5; m(1) = -sinc(m(0)); cout << " " << df(m) << endl;
+ m(0) = -4.1; m(1) = sinc(m(0))+2; cout << " " << df(m) << endl;
+ m(0) = -1.5; m(1) = sinc(m(0))+0.9; cout << " " << df(m) << endl;
+ m(0) = -0.5; m(1) = sinc(m(0))+1; cout << " " << df(m) << endl;
+
+ // The output is as follows:
+ /*
+ Points that are on the sinc function:
+ 0.000389691
+ 0.000389691
+ -0.000239037
+ -0.000179978
+ -0.000178491
+ 0.000389691
+ -0.000179978
+
+ Points that are NOT on the sinc function:
+ -0.269389
+ -0.269389
+ -0.269389
+ -0.269389
+ -0.269389
+ -0.239954
+ -0.264318
+ */
+
+ // So we can see that in this example the one-class SVM correctly indicates that
+ // the non-sinc points are definitely not points from the sinc() curve.
+
+
+ // It should be noted that the svm_one_class_trainer becomes very slow when you have
+ // more than 10 or 20 thousand training points. However, dlib comes with very fast SVM
+ // tools which you can use instead at the cost of a little more setup. In particular,
+ // it is possible to use one of dlib's very fast linear SVM solvers to train a one
+ // class SVM. This is what we do below. We will train on 115,000 points and it only
+ // takes a few seconds with this tool!
+ //
+ // The first step is constructing a feature space that is appropriate for use with a
+ // linear SVM. In general, this is quite problem dependent. However, if you have
+ // under about a hundred dimensions in your vectors then it can often be quite
+ // effective to use the empirical_kernel_map as we do below (see the
+ // empirical_kernel_map documentation and example program for an extended discussion of
+ // what it does).
+ //
+ // But putting the empirical_kernel_map aside, the most important step in turning a
+ // linear SVM into a one-class SVM is the following. We append a -1 value onto the end
+ // of each feature vector and then tell the trainer to force the weight for this
+ // feature to 1. This means that if the linear SVM assigned all other weights a value
+ // of 0 then the output from a learned decision function would always be -1. The
+ // second step is that we ask the SVM to label each training sample with +1. This
+ // causes the SVM to set the other feature weights such that the training samples have
+ // positive outputs from the learned decision function. But the starting bias for all
+ // the points in the whole feature space is -1. The result is that points outside our
+ // training set will not be affected, so their outputs from the decision function will
+ // remain close to -1.
+
+ empirical_kernel_map<kernel_type> ekm;
+ ekm.load(trainer.get_kernel(),samples);
+
+ samples.clear(); // the coarse samples were already given to ekm.load(); reuse the vector for the new training set
+ std::vector<double> labels;
+ // make a vector with just 1 element in it equal to -1.
+ sample_type bias(1);
+ bias = -1;
+ sample_type augmented;
+ // This time sample 115,000 points from the sinc() function.
+ for (double x = -15; x <= 8; x += 0.0002)
+ {
+ m(0) = x;
+ m(1) = sinc(x);
+ // Apply the empirical_kernel_map transformation and then append the -1 value
+ augmented = join_cols(ekm.project(m), bias);
+ samples.push_back(augmented);
+ labels.push_back(+1); // every training sample gets the label +1
+ }
+ cout << "samples.size(): "<< samples.size() << endl;
+
+ // The svm_c_linear_dcd_trainer is a very fast SVM solver which only works with the
+ // linear_kernel. It has the nice feature of supporting this "force_last_weight_to_1"
+ // mode we discussed above.
+ svm_c_linear_dcd_trainer<linear_kernel<sample_type> > linear_trainer;
+ linear_trainer.force_last_weight_to_1(true);
+
+ // Train the SVM
+ decision_function<linear_kernel<sample_type> > df2 = linear_trainer.train(samples, labels);
+
+ // Here we test it as before, again we note that points from the sinc() curve have
+ // large outputs from the decision function. Note also that we must remember to
+ // transform the points in exactly the same manner used to construct the training set
+ // (project with ekm, then append bias) before giving them to df2() or the code will not work.
+ cout << "Points that are on the sinc function:\n";
+ m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl;
+ m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl;
+ m(0) = -0; m(1) = sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl;
+ m(0) = -0.5; m(1) = sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl;
+ m(0) = -4.1; m(1) = sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl;
+ m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl;
+ m(0) = -0.5; m(1) = sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl;
+
+ cout << endl;
+ // Again, we see here that points not on the sinc() function have small values.
+ cout << "Points that are NOT on the sinc function:\n";
+ m(0) = -1.5; m(1) = sinc(m(0))+4; cout << " " << df2(join_cols(ekm.project(m),bias)) << endl;
+ m(0) = -1.5; m(1) = sinc(m(0))+3; cout << " " << df2(join_cols(ekm.project(m),bias)) << endl;
+ m(0) = -0; m(1) = -sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl;
+ m(0) = -0.5; m(1) = -sinc(m(0)); cout << " " << df2(join_cols(ekm.project(m),bias)) << endl;
+ m(0) = -4.1; m(1) = sinc(m(0))+2; cout << " " << df2(join_cols(ekm.project(m),bias)) << endl;
+ m(0) = -1.5; m(1) = sinc(m(0))+0.9; cout << " " << df2(join_cols(ekm.project(m),bias)) << endl;
+ m(0) = -0.5; m(1) = sinc(m(0))+1; cout << " " << df2(join_cols(ekm.project(m),bias)) << endl;
+
+
+ // The output is as follows:
+ /*
+ Points that are on the sinc function:
+ 1.00454
+ 1.00454
+ 1.00022
+ 1.00007
+ 1.00371
+ 1.00454
+ 1.00007
+
+ Points that are NOT on the sinc function:
+ -1
+ -1
+ -1
+ -1
+ -0.999998
+ -0.781231
+ -0.96242
+ */
+
+
+ // Finally, to help you visualize what is happening here we are going to plot the
+ // response of the one-class classifiers on the screen. The code below creates two
+ // heatmap images which show the response. In these images you can clearly see where
+ // the algorithms have identified the sinc() curve. The hotter the pixel looks, the
+ // larger the value coming out of the decision function and therefore the more "normal"
+ // it is according to the classifier.
+ const long size = 500;
+ array2d<double> img1(size,size);
+ array2d<double> img2(size,size);
+ for (long r = 0; r < img1.nr(); ++r)
+ {
+ for (long c = 0; c < img1.nc(); ++c)
+ {
+ double x = 30.0*c/size - 19; // column c in [0,size) maps to x in [-19, 11)
+ double y = 8.0*r/size - 4; // row r in [0,size) maps to y in [-4, 4)
+ m(0) = x;
+ m(1) = y;
+ img1[r][c] = df(m); // response of the kernel one-class SVM at (x, y)
+ img2[r][c] = df2(join_cols(ekm.project(m),bias)); // response of the linear SVM on the transformed point
+ }
+ }
+ image_window win1(heatmap(img1), "svm_one_class_trainer");
+ image_window win2(heatmap(img2), "svm_c_linear_dcd_trainer");
+ win1.wait_until_closed(); // win2 is not waited on; main returns right after this call
+}
+
+