summary refs log tree commit diff stats
path: root/src/ml/dlib/examples/rvm_regression_ex.cpp
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-05 12:08:03 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-05 12:08:18 +0000
commit 5da14042f70711ea5cf66e034699730335462f66 (patch)
tree 0f6354ccac934ed87a2d555f45be4c831cf92f4a /src/ml/dlib/examples/rvm_regression_ex.cpp
parent Releasing debian version 1.44.3-2. (diff)
download netdata-5da14042f70711ea5cf66e034699730335462f66.tar.xz
netdata-5da14042f70711ea5cf66e034699730335462f66.zip
Merging upstream version 1.45.3+dfsg.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/ml/dlib/examples/rvm_regression_ex.cpp')
-rw-r--r-- src/ml/dlib/examples/rvm_regression_ex.cpp 101
1 files changed, 101 insertions, 0 deletions
diff --git a/src/ml/dlib/examples/rvm_regression_ex.cpp b/src/ml/dlib/examples/rvm_regression_ex.cpp
new file mode 100644
index 000000000..d65cb5203
--- /dev/null
+++ b/src/ml/dlib/examples/rvm_regression_ex.cpp
@@ -0,0 +1,101 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+ This is an example illustrating the use of the RVM regression object
+ from the dlib C++ Library.
+
+ This example will train on data from the sinc function.
+
+*/
+
+#include <iostream>
+#include <vector>
+
+#include <dlib/svm.h>
+
+using namespace std;
+using namespace dlib;
+
+// Target function for the regression below: sinc(x) = sin(x)/x.
+// The value at exactly x == 0 is defined as 1 so the division by zero is avoided.
+double sinc(double x)
+{
+    return (x == 0) ? 1.0 : sin(x)/x;
+}
+
+int main()
+{
+    // Each sample is a 1 dimensional column vector holding a single x value.
+    using sample_type = matrix<double,1,1>;
+
+    // The kernel used for training.  The radial basis kernel was picked because it
+    // only has one parameter and generally gives good results without much fiddling.
+    using kernel_type = radial_basis_kernel<sample_type>;
+
+    // Build the training set: sinc() evaluated at x = -10, -9, ..., 4.
+    sample_type point;
+    std::vector<sample_type> inputs;
+    std::vector<double> targets;
+    for (int step = -10; step <= 4; ++step)
+    {
+        point(0) = step;
+        inputs.push_back(point);
+        targets.push_back(sinc(step));
+    }
+
+    // This is the object that does the training.
+    rvm_regression_trainer<kernel_type> trainer;
+
+    // The radial_basis_kernel needs a gamma value.  A rule of thumb is to try
+    // 1.0/(mean squared distance between your sample points); the value used here,
+    // 2.0/(mean squared distance), is in that same neighborhood.  Be aware that an
+    // inappropriately large gamma makes the RVM training algorithm run extremely
+    // slowly, where "large" is relative to how spread out the data is.  So start from
+    // a rule like this one when choosing gamma for the RVM.  Normalizing the samples,
+    // as shown in the rvm_ex.cpp example program, is probably a good idea as well.
+    const double kernel_gamma = 2.0/compute_mean_squared_distance(inputs);
+    cout << "using gamma of " << kernel_gamma << endl;
+    trainer.set_kernel(kernel_type(kernel_gamma));
+
+    // A bigger stopping epsilon reduces the RVM training time, though it might make
+    // the outputs less reliable.  Sometimes it works out well.  0.001 is the default.
+    trainer.set_epsilon(0.001);
+
+    // Train a regression function on the sample points.
+    decision_function<kernel_type> learned = trainer.train(inputs, targets);
+
+    // For a few test points, print the true sinc value followed by the value
+    // predicted by the trained function.
+    for (const double x : {2.5, 0.1, -4.0, 5.0})
+    {
+        point(0) = x;
+        cout << sinc(point(0)) << " " << learned(point) << endl;
+    }
+
+    // The output is as follows:
+    //using gamma of 0.05
+    //0.239389 0.240989
+    //0.998334 0.999538
+    //-0.189201 -0.188453
+    //-0.191785 -0.226516
+
+    // The first column is the true value of the sinc function and the second
+    // column is the output from the rvm estimate.
+
+    // Just about everything in dlib is serializable, so the trained function can be
+    // written to disk...
+    serialize("saved_function.dat") << learned;
+
+    // ...and then read back from that same file.
+    deserialize("saved_function.dat") >> learned;
+
+}
+
+