summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/test/linear_manifold_regularizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/test/linear_manifold_regularizer.cpp')
-rw-r--r--ml/dlib/dlib/test/linear_manifold_regularizer.cpp408
1 files changed, 0 insertions, 408 deletions
diff --git a/ml/dlib/dlib/test/linear_manifold_regularizer.cpp b/ml/dlib/dlib/test/linear_manifold_regularizer.cpp
deleted file mode 100644
index e73b1c8d3..000000000
--- a/ml/dlib/dlib/test/linear_manifold_regularizer.cpp
+++ /dev/null
@@ -1,408 +0,0 @@
-// Copyright (C) 2010 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-
-#include "tester.h"
-#include <dlib/manifold_regularization.h>
-#include <dlib/svm.h>
-#include <dlib/rand.h>
-#include <dlib/string.h>
-#include <dlib/graph_utils_threaded.h>
-#include <vector>
-#include <sstream>
-#include <ctime>
-
-namespace
-{
- using namespace test;
- using namespace dlib;
- using namespace std;
- dlib::logger dlog("test.linear_manifold_regularizer");
-
- template <typename hash_type, typename samples_type>
- void test_find_k_nearest_neighbors_lsh(
- const samples_type& samples
- )
- {
- std::vector<sample_pair> edges1, edges2;
-
- find_k_nearest_neighbors(samples, cosine_distance(), 2, edges1);
- find_k_nearest_neighbors_lsh(samples, cosine_distance(), hash_type(), 2, 6, edges2, 2);
-
- std::sort(edges1.begin(), edges1.end(), order_by_index<sample_pair>);
- std::sort(edges2.begin(), edges2.end(), order_by_index<sample_pair>);
-
- DLIB_TEST_MSG(edges1.size() == edges2.size(), edges1.size() << " " << edges2.size());
- for (unsigned long i = 0; i < edges1.size(); ++i)
- {
- DLIB_TEST(edges1[i] == edges2[i]);
- DLIB_TEST_MSG(std::abs(edges1[i].distance() - edges2[i].distance()) < 1e-7,
- edges1[i].distance() - edges2[i].distance());
- }
- }
-
- template <typename scalar_type>
- void test_knn_lsh_sparse()
- {
- dlib::rand rnd;
- std::vector<std::map<unsigned long,scalar_type> > samples;
- samples.resize(20);
- for (unsigned int i = 0; i < samples.size(); ++i)
- {
- samples[i][0] = rnd.get_random_gaussian();
- samples[i][2] = rnd.get_random_gaussian();
- }
-
- test_find_k_nearest_neighbors_lsh<hash_similar_angles_64>(samples);
- test_find_k_nearest_neighbors_lsh<hash_similar_angles_128>(samples);
- test_find_k_nearest_neighbors_lsh<hash_similar_angles_256>(samples);
- test_find_k_nearest_neighbors_lsh<hash_similar_angles_512>(samples);
- }
-
- template <typename scalar_type>
- void test_knn_lsh_dense()
- {
- dlib::rand rnd;
- std::vector<matrix<scalar_type,0,1> > samples;
- samples.resize(20);
- for (unsigned int i = 0; i < samples.size(); ++i)
- {
- samples[i].set_size(2);
- samples[i](0) = rnd.get_random_gaussian();
- samples[i](1) = rnd.get_random_gaussian();
- }
-
- test_find_k_nearest_neighbors_lsh<hash_similar_angles_64>(samples);
- test_find_k_nearest_neighbors_lsh<hash_similar_angles_128>(samples);
- test_find_k_nearest_neighbors_lsh<hash_similar_angles_256>(samples);
- test_find_k_nearest_neighbors_lsh<hash_similar_angles_512>(samples);
- }
-
-
-
- class linear_manifold_regularizer_tester : public tester
- {
- /*!
- WHAT THIS OBJECT REPRESENTS
- This object represents a unit test. When it is constructed
- it adds itself into the testing framework.
- !*/
- public:
- linear_manifold_regularizer_tester (
- ) :
- tester (
- "test_linear_manifold_regularizer", // the command line argument name for this test
- "Run tests on the linear_manifold_regularizer object.", // the command line argument description
- 0 // the number of command line arguments for this test
- )
- {
- seed = 1;
- }
-
- dlib::rand rnd;
-
- unsigned long seed;
-
- typedef matrix<double, 0, 1> sample_type;
- typedef radial_basis_kernel<sample_type> kernel_type;
-
- void do_the_test()
- {
- print_spinner();
- std::vector<sample_type> samples;
-
- // Declare an instance of the kernel we will be using.
- const kernel_type kern(0.1);
-
- const unsigned long num_points = 200;
-
- // create a large dataset with two concentric circles.
- generate_circle(samples, 1, num_points); // circle of radius 1
- generate_circle(samples, 5, num_points); // circle of radius 5
-
- std::vector<sample_pair> edges;
- find_percent_shortest_edges_randomly(samples, squared_euclidean_distance(0.1, 4), 1, 10000, "random seed", edges);
-
- dlog << LTRACE << "number of edges generated: " << edges.size();
-
- empirical_kernel_map<kernel_type> ekm;
-
- ekm.load(kern, randomly_subsample(samples, 100));
-
- // Project all the samples into the span of our 50 basis samples
- for (unsigned long i = 0; i < samples.size(); ++i)
- samples[i] = ekm.project(samples[i]);
-
-
- // Now create the manifold regularizer. The result is a transformation matrix that
- // embodies the manifold assumption discussed above.
- linear_manifold_regularizer<sample_type> lmr;
- lmr.build(samples, edges, use_gaussian_weights(0.1));
- matrix<double> T = lmr.get_transformation_matrix(10000);
-
- print_spinner();
-
- // generate the T matrix manually and make sure it matches. The point of this test
- // is to make sure that the more complex version of this that happens inside the linear_manifold_regularizer
- // is correct. It uses a tedious block of loops to do it in a way that is a lot faster for sparse
- // W matrices but isn't super straight forward.
- matrix<double> X(samples[0].size(), samples.size());
- for (unsigned long i = 0; i < samples.size(); ++i)
- set_colm(X,i) = samples[i];
-
- matrix<double> W(samples.size(), samples.size());
- W = 0;
- for (unsigned long i = 0; i < edges.size(); ++i)
- {
- W(edges[i].index1(), edges[i].index2()) = use_gaussian_weights(0.1)(edges[i]);
- W(edges[i].index2(), edges[i].index1()) = use_gaussian_weights(0.1)(edges[i]);
- }
- matrix<double> L = diagm(sum_rows(W)) - W;
- matrix<double> trueT = inv_lower_triangular(chol(identity_matrix<double>(X.nr()) + (10000.0/sum(lowerm(W)))*X*L*trans(X)));
-
- dlog << LTRACE << "T error: "<< max(abs(T - trueT));
- DLIB_TEST(max(abs(T - trueT)) < 1e-7);
-
-
- print_spinner();
- // Apply the transformation generated by the linear_manifold_regularizer to
- // all our samples.
- for (unsigned long i = 0; i < samples.size(); ++i)
- samples[i] = T*samples[i];
-
-
- // For convenience, generate a projection_function and merge the transformation
- // matrix T into it.
- projection_function<kernel_type> proj = ekm.get_projection_function();
- proj.weights = T*proj.weights;
-
-
- // Pick 2 different labeled points. One on the inner circle and another on the outer.
- // For each of these test points we will see if using the single plane that separates
- // them is a good way to separate the concentric circles. Also do this a bunch
- // of times with different randomly chosen points so we can see how robust the result is.
- for (int itr = 0; itr < 10; ++itr)
- {
- print_spinner();
- std::vector<sample_type> test_points;
- // generate a random point from the radius 1 circle
- generate_circle(test_points, 1, 1);
- // generate a random point from the radius 5 circle
- generate_circle(test_points, 5, 1);
-
- // project the two test points into kernel space. Recall that this projection_function
- // has the manifold regularizer incorporated into it.
- const sample_type class1_point = proj(test_points[0]);
- const sample_type class2_point = proj(test_points[1]);
-
- double num_wrong = 0;
-
- // Now attempt to classify all the data samples according to which point
- // they are closest to. The output of this program shows that without manifold
- // regularization this test will fail but with it it will perfectly classify
- // all the points.
- for (unsigned long i = 0; i < samples.size(); ++i)
- {
- double distance_to_class1 = length(samples[i] - class1_point);
- double distance_to_class2 = length(samples[i] - class2_point);
-
- bool predicted_as_class_1 = (distance_to_class1 < distance_to_class2);
-
- bool really_is_class_1 = (i < num_points);
-
- // now count how many times we make a mistake
- if (predicted_as_class_1 != really_is_class_1)
- ++num_wrong;
- }
-
- DLIB_TEST_MSG(num_wrong == 0, num_wrong);
- }
-
- }
-
- void generate_circle (
- std::vector<sample_type>& samples,
- double radius,
- const long num
- )
- {
- sample_type m(2,1);
-
- for (long i = 0; i < num; ++i)
- {
- double sign = 1;
- if (rnd.get_random_double() < 0.5)
- sign = -1;
- m(0) = 2*radius*rnd.get_random_double()-radius;
- m(1) = sign*sqrt(radius*radius - m(0)*m(0));
-
- samples.push_back(m);
- }
- }
-
-
- void test_knn1()
- {
- std::vector<matrix<double,2,1> > samples;
-
- matrix<double,2,1> test;
-
- test = 0,0; samples.push_back(test);
- test = 1,1; samples.push_back(test);
- test = 1,-1; samples.push_back(test);
- test = -1,1; samples.push_back(test);
- test = -1,-1; samples.push_back(test);
-
- std::vector<sample_pair> edges;
- find_k_nearest_neighbors(samples, squared_euclidean_distance(), 1, edges);
- DLIB_TEST(edges.size() == 4);
-
- std::sort(edges.begin(), edges.end(), &order_by_index<sample_pair>);
-
- DLIB_TEST(edges[0] == sample_pair(0,1,0));
- DLIB_TEST(edges[1] == sample_pair(0,2,0));
- DLIB_TEST(edges[2] == sample_pair(0,3,0));
- DLIB_TEST(edges[3] == sample_pair(0,4,0));
-
- find_k_nearest_neighbors(samples, squared_euclidean_distance(), 3, edges);
- DLIB_TEST(edges.size() == 8);
-
- find_k_nearest_neighbors(samples, squared_euclidean_distance(3.9, 4.1), 3, edges);
- DLIB_TEST(edges.size() == 4);
-
- std::sort(edges.begin(), edges.end(), &order_by_index<sample_pair>);
-
- DLIB_TEST(edges[0] == sample_pair(1,2,0));
- DLIB_TEST(edges[1] == sample_pair(1,3,0));
- DLIB_TEST(edges[2] == sample_pair(2,4,0));
- DLIB_TEST(edges[3] == sample_pair(3,4,0));
-
- find_k_nearest_neighbors(samples, squared_euclidean_distance(30000, 4.1), 3, edges);
- DLIB_TEST(edges.size() == 0);
- }
-
- void test_knn1_approx()
- {
- std::vector<matrix<double,2,1> > samples;
-
- matrix<double,2,1> test;
-
- test = 0,0; samples.push_back(test);
- test = 1,1; samples.push_back(test);
- test = 1,-1; samples.push_back(test);
- test = -1,1; samples.push_back(test);
- test = -1,-1; samples.push_back(test);
-
- std::vector<sample_pair> edges;
- find_approximate_k_nearest_neighbors(samples, squared_euclidean_distance(), 1, 10000, seed, edges);
- DLIB_TEST(edges.size() == 4);
-
- std::sort(edges.begin(), edges.end(), &order_by_index<sample_pair>);
-
- DLIB_TEST(edges[0] == sample_pair(0,1,0));
- DLIB_TEST(edges[1] == sample_pair(0,2,0));
- DLIB_TEST(edges[2] == sample_pair(0,3,0));
- DLIB_TEST(edges[3] == sample_pair(0,4,0));
-
- find_approximate_k_nearest_neighbors(samples, squared_euclidean_distance(), 3, 10000, seed, edges);
- DLIB_TEST(edges.size() == 8);
-
- find_approximate_k_nearest_neighbors(samples, squared_euclidean_distance(3.9, 4.1), 3, 10000, seed, edges);
- DLIB_TEST(edges.size() == 4);
-
- std::sort(edges.begin(), edges.end(), &order_by_index<sample_pair>);
-
- DLIB_TEST(edges[0] == sample_pair(1,2,0));
- DLIB_TEST(edges[1] == sample_pair(1,3,0));
- DLIB_TEST(edges[2] == sample_pair(2,4,0));
- DLIB_TEST(edges[3] == sample_pair(3,4,0));
-
- find_approximate_k_nearest_neighbors(samples, squared_euclidean_distance(30000, 4.1), 3, 10000, seed, edges);
- DLIB_TEST(edges.size() == 0);
- }
-
- void test_knn2()
- {
- std::vector<matrix<double,2,1> > samples;
-
- matrix<double,2,1> test;
-
- test = 1,1; samples.push_back(test);
- test = 1,-1; samples.push_back(test);
- test = -1,1; samples.push_back(test);
- test = -1,-1; samples.push_back(test);
-
- std::vector<sample_pair> edges;
- find_k_nearest_neighbors(samples, squared_euclidean_distance(), 2, edges);
- DLIB_TEST(edges.size() == 4);
-
- std::sort(edges.begin(), edges.end(), &order_by_index<sample_pair>);
-
- DLIB_TEST(edges[0] == sample_pair(0,1,0));
- DLIB_TEST(edges[1] == sample_pair(0,2,0));
- DLIB_TEST(edges[2] == sample_pair(1,3,0));
- DLIB_TEST(edges[3] == sample_pair(2,3,0));
-
- find_k_nearest_neighbors(samples, squared_euclidean_distance(), 200, edges);
- DLIB_TEST(edges.size() == 4*3/2);
- }
-
- void test_knn2_approx()
- {
- std::vector<matrix<double,2,1> > samples;
-
- matrix<double,2,1> test;
-
- test = 1,1; samples.push_back(test);
- test = 1,-1; samples.push_back(test);
- test = -1,1; samples.push_back(test);
- test = -1,-1; samples.push_back(test);
-
- std::vector<sample_pair> edges;
- // For this simple graph and high number of samples we will do we should obtain the exact
- // knn solution.
- find_approximate_k_nearest_neighbors(samples, squared_euclidean_distance(), 2, 10000, seed, edges);
- DLIB_TEST(edges.size() == 4);
-
- std::sort(edges.begin(), edges.end(), &order_by_index<sample_pair>);
-
- DLIB_TEST(edges[0] == sample_pair(0,1,0));
- DLIB_TEST(edges[1] == sample_pair(0,2,0));
- DLIB_TEST(edges[2] == sample_pair(1,3,0));
- DLIB_TEST(edges[3] == sample_pair(2,3,0));
-
-
- find_approximate_k_nearest_neighbors(samples, squared_euclidean_distance(), 200, 10000, seed, edges);
- DLIB_TEST(edges.size() == 4*3/2);
- }
-
- void perform_test (
- )
- {
- for (int i = 0; i < 5; ++i)
- {
- do_the_test();
-
- ++seed;
- test_knn1_approx();
- test_knn2_approx();
- }
- test_knn1();
- test_knn2();
- test_knn_lsh_sparse<double>();
- test_knn_lsh_sparse<float>();
- test_knn_lsh_dense<double>();
- test_knn_lsh_dense<float>();
-
- }
- };
-
- // Create an instance of this object. Doing this causes this test
- // to be automatically inserted into the testing framework whenever this cpp file
- // is linked into the project. Note that since we are inside an unnamed-namespace
- // we won't get any linker errors about the symbol a being defined multiple times.
- linear_manifold_regularizer_tester a;
-
-}
-
-
-