diff options
Diffstat (limited to 'ml/dlib/dlib/test/linear_manifold_regularizer.cpp')
-rw-r--r-- | ml/dlib/dlib/test/linear_manifold_regularizer.cpp | 408 |
1 files changed, 0 insertions, 408 deletions
diff --git a/ml/dlib/dlib/test/linear_manifold_regularizer.cpp b/ml/dlib/dlib/test/linear_manifold_regularizer.cpp deleted file mode 100644 index e73b1c8d3..000000000 --- a/ml/dlib/dlib/test/linear_manifold_regularizer.cpp +++ /dev/null @@ -1,408 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. - -#include "tester.h" -#include <dlib/manifold_regularization.h> -#include <dlib/svm.h> -#include <dlib/rand.h> -#include <dlib/string.h> -#include <dlib/graph_utils_threaded.h> -#include <vector> -#include <sstream> -#include <ctime> - -namespace -{ - using namespace test; - using namespace dlib; - using namespace std; - dlib::logger dlog("test.linear_manifold_regularizer"); - - template <typename hash_type, typename samples_type> - void test_find_k_nearest_neighbors_lsh( - const samples_type& samples - ) - { - std::vector<sample_pair> edges1, edges2; - - find_k_nearest_neighbors(samples, cosine_distance(), 2, edges1); - find_k_nearest_neighbors_lsh(samples, cosine_distance(), hash_type(), 2, 6, edges2, 2); - - std::sort(edges1.begin(), edges1.end(), order_by_index<sample_pair>); - std::sort(edges2.begin(), edges2.end(), order_by_index<sample_pair>); - - DLIB_TEST_MSG(edges1.size() == edges2.size(), edges1.size() << " " << edges2.size()); - for (unsigned long i = 0; i < edges1.size(); ++i) - { - DLIB_TEST(edges1[i] == edges2[i]); - DLIB_TEST_MSG(std::abs(edges1[i].distance() - edges2[i].distance()) < 1e-7, - edges1[i].distance() - edges2[i].distance()); - } - } - - template <typename scalar_type> - void test_knn_lsh_sparse() - { - dlib::rand rnd; - std::vector<std::map<unsigned long,scalar_type> > samples; - samples.resize(20); - for (unsigned int i = 0; i < samples.size(); ++i) - { - samples[i][0] = rnd.get_random_gaussian(); - samples[i][2] = rnd.get_random_gaussian(); - } - - test_find_k_nearest_neighbors_lsh<hash_similar_angles_64>(samples); - test_find_k_nearest_neighbors_lsh<hash_similar_angles_128>(samples); - test_find_k_nearest_neighbors_lsh<hash_similar_angles_256>(samples); - test_find_k_nearest_neighbors_lsh<hash_similar_angles_512>(samples); - } - - template <typename scalar_type> - void test_knn_lsh_dense() - { - dlib::rand rnd; - std::vector<matrix<scalar_type,0,1> > samples; - samples.resize(20); - for (unsigned int i = 0; i < samples.size(); ++i) - { - samples[i].set_size(2); - samples[i](0) = rnd.get_random_gaussian(); - samples[i](1) = rnd.get_random_gaussian(); - } - - test_find_k_nearest_neighbors_lsh<hash_similar_angles_64>(samples); - test_find_k_nearest_neighbors_lsh<hash_similar_angles_128>(samples); - test_find_k_nearest_neighbors_lsh<hash_similar_angles_256>(samples); - test_find_k_nearest_neighbors_lsh<hash_similar_angles_512>(samples); - } - - - - class linear_manifold_regularizer_tester : public tester - { - /*! - WHAT THIS OBJECT REPRESENTS - This object represents a unit test. When it is constructed - it adds itself into the testing framework. - !*/ - public: - linear_manifold_regularizer_tester ( - ) : - tester ( - "test_linear_manifold_regularizer", // the command line argument name for this test - "Run tests on the linear_manifold_regularizer object.", // the command line argument description - 0 // the number of command line arguments for this test - ) - { - seed = 1; - } - - dlib::rand rnd; - - unsigned long seed; - - typedef matrix<double, 0, 1> sample_type; - typedef radial_basis_kernel<sample_type> kernel_type; - - void do_the_test() - { - print_spinner(); - std::vector<sample_type> samples; - - // Declare an instance of the kernel we will be using. - const kernel_type kern(0.1); - - const unsigned long num_points = 200; - - // create a large dataset with two concentric circles. - generate_circle(samples, 1, num_points); // circle of radius 1 - generate_circle(samples, 5, num_points); // circle of radius 5 - - std::vector<sample_pair> edges; - find_percent_shortest_edges_randomly(samples, squared_euclidean_distance(0.1, 4), 1, 10000, "random seed", edges); - - dlog << LTRACE << "number of edges generated: " << edges.size(); - - empirical_kernel_map<kernel_type> ekm; - - ekm.load(kern, randomly_subsample(samples, 100)); - - // Project all the samples into the span of our 50 basis samples - for (unsigned long i = 0; i < samples.size(); ++i) - samples[i] = ekm.project(samples[i]); - - - // Now create the manifold regularizer. The result is a transformation matrix that - // embodies the manifold assumption discussed above. - linear_manifold_regularizer<sample_type> lmr; - lmr.build(samples, edges, use_gaussian_weights(0.1)); - matrix<double> T = lmr.get_transformation_matrix(10000); - - print_spinner(); - - // generate the T matrix manually and make sure it matches. The point of this test - // is to make sure that the more complex version of this that happens inside the linear_manifold_regularizer - // is correct. It uses a tedious block of loops to do it in a way that is a lot faster for sparse - // W matrices but isn't super straight forward. - matrix<double> X(samples[0].size(), samples.size()); - for (unsigned long i = 0; i < samples.size(); ++i) - set_colm(X,i) = samples[i]; - - matrix<double> W(samples.size(), samples.size()); - W = 0; - for (unsigned long i = 0; i < edges.size(); ++i) - { - W(edges[i].index1(), edges[i].index2()) = use_gaussian_weights(0.1)(edges[i]); - W(edges[i].index2(), edges[i].index1()) = use_gaussian_weights(0.1)(edges[i]); - } - matrix<double> L = diagm(sum_rows(W)) - W; - matrix<double> trueT = inv_lower_triangular(chol(identity_matrix<double>(X.nr()) + (10000.0/sum(lowerm(W)))*X*L*trans(X))); - - dlog << LTRACE << "T error: "<< max(abs(T - trueT)); - DLIB_TEST(max(abs(T - trueT)) < 1e-7); - - - print_spinner(); - // Apply the transformation generated by the linear_manifold_regularizer to - // all our samples. - for (unsigned long i = 0; i < samples.size(); ++i) - samples[i] = T*samples[i]; - - - // For convenience, generate a projection_function and merge the transformation - // matrix T into it. - projection_function<kernel_type> proj = ekm.get_projection_function(); - proj.weights = T*proj.weights; - - - // Pick 2 different labeled points. One on the inner circle and another on the outer. - // For each of these test points we will see if using the single plane that separates - // them is a good way to separate the concentric circles. Also do this a bunch - // of times with different randomly chosen points so we can see how robust the result is. - for (int itr = 0; itr < 10; ++itr) - { - print_spinner(); - std::vector<sample_type> test_points; - // generate a random point from the radius 1 circle - generate_circle(test_points, 1, 1); - // generate a random point from the radius 5 circle - generate_circle(test_points, 5, 1); - - // project the two test points into kernel space. Recall that this projection_function - // has the manifold regularizer incorporated into it. - const sample_type class1_point = proj(test_points[0]); - const sample_type class2_point = proj(test_points[1]); - - double num_wrong = 0; - - // Now attempt to classify all the data samples according to which point - // they are closest to. The output of this program shows that without manifold - // regularization this test will fail but with it it will perfectly classify - // all the points. - for (unsigned long i = 0; i < samples.size(); ++i) - { - double distance_to_class1 = length(samples[i] - class1_point); - double distance_to_class2 = length(samples[i] - class2_point); - - bool predicted_as_class_1 = (distance_to_class1 < distance_to_class2); - - bool really_is_class_1 = (i < num_points); - - // now count how many times we make a mistake - if (predicted_as_class_1 != really_is_class_1) - ++num_wrong; - } - - DLIB_TEST_MSG(num_wrong == 0, num_wrong); - } - - } - - void generate_circle ( - std::vector<sample_type>& samples, - double radius, - const long num - ) - { - sample_type m(2,1); - - for (long i = 0; i < num; ++i) - { - double sign = 1; - if (rnd.get_random_double() < 0.5) - sign = -1; - m(0) = 2*radius*rnd.get_random_double()-radius; - m(1) = sign*sqrt(radius*radius - m(0)*m(0)); - - samples.push_back(m); - } - } - - - void test_knn1() - { - std::vector<matrix<double,2,1> > samples; - - matrix<double,2,1> test; - - test = 0,0; samples.push_back(test); - test = 1,1; samples.push_back(test); - test = 1,-1; samples.push_back(test); - test = -1,1; samples.push_back(test); - test = -1,-1; samples.push_back(test); - - std::vector<sample_pair> edges; - find_k_nearest_neighbors(samples, squared_euclidean_distance(), 1, edges); - DLIB_TEST(edges.size() == 4); - - std::sort(edges.begin(), edges.end(), &order_by_index<sample_pair>); - - DLIB_TEST(edges[0] == sample_pair(0,1,0)); - DLIB_TEST(edges[1] == sample_pair(0,2,0)); - DLIB_TEST(edges[2] == sample_pair(0,3,0)); - DLIB_TEST(edges[3] == sample_pair(0,4,0)); - - find_k_nearest_neighbors(samples, squared_euclidean_distance(), 3, edges); - DLIB_TEST(edges.size() == 8); - - find_k_nearest_neighbors(samples, squared_euclidean_distance(3.9, 4.1), 3, edges); - DLIB_TEST(edges.size() == 4); - - std::sort(edges.begin(), edges.end(), &order_by_index<sample_pair>); - - DLIB_TEST(edges[0] == sample_pair(1,2,0)); - DLIB_TEST(edges[1] == sample_pair(1,3,0)); - DLIB_TEST(edges[2] == sample_pair(2,4,0)); - DLIB_TEST(edges[3] == sample_pair(3,4,0)); - - find_k_nearest_neighbors(samples, squared_euclidean_distance(30000, 4.1), 3, edges); - DLIB_TEST(edges.size() == 0); - } - - void test_knn1_approx() - { - std::vector<matrix<double,2,1> > samples; - - matrix<double,2,1> test; - - test = 0,0; samples.push_back(test); - test = 1,1; samples.push_back(test); - test = 1,-1; samples.push_back(test); - test = -1,1; samples.push_back(test); - test = -1,-1; samples.push_back(test); - - std::vector<sample_pair> edges; - find_approximate_k_nearest_neighbors(samples, squared_euclidean_distance(), 1, 10000, seed, edges); - DLIB_TEST(edges.size() == 4); - - std::sort(edges.begin(), edges.end(), &order_by_index<sample_pair>); - - DLIB_TEST(edges[0] == sample_pair(0,1,0)); - DLIB_TEST(edges[1] == sample_pair(0,2,0)); - DLIB_TEST(edges[2] == sample_pair(0,3,0)); - DLIB_TEST(edges[3] == sample_pair(0,4,0)); - - find_approximate_k_nearest_neighbors(samples, squared_euclidean_distance(), 3, 10000, seed, edges); - DLIB_TEST(edges.size() == 8); - - find_approximate_k_nearest_neighbors(samples, squared_euclidean_distance(3.9, 4.1), 3, 10000, seed, edges); - DLIB_TEST(edges.size() == 4); - - std::sort(edges.begin(), edges.end(), &order_by_index<sample_pair>); - - DLIB_TEST(edges[0] == sample_pair(1,2,0)); - DLIB_TEST(edges[1] == sample_pair(1,3,0)); - DLIB_TEST(edges[2] == sample_pair(2,4,0)); - DLIB_TEST(edges[3] == sample_pair(3,4,0)); - - find_approximate_k_nearest_neighbors(samples, squared_euclidean_distance(30000, 4.1), 3, 10000, seed, edges); - DLIB_TEST(edges.size() == 0); - } - - void test_knn2() - { - std::vector<matrix<double,2,1> > samples; - - matrix<double,2,1> test; - - test = 1,1; samples.push_back(test); - test = 1,-1; samples.push_back(test); - test = -1,1; samples.push_back(test); - test = -1,-1; samples.push_back(test); - - std::vector<sample_pair> edges; - find_k_nearest_neighbors(samples, squared_euclidean_distance(), 2, edges); - DLIB_TEST(edges.size() == 4); - - std::sort(edges.begin(), edges.end(), &order_by_index<sample_pair>); - - DLIB_TEST(edges[0] == sample_pair(0,1,0)); - DLIB_TEST(edges[1] == sample_pair(0,2,0)); - DLIB_TEST(edges[2] == sample_pair(1,3,0)); - DLIB_TEST(edges[3] == sample_pair(2,3,0)); - - find_k_nearest_neighbors(samples, squared_euclidean_distance(), 200, edges); - DLIB_TEST(edges.size() == 4*3/2); - } - - void test_knn2_approx() - { - std::vector<matrix<double,2,1> > samples; - - matrix<double,2,1> test; - - test = 1,1; samples.push_back(test); - test = 1,-1; samples.push_back(test); - test = -1,1; samples.push_back(test); - test = -1,-1; samples.push_back(test); - - std::vector<sample_pair> edges; - // For this simple graph and high number of samples we will do we should obtain the exact - // knn solution. - find_approximate_k_nearest_neighbors(samples, squared_euclidean_distance(), 2, 10000, seed, edges); - DLIB_TEST(edges.size() == 4); - - std::sort(edges.begin(), edges.end(), &order_by_index<sample_pair>); - - DLIB_TEST(edges[0] == sample_pair(0,1,0)); - DLIB_TEST(edges[1] == sample_pair(0,2,0)); - DLIB_TEST(edges[2] == sample_pair(1,3,0)); - DLIB_TEST(edges[3] == sample_pair(2,3,0)); - - - find_approximate_k_nearest_neighbors(samples, squared_euclidean_distance(), 200, 10000, seed, edges); - DLIB_TEST(edges.size() == 4*3/2); - } - - void perform_test ( - ) - { - for (int i = 0; i < 5; ++i) - { - do_the_test(); - - ++seed; - test_knn1_approx(); - test_knn2_approx(); - } - test_knn1(); - test_knn2(); - test_knn_lsh_sparse<double>(); - test_knn_lsh_sparse<float>(); - test_knn_lsh_dense<double>(); - test_knn_lsh_dense<float>(); - - } - }; - - // Create an instance of this object. Doing this causes this test - // to be automatically inserted into the testing framework whenever this cpp file - // is linked into the project. Note that since we are inside an unnamed-namespace - // we won't get any linker errors about the symbol a being defined multiple times. - linear_manifold_regularizer_tester a; - -} - - - |