1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
|
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
This is an example illustrating the use of the linear_manifold_regularizer
and empirical_kernel_map from the dlib C++ Library.
This example program assumes you are familiar with some general elements of
the library. In particular, you should have at least read the svm_ex.cpp
and matrix_ex.cpp examples. You should also have read the empirical_kernel_map_ex.cpp
example program as the present example builds upon it.
This program shows an example of what is called semi-supervised learning.
That is, a small amount of labeled data is augmented with a large amount
of unlabeled data. A learning algorithm is then run on all the data
and the hope is that by including the unlabeled data we will end up with
a better result.
In this particular example we will generate 200,000 sample points of
unlabeled data along with 2 samples of labeled data. The sample points
will be drawn randomly from two concentric circles. One labeled data
point will be drawn from each circle. The goal is to learn to
correctly separate the two circles using only the 2 labeled points
and the unlabeled data.
To do this we will first run an approximate form of k nearest neighbors
to determine which of the unlabeled samples are closest together. We will
then make the manifold assumption, that is, we will assume that points close
to each other should share the same classification label.
Once we have determined which points are near neighbors we will use the
empirical_kernel_map and linear_manifold_regularizer to transform all the
data points into a new vector space where any linear rule will have similar
output for points which we have decided are near neighbors.
Finally, we will classify all the unlabeled data according to which of
the two labeled points are nearest. Normally this would not work but by
using the manifold assumption we will be able to successfully classify
all the unlabeled data.
For further information on this subject you should begin with the following
paper as it discusses a very similar application of manifold regularization.
Beyond the Point Cloud: from Transductive to Semi-supervised Learning
by Vikas Sindhwani, Partha Niyogi, and Mikhail Belkin
******** SAMPLE PROGRAM OUTPUT ********
Testing manifold regularization with an intrinsic_regularization_strength of 0.
number of edges generated: 49998
Running simple test...
error: 0.37022
error: 0.44036
error: 0.376715
error: 0.307545
error: 0.463455
error: 0.426065
error: 0.416155
error: 0.288295
error: 0.400115
error: 0.46347
Testing manifold regularization with an intrinsic_regularization_strength of 10000.
number of edges generated: 49998
Running simple test...
error: 0
error: 0
error: 0
error: 0
error: 0
error: 0
error: 0
error: 0
error: 0
error: 0
*/
#include <dlib/manifold_regularization.h>
#include <dlib/svm.h>
#include <dlib/rand.h>
#include <dlib/statistics.h>
#include <iostream>
#include <vector>
#include <ctime>
using namespace std;
using namespace dlib;
// ----------------------------------------------------------------------------------------
// First let's make a typedef for the kind of samples we will be using.
typedef matrix<double, 0, 1> sample_type;
// We will be using the radial_basis_kernel in this example program.
typedef radial_basis_kernel<sample_type> kernel_type;
// ----------------------------------------------------------------------------------------
void generate_circle (
std::vector<sample_type>& samples,
double radius,
const long num
);
/*!
requires
- num > 0
- radius > 0
ensures
- generates num points centered at (0,0) with the given radius. Adds these
points into the given samples vector.
!*/
// ----------------------------------------------------------------------------------------
void test_manifold_regularization (
const double intrinsic_regularization_strength
);
/*!
ensures
- Runs an example test using the linear_manifold_regularizer with the given
intrinsic_regularization_strength.
!*/
// ----------------------------------------------------------------------------------------
int main()
{
// Run the test without any manifold regularization.
test_manifold_regularization(0);
// Run the test with manifold regularization. You can think of this number as
// a measure of how much we trust the manifold assumption. So if you are really
// confident that you can select neighboring points which should have the same
// classification then make this number big.
test_manifold_regularization(10000.0);
}
// ----------------------------------------------------------------------------------------
void test_manifold_regularization (
const double intrinsic_regularization_strength
)
{
cout << "Testing manifold regularization with an intrinsic_regularization_strength of "
<< intrinsic_regularization_strength << ".\n";
std::vector<sample_type> samples;
// Declare an instance of the kernel we will be using.
const kernel_type kern(0.1);
const unsigned long num_points = 100000;
// create a large dataset with two concentric circles. There will be 100000 points on each circle
// for a total of 200000 samples.
generate_circle(samples, 2, num_points); // circle of radius 2
generate_circle(samples, 4, num_points); // circle of radius 4
// Create a set of sample_pairs that tells us which samples are "close" and should thus
// be classified similarly. These edges will be used to define the manifold regularizer.
// To find these edges we use a simple function that samples point pairs randomly and
// returns the top 5% with the shortest edges.
std::vector<sample_pair> edges;
find_percent_shortest_edges_randomly(samples, squared_euclidean_distance(), 0.05, 1000000, time(0), edges);
cout << "number of edges generated: " << edges.size() << endl;
empirical_kernel_map<kernel_type> ekm;
// Since the circles are not linearly separable we will use an empirical kernel map to
// map them into a space where they are separable. We create an empirical_kernel_map
// using a random subset of our data samples as basis samples. Note, however, that even
// though the circles are linearly separable in this new space given by the empirical_kernel_map
// we still won't be able to correctly classify all the points given just the 2 labeled examples.
// We will need to make use of the nearest neighbor information stored in edges. To do that
// we will use the linear_manifold_regularizer.
ekm.load(kern, randomly_subsample(samples, 50));
// Project all the samples into the span of our 50 basis samples
for (unsigned long i = 0; i < samples.size(); ++i)
samples[i] = ekm.project(samples[i]);
// Now create the manifold regularizer. The result is a transformation matrix that
// embodies the manifold assumption discussed above.
linear_manifold_regularizer<sample_type> lmr;
// use_gaussian_weights is a function object that tells lmr how to weight each edge. In this
// case we let the weight decay as edges get longer. So shorter edges are more important than
// longer edges.
lmr.build(samples, edges, use_gaussian_weights(0.1));
const matrix<double> T = lmr.get_transformation_matrix(intrinsic_regularization_strength);
// Apply the transformation generated by the linear_manifold_regularizer to
// all our samples.
for (unsigned long i = 0; i < samples.size(); ++i)
samples[i] = T*samples[i];
// For convenience, generate a projection_function and merge the transformation
// matrix T into it. That is, we will have: proj(x) == T*ekm.project(x).
projection_function<kernel_type> proj = ekm.get_projection_function();
proj.weights = T*proj.weights;
cout << "Running simple test..." << endl;
// Pick 2 different labeled points. One on the inner circle and another on the outer.
// For each of these test points we will see if using the single plane that separates
// them is a good way to separate the concentric circles. We also do this a bunch
// of times with different randomly chosen points so we can see how robust the result is.
for (int itr = 0; itr < 10; ++itr)
{
std::vector<sample_type> test_points;
// generate a random point from the radius 2 circle
generate_circle(test_points, 2, 1);
// generate a random point from the radius 4 circle
generate_circle(test_points, 4, 1);
// project the two test points into kernel space. Recall that this projection_function
// has the manifold regularizer incorporated into it.
const sample_type class1_point = proj(test_points[0]);
const sample_type class2_point = proj(test_points[1]);
double num_wrong = 0;
// Now attempt to classify all the data samples according to which point
// they are closest to. The output of this program shows that without manifold
// regularization this test will fail but with it it will perfectly classify
// all the points.
for (unsigned long i = 0; i < samples.size(); ++i)
{
double distance_to_class1 = length(samples[i] - class1_point);
double distance_to_class2 = length(samples[i] - class2_point);
bool predicted_as_class_1 = (distance_to_class1 < distance_to_class2);
bool really_is_class_1 = (i < num_points);
// now count how many times we make a mistake
if (predicted_as_class_1 != really_is_class_1)
++num_wrong;
}
cout << "error: "<< num_wrong/samples.size() << endl;
}
cout << endl;
}
// ----------------------------------------------------------------------------------------
dlib::rand rnd;
void generate_circle (
std::vector<sample_type>& samples,
double radius,
const long num
)
{
sample_type m(2,1);
for (long i = 0; i < num; ++i)
{
double sign = 1;
if (rnd.get_random_double() < 0.5)
sign = -1;
m(0) = 2*radius*rnd.get_random_double()-radius;
m(1) = sign*sqrt(radius*radius - m(0)*m(0));
samples.push_back(m);
}
}
// ----------------------------------------------------------------------------------------
|