1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
|
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_
#ifdef DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_
#include "structural_svm_problem_abstract.h"
#include "../optimization/optimization_oca_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class svm_struct_processing_node : noncopyable
{
/*!
WHAT THIS OBJECT REPRESENTS
This object is a tool for distributing the work involved in solving
a dlib::structural_svm_problem across many computers. It is used in
conjunction with the svm_struct_controller_node defined below.
!*/
public:
template <
typename T,
typename U
>
svm_struct_processing_node (
const structural_svm_problem<T,U>& problem,
unsigned short port,
unsigned short num_threads
);
/*!
requires
- port != 0
- problem.get_num_samples() != 0
- problem.get_num_dimensions() != 0
ensures
- This object will listen on the given port for a TCP connection from a
svm_struct_controller_node. Once connected, the controller node will
be able to access the given problem.
- Will use num_threads threads at a time to make concurrent calls to the
problem.separation_oracle() routine. You should set this parameter equal
to the number of available processing cores.
- Note that the following parameters within the given problem are ignored:
- problem.get_c()
- problem.get_epsilon()
- problem.get_cache_based_epsilon()
- problem.num_nuclear_norm_regularizers()
- whether the problem is verbose or not
Instead, they are defined by the svm_struct_controller_node. Note, however,
that the problem.get_max_cache_size() parameter is meaningful and controls
the size of the separation oracle cache within a svm_struct_processing_node.
!*/
};
// ----------------------------------------------------------------------------------------
class svm_struct_controller_node : noncopyable
{
/*!
INITIAL VALUE
- get_num_processing_nodes() == 0
- get_epsilon() == 0.001
- get_max_iterations() == 10000
- get_c() == 1
- This object will not be verbose
WHAT THIS OBJECT REPRESENTS
This object is a tool for distributing the work involved in solving a
dlib::structural_svm_problem across many computers. The best way to understand
its use is via example:
First, suppose you have defined a structural_svm_problem object by inheriting from
it and defining the appropriate virtual functions. You could solve it by passing
an instance to the oca optimizer. However, if your separation oracle takes a long
time to evaluate then the optimization will take a long time to solve. To speed
this up we can distribute the calls to the separation oracle across many computers.
To make this concrete, let's imagine you want to distribute the work across three
computers. You can accomplish this by creating four programs. One containing a
svm_struct_controller_node and three containing svm_struct_processing_nodes.
The programs might look like this:
Controller program:
int main()
{
svm_struct_controller_node cont;
cont.set_c(100);
// Tell cont where the processing nodes are on your network.
cont.add_processing_node("192.168.1.10:12345");
cont.add_processing_node("192.168.1.11:12345");
cont.add_processing_node("192.168.1.12:12345");
matrix<double> w;
oca solver;
cont(solver, w); // Run the optimization.
// After this finishes w will contain the solution vector.
}
Processing programs (they are all the same, except that each loads a different subset
of the training data):
int main()
{
// Put one third of your data into this problem object. How you do this depends on your problem.
your_structural_svm_problem problem;
svm_struct_processing_node node(problem, 12345, number_of_cores_on_this_computer);
cout << "hit enter to terminate this program" << endl;
cin.get();
}
!*/
public:
svm_struct_controller_node (
);
/*!
ensures
- this object is properly initialized
!*/
void set_epsilon (
double eps
);
/*!
requires
- eps > 0
ensures
- #get_epsilon() == eps
!*/
double get_epsilon (
) const;
/*!
ensures
- returns the error epsilon that determines when training should stop.
Smaller values may result in a more accurate solution but take longer
to execute. Specifically, the algorithm stops when the average sample
risk (i.e. R(w) as defined by the dlib::structural_svm_problem object) is
within epsilon of its optimal value.
Also note that sample risk is an upper bound on a sample's loss. So
you can think of this epsilon value as saying "solve the optimization
problem until the average loss per sample is within epsilon of its
optimal value".
!*/
double get_cache_based_epsilon (
) const;
/*!
ensures
- if (get_max_cache_size() != 0) then
- The solver will not stop when the average sample risk is within
get_epsilon() of its optimal value. Instead, it will keep running
but will run the optimizer completely on the cache until the average
sample risk is within #get_cache_based_epsilon() of its optimal
value. This means that it will perform this additional refinement in
the solution accuracy without making any additional calls to the
separation_oracle(). This is useful when using a nuclear norm
regularization term because it allows you to quickly solve the
optimization problem to a high precision, which in the case of a
nuclear norm regularized problem means that many of the learned
matrices will be low rank or very close to low rank due to the
nuclear norm regularizer. This may not happen without solving the
problem to a high accuracy or their ranks may be difficult to
determine, so the extra accuracy given by the cache based refinement
is very useful. Finally, note that we include the nuclear norm term
as part of the "risk" for the purposes of determining when to stop.
- else
- The value of #get_cache_based_epsilon() has no effect.
NOTE(review): get_max_cache_size() is not a member of this object. Presumably
it refers to the max cache size of the structural_svm_problem objects given to
the svm_struct_processing_nodes -- confirm.
!*/
void set_cache_based_epsilon (
double eps
);
/*!
requires
- eps > 0
ensures
- #get_cache_based_epsilon() == eps
!*/
void set_max_iterations (
unsigned long max_iter
);
/*!
ensures
- #get_max_iterations() == max_iter
!*/
// NOTE(review): unlike the other getters in this class, this one is not declared
// const -- confirm whether that matches the implementation and is intentional.
unsigned long get_max_iterations (
);
/*!
ensures
- returns the maximum number of iterations the SVM optimizer is allowed to
run before it is required to stop and return a result.
!*/
void add_nuclear_norm_regularizer (
long first_dimension,
long rows,
long cols,
double regularization_strength
);
/*!
requires
- 0 <= first_dimension < number of dimensions in problem
- 0 <= rows
- 0 <= cols
- first_dimension+rows*cols <= number of dimensions in problem
- 0 < regularization_strength
ensures
- Adds a nuclear norm regularization term to the optimization problem
solved by this object. That is, instead of solving:
Minimize: h(w) == 0.5*dot(w,w) + C*R(w)
this object will solve:
Minimize: h(w) == 0.5*dot(w,w) + C*R(w) + regularization_strength*nuclear_norm_of(part of w)
where "part of w" is the part of w indicated by the arguments to this
function. In particular, the part of w included in the nuclear norm is
exactly the matrix reshape(rowm(w, range(first_dimension, first_dimension+rows*cols-1)), rows, cols).
Therefore, if you think of the w vector as being the concatenation of a
bunch of matrices then you can use multiple calls to add_nuclear_norm_regularizer()
to add nuclear norm regularization terms to any of the matrices packed into w.
- #num_nuclear_norm_regularizers() == num_nuclear_norm_regularizers() + 1
!*/
unsigned long num_nuclear_norm_regularizers (
) const;
/*!
ensures
- returns the number of nuclear norm regularizers that are currently a part
of this optimization problem. That is, returns the number of times
add_nuclear_norm_regularizer() has been called since the last call to
clear_nuclear_norm_regularizers() or object construction, whichever is
most recent.
!*/
void clear_nuclear_norm_regularizers (
);
/*!
ensures
- #num_nuclear_norm_regularizers() == 0
!*/
void be_verbose (
);
/*!
ensures
- This object will print status messages to standard out so that a
user can observe the progress of the algorithm.
!*/
void be_quiet(
);
/*!
ensures
- this object will not print anything to standard out
!*/
double get_c (
) const;
/*!
ensures
- returns the SVM regularization parameter. It is the parameter that
determines the trade off between trying to fit the training data
exactly or allowing more errors but hopefully improving the
generalization of the resulting classifier. Larger values encourage
exact fitting while smaller values of C may encourage better
generalization.
!*/
void set_c (
double C
);
/*!
requires
- C > 0
ensures
- #get_c() == C
!*/
void add_processing_node (
const network_address& addr
);
/*!
requires
- addr.port != 0
ensures
- if (this address hasn't already been added) then
- #get_num_processing_nodes() == get_num_processing_nodes() + 1
- When operator() is invoked to solve the structural svm problem this
object will connect to the svm_struct_processing_node located at the
given network address and will include it in the distributed
optimization.
!*/
void add_processing_node (
const std::string& ip_or_hostname,
unsigned short port
);
/*!
requires
- port != 0
ensures
- invokes: add_processing_node(network_address(ip_or_hostname, port))
!*/
unsigned long get_num_processing_nodes (
) const;
/*!
ensures
- returns the number of remote processing nodes that have been
registered with this object.
!*/
void remove_processing_nodes (
);
/*!
ensures
- #get_num_processing_nodes() == 0
!*/
class invalid_problem : public error {};
template <typename matrix_type>
double operator() (
const oca& solver,
matrix_type& w
) const;
/*!
requires
- get_num_processing_nodes() != 0
- matrix_type == a dlib::matrix capable of storing column vectors
ensures
- connects to the processing nodes and begins optimizing the structural
svm problem using the given oca solver.
- stores the solution in #w
- returns the objective value at the solution #w
throws
- invalid_problem
This exception is thrown if the svm_struct_processing_nodes disagree
on the dimensionality of the problem. That is, if they disagree on
the value of structural_svm_problem::get_num_dimensions().
!*/
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_
|