// ml/dlib/dlib/svm/structural_svm_distributed_abstract.h

// Copyright (C) 2011  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
// NOTE: The guard macro is #undef'd immediately before it is tested, so the
// #ifdef below is never true and everything up to the matching #endif is
// excluded from compilation.  This file therefore only documents the interface
// (declarations plus their contracts); it contributes no compiled code.
#undef DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_
#ifdef DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_


#include "structural_svm_problem_abstract.h"
#include "../optimization/optimization_oca_abstract.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    class svm_struct_processing_node : noncopyable
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object is a tool for distributing the work involved in solving
                a dlib::structural_svm_problem across many computers.  It is used in
                conjunction with the svm_struct_controller_node defined below.

                Each svm_struct_processing_node is given one structural_svm_problem
                and listens on a TCP port.  Once a svm_struct_controller_node
                connects, the controller is able to access that problem, and the
                processing node makes the calls to the problem's separation oracle.
        !*/

    public:

        template <
            typename T,
            typename U 
            >
        svm_struct_processing_node (
            const structural_svm_problem<T,U>& problem,
            unsigned short port,
            unsigned short num_threads
        );
        /*!
            requires
                - port != 0
                - problem.get_num_samples() != 0
                - problem.get_num_dimensions() != 0
            ensures
                - This object will listen on the given port for a TCP connection from a 
                  svm_struct_controller_node.  Once connected, the controller node will 
                  be able to access the given problem.
                - Will use num_threads threads at a time to make concurrent calls to the 
                  problem.separation_oracle() routine.  You should set this parameter equal 
                  to the number of available processing cores.
                - Note that the following parameters within the given problem are ignored:
                    - problem.get_c()
                    - problem.get_epsilon()
                    - problem.get_cache_based_epsilon()
                    - problem.num_nuclear_norm_regularizers()
                    - whether the problem is verbose or not
                  Instead, they are defined by the svm_struct_controller_node. Note, however,
                  that the problem.get_max_cache_size() parameter is meaningful and controls
                  the size of the separation oracle cache within a svm_struct_processing_node.

            NOTE(review)
                - No precondition is stated for num_threads; presumably it must be
                  at least 1 -- confirm against the implementation.
                - problem is taken by reference; presumably it must remain valid for
                  as long as this object exists -- confirm against the implementation.
        !*/
    };

// ----------------------------------------------------------------------------------------

    class svm_struct_controller_node : noncopyable
    {
        /*!
            INITIAL VALUE
                - get_num_processing_nodes() == 0
                - get_epsilon() == 0.001
                - get_max_iterations() == 10000
                - get_c() == 1
                - num_nuclear_norm_regularizers() == 0
                - This object will not be verbose
                - NOTE(review): the initial value of get_cache_based_epsilon() is not
                  documented here -- confirm against the implementation.

            WHAT THIS OBJECT REPRESENTS
                This object is a tool for distributing the work involved in solving a 
                dlib::structural_svm_problem across many computers.  The best way to understand
                its use is via example:

                First, suppose you have defined a structural_svm_problem object by inheriting from 
                it and defining the appropriate virtual functions.  You could solve it by passing 
                an instance to the oca optimizer.  However, if your separation oracle takes a long 
                time to evaluate then the optimization will take a long time to solve.  To speed 
                this up we can distribute the calls to the separation oracle across many computers.  
                
                To make this concrete, let's imagine you want to distribute the work across three 
                computers. You can accomplish this by creating four programs.  One containing a 
                svm_struct_controller_node and three containing svm_struct_processing_nodes.

                The programs might look like this:

                Controller program:
                    int main() 
                    {
                        svm_struct_controller_node cont;
                        cont.set_c(100);
                        // Tell cont where the processing nodes are on your network.
                        cont.add_processing_node("192.168.1.10:12345");
                        cont.add_processing_node("192.168.1.11:12345");
                        cont.add_processing_node("192.168.1.12:12345");
                        matrix<double> w;
                        oca solver;
                        cont(solver, w); // Run the optimization.
                        // After this finishes w will contain the solution vector.
                    }

                Processing programs (they are all the same, except that each loads a different subset
                of the training data):
                    int main()
                    {
                        // Put one third of your data into this problem object.  How you do this depends on your problem.
                        your_structural_svm_problem problem;
                        svm_struct_processing_node node(problem, 12345, number_of_cores_on_this_computer);
                        cout << "hit enter to terminate this program" << endl;
                        cin.get();
                    }

        !*/

    public:

        svm_struct_controller_node (
        );
        /*!
            ensures
                - this object is properly initialized
        !*/

        void set_epsilon (
            double eps
        );
        /*!
            requires
                - eps > 0
            ensures
                - #get_epsilon() == eps
        !*/

        double get_epsilon (
        ) const;
        /*!
            ensures
                - returns the error epsilon that determines when training should stop.
                  Smaller values may result in a more accurate solution but take longer 
                  to execute.  Specifically, the algorithm stops when the average sample
                  risk (i.e. R(w) as defined by the dlib::structural_svm_problem object) is 
                  within epsilon of its optimal value.

                  Also note that sample risk is an upper bound on a sample's loss.  So
                  you can think of this epsilon value as saying "solve the optimization
                  problem until the average loss per sample is within epsilon of its 
                  optimal value".
        !*/

        double get_cache_based_epsilon (
        ) const;
        /*!
            ensures
                - if (get_max_cache_size() != 0) then
                    - The solver will not stop when the average sample risk is within
                      get_epsilon() of its optimal value.  Instead, it will keep running
                      but will run the optimizer completely on the cache until the average
                      sample risk is within #get_cache_based_epsilon() of its optimal
                      value.  This means that it will perform this additional refinement in
                      the solution accuracy without making any additional calls to the
                      separation_oracle().  This is useful when using a nuclear norm
                      regularization term because it allows you to quickly solve the
                      optimization problem to a high precision, which in the case of a
                      nuclear norm regularized problem means that many of the learned
                      matrices will be low rank or very close to low rank due to the
                      nuclear norm regularizer.  This may not happen without solving the
                      problem to a high accuracy or their ranks may be difficult to
                      determine, so the extra accuracy given by the cache based refinement
                      is very useful.  Finally, note that we include the nuclear norm term
                      as part of the "risk" for the purposes of determining when to stop.  
                - else
                    - The value of #get_cache_based_epsilon() has no effect.

            NOTE(review)
                - This class declares no get_max_cache_size() member.  Presumably the
                  condition above refers to the get_max_cache_size() of the
                  structural_svm_problem objects held by the svm_struct_processing_nodes
                  -- confirm against the implementation.
        !*/

        void set_cache_based_epsilon (
            double eps
        );
        /*!
            requires
                - eps > 0
            ensures
                - #get_cache_based_epsilon() == eps
        !*/

        void set_max_iterations (
            unsigned long max_iter
        );
        /*!
            ensures
                - #get_max_iterations() == max_iter
        !*/

        unsigned long get_max_iterations (
        ); 
        /*!
            ensures
                - returns the maximum number of iterations the SVM optimizer is allowed to
                  run before it is required to stop and return a result.

            NOTE(review)
                - Unlike the other getters in this class, this one is not declared
                  const, so it cannot be called through a const reference.  Check
                  whether that is intentional before changing it, since this
                  declaration has to match the real implementation.
        !*/

        void add_nuclear_norm_regularizer (
            long first_dimension,
            long rows,
            long cols,
            double regularization_strength
        );
        /*!
            requires
                - 0 <= first_dimension < number of dimensions in problem 
                - 0 <= rows
                - 0 <= cols
                - first_dimension+rows*cols <= number of dimensions in problem
                - 0 < regularization_strength
            ensures
                - Adds a nuclear norm regularization term to the optimization problem
                  solved by this object.  That is, instead of solving:
                    Minimize: h(w) == 0.5*dot(w,w) + C*R(w)
                  this object will solve:
                    Minimize: h(w) == 0.5*dot(w,w) + C*R(w) + regularization_strength*nuclear_norm_of(part of w)
                  where "part of w" is the part of w indicated by the arguments to this
                  function. In particular, the part of w included in the nuclear norm is
                  exactly the matrix reshape(rowm(w, range(first_dimension, first_dimension+rows*cols-1)), rows, cols).
                  Therefore, if you think of the w vector as being the concatenation of a
                  bunch of matrices then you can use multiple calls to add_nuclear_norm_regularizer() 
                  to add nuclear norm regularization terms to any of the matrices packed into w.
                - #num_nuclear_norm_regularizers() == num_nuclear_norm_regularizers() + 1
        !*/

        unsigned long num_nuclear_norm_regularizers (
        ) const; 
        /*!
            ensures
                - returns the number of nuclear norm regularizers that are currently a part
                  of this optimization problem.  That is, returns the number of times
                  add_nuclear_norm_regularizer() has been called since the last call to
                  clear_nuclear_norm_regularizers() or object construction, whichever is
                  most recent.
        !*/

        void clear_nuclear_norm_regularizers (
        );
        /*!
            ensures
                - #num_nuclear_norm_regularizers() == 0
        !*/

        void be_verbose (
        );
        /*!
            ensures
                - This object will print status messages to standard out so that a 
                  user can observe the progress of the algorithm.
        !*/

        void be_quiet(
        );
        /*!
            ensures
                - this object will not print anything to standard out
        !*/

        double get_c (
        ) const;
        /*!
            ensures
                - returns the SVM regularization parameter.  It is the parameter that 
                  determines the trade off between trying to fit the training data 
                  exactly or allowing more errors but hopefully improving the 
                  generalization of the resulting classifier.  Larger values encourage 
                  exact fitting while smaller values of C may encourage better 
                  generalization. 
        !*/

        void set_c (
            double C
        );
        /*!
            requires
                - C > 0
            ensures
                - #get_c() == C
        !*/

        void add_processing_node (
            const network_address& addr
        );
        /*!
            requires
                - addr.port != 0
            ensures
                - if (this address hasn't already been added) then
                    - #get_num_processing_nodes() == get_num_processing_nodes() + 1
                    - When operator() is invoked to solve the structural svm problem this
                      object will connect to the svm_struct_processing_node located at the
                      given network address and will include it in the distributed
                      optimization.

            NOTE(review)
                - The behavior when addr has already been added is not spelled out
                  above; presumably the call then has no effect -- confirm against the
                  implementation.
        !*/

        void add_processing_node (
            const std::string& ip_or_hostname,
            unsigned short port
        );
        /*!
            requires
                - port != 0
            ensures
                - invokes: add_processing_node(network_address(ip_or_hostname, port))
        !*/

        unsigned long get_num_processing_nodes (
        ) const;
        /*!
            ensures
                - returns the number of remote processing nodes that have been
                  registered with this object.
        !*/

        void remove_processing_nodes (
        );
        /*!
            ensures
                - #get_num_processing_nodes() == 0
        !*/

        // Exception type thrown by operator() when the svm_struct_processing_nodes
        // disagree on the dimensionality of the problem (see the throws clause of
        // operator() below).
        class invalid_problem : public error {};

        template <typename matrix_type>
        double operator() (
            const oca& solver,
            matrix_type& w
        ) const;
        /*!
            requires
                - get_num_processing_nodes() != 0
                - matrix_type == a dlib::matrix capable of storing column vectors
            ensures
                - connects to the processing nodes and begins optimizing the structural
                  svm problem using the given oca solver.
                - stores the solution in #w
                - returns the objective value at the solution #w
            throws
                - invalid_problem
                  This exception is thrown if the svm_struct_processing_nodes disagree
                  on the dimensionality of the problem.  That is, if they disagree on
                  the value of structural_svm_problem::get_num_dimensions().
        !*/

    };

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_