// ml/dlib/dlib/svm/structural_svm_problem_threaded.h

// Copyright (C) 2011  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_STRUCTURAL_SVM_PRObLEM_THREADED_Hh_
#define DLIB_STRUCTURAL_SVM_PRObLEM_THREADED_Hh_

#include "structural_svm_problem_threaded_abstract.h"
#include "../algs.h"
#include <vector>
#include "structural_svm_problem.h"
#include "../matrix.h"
#include "sparse_vector.h"
#include <iostream>
#include "../threads.h"
#include "../misc_api.h"
#include "../statistics.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    // A structural_svm_problem that evaluates the separation oracle for its samples
    // through parallel_for_blocked() on an internally owned thread_pool.
    //
    // Two strategies for combining the per-sample results are available (see
    // binder::call_oracle).  Each pass over the samples is timed and the strategy with
    // the lower mean recorded time is used, except on every 50th pass where the other
    // one is tried so that its timing statistics keep getting updated.
    template <
        typename matrix_type_,
        typename feature_vector_type_ = matrix_type_
        >
    class structural_svm_problem_threaded : public structural_svm_problem<matrix_type_,feature_vector_type_> 
    {
    public:

        typedef matrix_type_ matrix_type;
        typedef typename matrix_type::type scalar_type;
        typedef feature_vector_type_ feature_vector_type;

        // Builds the problem with a thread pool constructed from num_threads.
        explicit structural_svm_problem_threaded (
            unsigned long num_threads
        ) :
            tp(num_threads),
            num_iterations_executed(0)
        {}

        // Reports the number of threads in the internal thread pool.
        unsigned long get_num_threads (
        ) const 
        { 
            return tp.num_threads_in_pool(); 
        }

    private:

        // Helper object handed to parallel_for_blocked().  It holds references to the
        // current weights and to the shared outputs (subgradient and total loss) that
        // call_oracle() folds its results into.
        struct binder
        {
            binder (
                const structural_svm_problem_threaded& owner,
                const matrix_type& weights,
                matrix_type& subgradient_out,
                scalar_type& total_loss_out,
                bool use_local_buffer
            ) : 
                self(owner), 
                w(weights), 
                subgradient(subgradient_out), 
                total_loss(total_loss_out),
                buffer_subgradients_locally(use_local_buffer)
            {}

            // Runs the separation oracle on samples begin, begin+1, ..., end-1 and adds
            // the resulting losses and feature vectors to the shared outputs while
            // holding self.accum_mutex.
            void call_oracle (
                long begin,
                long end
            ) 
            {
                // Local buffering only makes sense when it has been requested and there
                // is more than one sample in this block.
                const long block_size = end-begin;
                const bool accumulate_locally = buffer_subgradients_locally && block_size > 1;

                if (!accumulate_locally)
                {
                    // Simple scheme: publish each sample's result right away, taking the
                    // lock once per sample.
                    scalar_type sample_loss;
                    feature_vector_type psi;
                    for (long idx = begin; idx < end; ++idx)
                    {
                        self.separation_oracle_cached(idx, w, sample_loss, psi);

                        auto_mutex lock(self.accum_mutex);
                        total_loss += sample_loss;
                        add_to(subgradient, psi);
                    }
                    return;
                }

                // Buffered scheme: sum everything for this block into private
                // accumulators first and take the lock only once at the end.  Whether
                // this pays off depends on how expensive it is to add subgradients
                // together (it can be slow when they are very high dimensional), which
                // is why the caller picks the scheme based on measured run times.
                scalar_type block_loss = 0;
                matrix_type block_subgradient(subgradient.size(),1);
                block_subgradient = 0;

                feature_vector_type psi;

                for (long idx = begin; idx < end; ++idx)
                {
                    scalar_type sample_loss;
                    self.separation_oracle_cached(idx, w, sample_loss, psi);
                    block_loss += sample_loss;
                    add_to(block_subgradient, psi);
                }

                auto_mutex lock(self.accum_mutex);
                total_loss += block_loss;
                add_to(subgradient, block_subgradient);
            }

            const structural_svm_problem_threaded& self;
            const matrix_type& w;
            matrix_type& subgradient;
            scalar_type& total_loss;
            bool buffer_subgradients_locally;
        };


        // Runs the separation oracle over samples 0 .. get_num_samples()-1 using the
        // thread pool, adding the results into subgradient and total_loss.  The elapsed
        // time of the pass is recorded against whichever accumulation scheme was used.
        // (Presumably this overrides the hook of the same name in
        // structural_svm_problem; the base class is not visible here.)
        virtual void call_separation_oracle_on_all_samples (
            const matrix_type& w,
            matrix_type& subgradient,
            scalar_type& total_loss
        ) const
        {
            ++num_iterations_executed;

            const uint64 start_time = ts.get_timestamp();

            // Prefer whichever scheme has the smaller mean run time so far, but on
            // every 50th pass use the other scheme to see if it has become the better
            // choice.
            const bool buffering_looks_faster = with_buffer_time.mean() < without_buffer_time.mean();
            const bool try_other_scheme = (num_iterations_executed%50) == 0;
            const bool buffer_subgradients_locally = try_other_scheme ? !buffering_looks_faster 
                                                                      : buffering_looks_faster;

            binder worker(*this, w, subgradient, total_loss, buffer_subgradients_locally);
            parallel_for_blocked(tp, 0, this->get_num_samples(), worker, &binder::call_oracle);

            const uint64 stop_time = ts.get_timestamp();
            const uint64 elapsed = stop_time-start_time;

            // File the measurement under the scheme that was actually executed.
            running_stats<double>& timings = buffer_subgradients_locally ? with_buffer_time 
                                                                         : without_buffer_time;
            timings.add(elapsed);
        }

        mutable thread_pool tp;
        mutable mutex accum_mutex;
        mutable timestamper ts;
        mutable running_stats<double> with_buffer_time;
        mutable running_stats<double> without_buffer_time;
        mutable unsigned long num_iterations_executed;
    };

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_STRUCTURAL_SVM_PRObLEM_THREADED_Hh_