// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
    This is an example illustrating the use of the Bayesian Network 
    inference utilities found in the dlib C++ library.
    

    In this example all the nodes in the Bayesian network are
    boolean variables.  That is, they take on either the value
    0 or the value 1.

    The network contains 4 nodes and looks as follows:

        B     C
        \\   //
         \/ \/ 
           A
           ||
           \/
            D


    The probabilities of each node are summarized below.  (The probability
    of each node being 0 is not listed since it is just p(X=0) = 1 - p(X=1).)

        p(B=1) = 0.01

        p(C=1) = 0.001

        p(A=1 | B=0, C=0) = 0.01  
        p(A=1 | B=0, C=1) = 0.5
        p(A=1 | B=1, C=0) = 0.9
        p(A=1 | B=1, C=1) = 0.99 

        p(D=1 | A=0) = 0.2 
        p(D=1 | A=1) = 0.5

*/
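
/*
    For reference (a hand calculation added alongside the original comment, not
    output from the program): the prior marginals implied by the tables above
    can be computed by summing over the parents.  These are the numbers the
    join tree algorithm below should reproduce:

        p(A=1) = sum over b,c of p(A=1|B=b,C=c) * p(B=b) * p(C=c)
               = 0.99*0.01*0.001 + 0.9*0.01*0.999 + 0.5*0.99*0.001 + 0.01*0.99*0.999
               ~ 0.0194

        p(D=1) = p(D=1|A=1)*p(A=1) + p(D=1|A=0)*p(A=0)
               = 0.5*0.0194 + 0.2*0.9806
               ~ 0.2058
*/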


#include <dlib/bayes_utils.h>
#include <dlib/graph_utils.h>
#include <dlib/graph.h>
#include <dlib/directed_graph.h>
#include <iostream>


using namespace dlib;
using namespace std;

// ----------------------------------------------------------------------------------------

int main()
{
    try
    {
        // There are many useful convenience functions in this namespace.  They all
        // perform simple access or modification operations on the nodes of a bayesian
        // network.  You don't have to use them, but they are convenient and they will
        // also check for various errors in your bayesian network when your application
        // is built with the DEBUG or ENABLE_ASSERTS preprocessor symbols defined.  So
        // their use is recommended.  In fact, most of the global functions used in
        // this example program are from this namespace.
        using namespace bayes_node_utils;

        // This statement declares a bayesian network called bn.  Note that a bayesian network
        // in the dlib world is just a directed_graph object that contains a special kind 
        // of node called a bayes_node.
        directed_graph<bayes_node>::kernel_1a_c bn;

        // Use an enum to make some more readable names for our nodes.
        enum nodes
        {
            A = 0,
            B = 1,
            C = 2,
            D = 3
        };

        // The next few blocks of code set up our bayesian network.

        // The first thing we do is tell the bn object how many nodes it has
        // and also add the three edges.  Again, we are using the network
        // shown in ASCII art at the top of this file.
        bn.set_number_of_nodes(4);
        bn.add_edge(A, D);
        bn.add_edge(B, A);
        bn.add_edge(C, A);


        // Now we inform all the nodes in the network that they are binary
        // nodes.  That is, they only have two possible values.  
        set_node_num_values(bn, A, 2);
        set_node_num_values(bn, B, 2);
        set_node_num_values(bn, C, 2);
        set_node_num_values(bn, D, 2);
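
        // (A quick sanity check, added here as a sketch rather than part of the
        // original example: node_num_values() is the read counterpart of
        // set_node_num_values(), so we can verify every node was marked binary.)
        for (unsigned long n = 0; n < bn.number_of_nodes(); ++n)
        {
            if (node_num_values(bn, n) != 2)
                cout << "node " << n << " is not binary!" << endl;
        }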

        assignment parent_state;
        // Now we will enter all the conditional probability information for each node.
        // Each node's conditional probability is dependent on the state of its parents.
        // To specify this state we need to use the assignment object.  This assignment
        // object allows us to specify the state of each node's parents.


        // Here we specify that p(B=1) = 0.01
        // parent_state is empty in this case since B is a root node. 
        set_node_probability(bn, B, 1, parent_state, 0.01);
        // Here we specify that p(B=0) = 1-0.01
        set_node_probability(bn, B, 0, parent_state, 1-0.01);


        // Here we specify that p(C=1) = 0.001
        // parent_state is empty in this case since C is a root node. 
        set_node_probability(bn, C, 1, parent_state, 0.001);
        // Here we specify that p(C=0) = 1-0.001
        set_node_probability(bn, C, 0, parent_state, 1-0.001);


        // This is our first node that has parents. So we set the parent_state
        // object to reflect that A has both B and C as parents.
        parent_state.add(B, 1);
        parent_state.add(C, 1);
        // Here we specify that p(A=1 | B=1, C=1) = 0.99 
        set_node_probability(bn, A, 1, parent_state, 0.99);
        // Here we specify that p(A=0 | B=1, C=1) = 1-0.99 
        set_node_probability(bn, A, 0, parent_state, 1-0.99);

        // Here we use the [] notation because B and C have already
        // been added into parent_state.
        parent_state[B] = 1;
        parent_state[C] = 0;
        // Here we specify that p(A=1 | B=1, C=0) = 0.9 
        set_node_probability(bn, A, 1, parent_state, 0.9);
        set_node_probability(bn, A, 0, parent_state, 1-0.9);

        parent_state[B] = 0;
        parent_state[C] = 1;
        // Here we specify that p(A=1 | B=0, C=1) = 0.5 
        set_node_probability(bn, A, 1, parent_state, 0.5);
        set_node_probability(bn, A, 0, parent_state, 1-0.5);

        parent_state[B] = 0;
        parent_state[C] = 0;
        // Here we specify that p(A=1 | B=0, C=0) = 0.01 
        set_node_probability(bn, A, 1, parent_state, 0.01);
        set_node_probability(bn, A, 0, parent_state, 1-0.01);


        // Here we set probabilities for node D.
        // First we clear out parent_state so that it doesn't have any of
        // the assignments for the B and C nodes used above.
        parent_state.clear();
        parent_state.add(A,1);
        // Here we specify that p(D=1 | A=1) = 0.5 
        set_node_probability(bn, D, 1, parent_state, 0.5);
        set_node_probability(bn, D, 0, parent_state, 1-0.5);

        parent_state[A] = 0;
        // Here we specify that p(D=1 | A=0) = 0.2 
        set_node_probability(bn, D, 1, parent_state, 0.2);
        set_node_probability(bn, D, 0, parent_state, 1-0.2);
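
        // Another optional sanity check (a sketch, not part of the original
        // example): bayes_node_utils also provides node_probability(), the read
        // counterpart of set_node_probability(), so we can confirm that the row
        // we just entered sums to 1.  Note that parent_state still holds A=0 here.
        const double d_row_sum = node_probability(bn, D, 0, parent_state)
                               + node_probability(bn, D, 1, parent_state);
        if (d_row_sum < 0.999999 || d_row_sum > 1.000001)
            cout << "warning: the CPT row for D does not sum to 1" << endl;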



        // We have now finished setting up our bayesian network.  So let's compute some 
        // probability values.  The first thing we will do is compute the prior probability
        // of each node in the network.  To do this we will use the join tree algorithm which
        // is an algorithm for performing exact inference in a bayesian network.   

        // First we need to create an undirected graph which contains set objects at each node and
        // edge.  This long declaration does the trick.
        typedef dlib::set<unsigned long>::compare_1b_c set_type;
        typedef graph<set_type, set_type>::kernel_1a_c join_tree_type;
        join_tree_type join_tree;

        // Now we need to populate the join_tree with data from our bayesian network.  The next  
        // function calls do this.  Explaining exactly what they do is outside the scope of this
        // example.  Just think of them as filling join_tree with information that is useful 
        // later on for dealing with our bayesian network.  
        create_moral_graph(bn, join_tree);
        create_join_tree(join_tree, join_tree);

        // Now that we have a proper join_tree we can use it to obtain a solution to our
        // bayesian network.  Doing this is as simple as declaring an instance of
        // the bayesian_network_join_tree object as follows:
        bayesian_network_join_tree solution(bn, join_tree);
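
        // (As an aside: solution.probability(n) returns a row vector, a dlib
        // matrix, with one element per value of node n.  That is why it is
        // indexed with (1) and (0) below.)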


        // now print out the probabilities for each node
        cout << "Using the join tree algorithm:\n";
        cout << "p(A=1) = " << solution.probability(A)(1) << endl;
        cout << "p(A=0) = " << solution.probability(A)(0) << endl;
        cout << "p(B=1) = " << solution.probability(B)(1) << endl;
        cout << "p(B=0) = " << solution.probability(B)(0) << endl;
        cout << "p(C=1) = " << solution.probability(C)(1) << endl;
        cout << "p(C=0) = " << solution.probability(C)(0) << endl;
        cout << "p(D=1) = " << solution.probability(D)(1) << endl;
        cout << "p(D=0) = " << solution.probability(D)(0) << endl;
        cout << "\n\n\n";


        // Now, to make things more interesting, let's say that we have discovered
        // that the C node really has a value of 1.  That is to say, we now have
        // evidence that C is 1.  We can represent this in the network using the
        // following two function calls.
        set_node_value(bn, C, 1);
        set_node_as_evidence(bn, C);
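        // (If we ever wanted to retract this evidence later, bayes_node_utils
        // also provides set_node_as_nonevidence(bn, C), which turns the node
        // back into an ordinary, non-evidence node.)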

        // Now we want to compute the probabilities of all the nodes in the network again
        // given that we now know that C is 1.  We can do this as follows:
        bayesian_network_join_tree solution_with_evidence(bn, join_tree);

        // now print out the probabilities for each node
        cout << "Using the join tree algorithm:\n";
        cout << "p(A=1 | C=1) = " << solution_with_evidence.probability(A)(1) << endl;
        cout << "p(A=0 | C=1) = " << solution_with_evidence.probability(A)(0) << endl;
        cout << "p(B=1 | C=1) = " << solution_with_evidence.probability(B)(1) << endl;
        cout << "p(B=0 | C=1) = " << solution_with_evidence.probability(B)(0) << endl;
        cout << "p(C=1 | C=1) = " << solution_with_evidence.probability(C)(1) << endl;
        cout << "p(C=0 | C=1) = " << solution_with_evidence.probability(C)(0) << endl;
        cout << "p(D=1 | C=1) = " << solution_with_evidence.probability(D)(1) << endl;
        cout << "p(D=0 | C=1) = " << solution_with_evidence.probability(D)(0) << endl;
        cout << "\n\n\n";
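        // For reference, these posteriors can also be worked out by hand.  B and
        // C are marginally independent (A is an unobserved collider between them),
        // so observing C tells us nothing about B:
        //
        //     p(B=1 | C=1) = p(B=1) = 0.01
        //     p(A=1 | C=1) = 0.99*0.01 + 0.5*0.99        ~ 0.5049
        //     p(D=1 | C=1) = 0.5*0.5049 + 0.2*(1-0.5049) ~ 0.3515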

        // Note that when we made our solution_with_evidence object we reused our join_tree object.
        // This saves us the time it takes to calculate the join_tree object from scratch.  But
        // it is important to note that we can only reuse the join_tree object if we haven't changed
        // the structure of our bayesian network.  That is, if we have added or removed nodes or 
        // edges from our bayesian network then we must recompute our join_tree.  But in this example
        // all we did was change the value of a bayes_node object (we made node C be evidence)
        // so we are ok.





        // Next this example will show you how to use the bayesian_network_gibbs_sampler object
        // to perform approximate inference in a bayesian network.  This is an algorithm 
        // that doesn't give you an exact solution but it may be necessary to use in some 
        // instances.  For example, the join tree algorithm used above, while fast in many
        // instances, has exponential runtime in some cases.  Moreover, inference in bayesian
        // networks is NP-Hard for general networks so sometimes the best you can do is
        // find an approximation.
        // However, it should be noted that the Gibbs sampler does not compute the
        // correct probabilities if the network contains a deterministic node.  That is,
        // if any of the conditional probability tables in the bayesian network contain
        // a probability of exactly 1.0 then the Gibbs sampler should not be used, since
        // the sampler can get stuck in such a state and never visit other parts of the
        // distribution.


        // The Gibbs sampler algorithm works by randomly sampling possible values of the
        // network.  So to use it we should set the network to some initial state.

        set_node_value(bn, A, 0);
        set_node_value(bn, B, 0);
        set_node_value(bn, D, 0);

        // We will leave the C node with a value of 1 and keep it as an evidence node.  


        // First create an instance of the gibbs sampler object
        bayesian_network_gibbs_sampler sampler;


        // To use this algorithm all we do is go into a loop for a certain number of
        // iterations, and each time through we sample the bayesian network.  Then we
        // count how many times each node has a certain state.  The estimated probability
        // of a node having that state is then just its count divided by the total
        // number of times through the loop.

        // The following code illustrates the general procedure.
        unsigned long A_count = 0;
        unsigned long B_count = 0;
        unsigned long C_count = 0;
        unsigned long D_count = 0;

        // The more times you let the loop run the more accurate the result will be.  Here we loop
        // 2000 times.
        const long rounds = 2000;
        for (long i = 0; i < rounds; ++i)
        {
            sampler.sample_graph(bn);

            if (node_value(bn, A) == 1)
                ++A_count;
            if (node_value(bn, B) == 1)
                ++B_count;
            if (node_value(bn, C) == 1)
                ++C_count;
            if (node_value(bn, D) == 1)
                ++D_count;
        }

        cout << "Using the approximate Gibbs Sampler algorithm:\n";
        cout << "p(A=1 | C=1) = " << (double)A_count/(double)rounds << endl;
        cout << "p(B=1 | C=1) = " << (double)B_count/(double)rounds << endl;
        cout << "p(C=1 | C=1) = " << (double)C_count/(double)rounds << endl;
        cout << "p(D=1 | C=1) = " << (double)D_count/(double)rounds << endl;
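
        // Since these are Monte Carlo estimates, expect them to wobble around the
        // exact join tree answers printed above.  The standard error of each
        // estimate is roughly sqrt(p*(1-p)/rounds) (about 0.011 for p near 0.5
        // with 2000 rounds), and because successive Gibbs samples are correlated
        // the true error is often somewhat larger, so increase rounds if you
        // need tighter estimates.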
    }
    catch (std::exception& e)
    {
        cout << "exception thrown: " << endl;
        cout << e.what() << endl;
        cout << "hit enter to terminate" << endl;
        cin.get();
    }
}