summaryrefslogtreecommitdiffstats
path: root/src/ml/dlib/examples/parallel_for_ex.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/ml/dlib/examples/parallel_for_ex.cpp')
-rw-r--r--src/ml/dlib/examples/parallel_for_ex.cpp158
1 files changed, 158 insertions, 0 deletions
diff --git a/src/ml/dlib/examples/parallel_for_ex.cpp b/src/ml/dlib/examples/parallel_for_ex.cpp
new file mode 100644
index 000000000..70fccab20
--- /dev/null
+++ b/src/ml/dlib/examples/parallel_for_ex.cpp
@@ -0,0 +1,158 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+ This is an example illustrating the use of the parallel for loop tools from the dlib
+ C++ Library.
+
+ Normally, a for loop executes the body of the loop in a serial manner. This means
+ that, for example, if it takes 1 second to execute the body of the loop and the body
+ needs to execute 10 times then it will take 10 seconds to execute the entire loop.
+ However, on modern multi-core computers we have the opportunity to speed this up by
+ executing multiple steps of a for loop in parallel. This example program will walk you
+ though a few examples showing how to do just that.
+*/
+
+
+#include <dlib/threads.h>
+#include <dlib/misc_api.h> // for dlib::sleep
+#include <vector>
+#include <iostream>
+
+using namespace dlib;
+using namespace std;
+
+// ----------------------------------------------------------------------------------------
+
+void print(const std::vector<int>& vect)
+{
+ for (unsigned long i = 0; i < vect.size(); ++i)
+ {
+ cout << vect[i] << endl;
+ }
+ cout << "\n**************************************\n";
+}
+
+// ----------------------------------------------------------------------------------------
+
+void example_using_regular_non_parallel_loops();
+void example_using_lambda_functions();
+
+// ----------------------------------------------------------------------------------------
+
+int main()
+{
+ // We have 2 examples, each contained in a separate function. Both examples perform
+ // exactly the same computation, however, the second does so using parallel for loops.
+ // The first example is here to show you what we are doing in terms of classical
+ // non-parallel for loops. The other example will illustrate how to parallelize the
+ // for loops in C++11.
+
+ example_using_regular_non_parallel_loops();
+ example_using_lambda_functions();
+}
+
+// ----------------------------------------------------------------------------------------
+
+void example_using_regular_non_parallel_loops()
+{
+ cout << "\nExample using regular non-parallel for loops\n" << endl;
+
+ std::vector<int> vect;
+
+ // put 10 elements into vect which are all equal to -1
+ vect.assign(10, -1);
+
+ // Now set each element equal to its index value. We put a sleep call in here so that
+ // when we run the same thing with a parallel for loop later on you will be able to
+ // observe the speedup.
+ for (unsigned long i = 0; i < vect.size(); ++i)
+ {
+ vect[i] = i;
+ dlib::sleep(1000); // sleep for 1 second
+ }
+ print(vect);
+
+
+
+ // Assign only part of the elements in vect.
+ vect.assign(10, -1);
+ for (unsigned long i = 1; i < 5; ++i)
+ {
+ vect[i] = i;
+ dlib::sleep(1000);
+ }
+ print(vect);
+
+
+
+ // Sum all element sin vect.
+ int sum = 0;
+ vect.assign(10, 2);
+ for (unsigned long i = 0; i < vect.size(); ++i)
+ {
+ dlib::sleep(1000);
+ sum += vect[i];
+ }
+
+ cout << "sum: "<< sum << endl;
+}
+
+// ----------------------------------------------------------------------------------------
+
+void example_using_lambda_functions()
+{
+ cout << "\nExample using parallel for loops\n" << endl;
+
+ std::vector<int> vect;
+
+ vect.assign(10, -1);
+ parallel_for(0, vect.size(), [&](long i){
+ // The i variable is the loop counter as in a normal for loop. So we simply need
+ // to place the body of the for loop right here and we get the same behavior. The
+ // range for the for loop is determined by the 1nd and 2rd arguments to
+ // parallel_for(). This way of calling parallel_for() will use a number of threads
+ // that is appropriate for your hardware. See the parallel_for() documentation for
+ // other options.
+ vect[i] = i;
+ dlib::sleep(1000);
+ });
+ print(vect);
+
+
+ // Assign only part of the elements in vect.
+ vect.assign(10, -1);
+ parallel_for(1, 5, [&](long i){
+ vect[i] = i;
+ dlib::sleep(1000);
+ });
+ print(vect);
+
+
+ // Note that things become a little more complex if the loop bodies are not totally
+ // independent. In the first two cases each iteration of the loop touched different
+ // memory locations, so we didn't need to use any kind of thread synchronization.
+ // However, in the summing loop we need to add some synchronization to protect the sum
+ // variable. This is easily accomplished by creating a mutex and locking it before
+ // adding to sum. More generally, you must ensure that the bodies of your parallel for
+ // loops are thread safe using whatever means is appropriate for your code. Since a
+ // parallel for loop is implemented using threads, all the usual techniques for
+ // ensuring thread safety can be used.
+ int sum = 0;
+ dlib::mutex m;
+ vect.assign(10, 2);
+ parallel_for(0, vect.size(), [&](long i){
+ // The sleep statements still execute in parallel.
+ dlib::sleep(1000);
+
+ // Lock the m mutex. The auto_mutex will automatically unlock at the closing }.
+ // This will ensure only one thread can execute the sum += vect[i] statement at
+ // a time.
+ auto_mutex lock(m);
+ sum += vect[i];
+ });
+
+ cout << "sum: "<< sum << endl;
+}
+
+// ----------------------------------------------------------------------------------------
+