author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:19:48 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:20:02 +0000
commit    | 58daab21cd043e1dc37024a7f99b396788372918 (patch)
tree      | 96771e43bb69f7c1c2b0b4f7374cb74d7866d0cb /ml/dlib/examples/dnn_introduction_ex.cpp
parent    | Releasing debian version 1.43.2-1. (diff)
download  | netdata-58daab21cd043e1dc37024a7f99b396788372918.tar.xz
          | netdata-58daab21cd043e1dc37024a7f99b396788372918.zip
Merging upstream version 1.44.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/dlib/examples/dnn_introduction_ex.cpp')
-rw-r--r-- | ml/dlib/examples/dnn_introduction_ex.cpp | 170
1 file changed, 170 insertions, 0 deletions
diff --git a/ml/dlib/examples/dnn_introduction_ex.cpp b/ml/dlib/examples/dnn_introduction_ex.cpp
new file mode 100644
index 000000000..6ae3ddf73
--- /dev/null
+++ b/ml/dlib/examples/dnn_introduction_ex.cpp
@@ -0,0 +1,170 @@

// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
    This is an example illustrating the use of the deep learning tools from the
    dlib C++ Library. In it, we will train the venerable LeNet convolutional
    neural network to recognize hand written digits. The network will take as
    input a small image and classify it as one of the 10 numeric digits between
    0 and 9.

    The specific network we will run is from the paper
        LeCun, Yann, et al. "Gradient-based learning applied to document recognition."
        Proceedings of the IEEE 86.11 (1998): 2278-2324.
    except that we replace the sigmoid non-linearities with rectified linear units.

    These tools will use CUDA and cuDNN to drastically accelerate network
    training and testing. CMake should automatically find them if they are
    installed and configure things appropriately. If not, the program will
    still run but will be much slower to execute.
*/


#include <dlib/dnn.h>
#include <iostream>
#include <dlib/data_io.h>

using namespace std;
using namespace dlib;

int main(int argc, char** argv) try
{
    // This example is going to run on the MNIST dataset.
    if (argc != 2)
    {
        cout << "This example needs the MNIST dataset to run!" << endl;
        cout << "You can get MNIST from http://yann.lecun.com/exdb/mnist/" << endl;
        cout << "Download the 4 files that comprise the dataset, decompress them, and" << endl;
        cout << "put them in a folder. Then give that folder as input to this program." << endl;
        return 1;
    }


    // MNIST is broken into two parts, a training set of 60000 images and a test set of
    // 10000 images. Each image is labeled so that we know what hand written digit is
    // depicted. These next statements load the dataset into memory.
    std::vector<matrix<unsigned char>> training_images;
    std::vector<unsigned long>         training_labels;
    std::vector<matrix<unsigned char>> testing_images;
    std::vector<unsigned long>         testing_labels;
    load_mnist_dataset(argv[1], training_images, training_labels, testing_images, testing_labels);
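
    // (Optional sanity check, not required by the rest of the example.) At this
    // point there should be 60000 training and 10000 testing images, each one a
    // 28x28 matrix<unsigned char> with a matching label, which is easy to confirm:
    cout << "training images loaded: " << training_images.size() << endl;
    cout << "testing images loaded:  " << testing_images.size() << endl;
    cout << "image size: " << training_images[0].nr() << "x" << training_images[0].nc() << endl;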

    // Now let's define the LeNet. Broadly speaking, there are 3 parts to a network
    // definition. The loss layer, a bunch of computational layers, and then an input
    // layer. You can see these components in the network definition below.
    //
    // The input layer here says the network expects to be given matrix<unsigned char>
    // objects as input. In general, you can use any dlib image or matrix type here, or
    // even define your own types by creating custom input layers.
    //
    // Then the middle layers define the computation the network will do to transform the
    // input into whatever we want. Here we run the image through multiple convolutions,
    // ReLU units, max pooling operations, and then finally a fully connected layer that
    // converts the whole thing into just 10 numbers.
    //
    // Finally, the loss layer defines the relationship between the network outputs, our 10
    // numbers, and the labels in our dataset. Since we selected loss_multiclass_log it
    // means we want to do multiclass classification with our network. Moreover, the
    // number of network outputs (i.e. 10) is the number of possible labels. Whichever
    // network output is largest is the predicted label. So for example, if the first
    // network output is largest then the predicted digit is 0, and if the last network
    // output is largest then the predicted digit is 9.
    using net_type = loss_multiclass_log<
                                fc<10,
                                relu<fc<84,
                                relu<fc<120,
                                max_pool<2,2,2,2,relu<con<16,5,5,1,1,
                                max_pool<2,2,2,2,relu<con<6,5,5,1,1,
                                input<matrix<unsigned char>>
                                >>>>>>>>>>>>;
    // This net_type defines the entire network architecture. For example, the block
    // relu<fc<84,SUBNET>> means we take the output from the subnetwork, pass it through a
    // fully connected layer with 84 outputs, then apply ReLU. Similarly, a block of
    // max_pool<2,2,2,2,relu<con<16,5,5,1,1,SUBNET>>> means we apply 16 convolutions with a
    // 5x5 filter size and 1x1 stride to the output of a subnetwork, then apply ReLU, then
    // perform max pooling with a 2x2 window and 2x2 stride.


    // So with that out of the way, we can make a network instance.
    net_type net;
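    // (Optional.) To see concretely what the template expression above expands to,
    // the network object can simply be printed; dlib networks stream to an ostream
    // with one line per layer. The rest of the example does not depend on this:
    cout << "The net has " << net.num_layers << " layers in it." << endl;
    cout << net << endl;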
    // And then train it using the MNIST data. The code below uses mini-batch stochastic
    // gradient descent with an initial learning rate of 0.01 to accomplish this.
    dnn_trainer<net_type> trainer(net);
    trainer.set_learning_rate(0.01);
    trainer.set_min_learning_rate(0.00001);
    trainer.set_mini_batch_size(128);
    trainer.be_verbose();
    // Since DNN training can take a long time, we can ask the trainer to save its state to
    // a file named "mnist_sync" every 20 seconds. This way, if we kill this program and
    // start it again it will begin where it left off rather than restarting the training
    // from scratch. This is because, when the program restarts, this call to
    // set_synchronization_file() will automatically reload the settings from mnist_sync if
    // the file exists.
    trainer.set_synchronization_file("mnist_sync", std::chrono::seconds(20));
    // Finally, this line begins training. By default, it runs SGD with our specified
    // learning rate until the loss stops decreasing. Then it reduces the learning rate by
    // a factor of 10 and continues running until the loss stops decreasing again. It will
    // keep doing this until the learning rate has dropped below the min learning rate
    // defined above or the maximum number of epochs has been executed (defaulted to 10000).
    trainer.train(training_images, training_labels);

    // At this point our net object should have learned how to classify MNIST images. But
    // before we try it out, let's save it to disk. Note that, since the trainer has been
    // running images through the network, net will have a bunch of state in it related to
    // the last batch of images it processed (e.g. outputs from each layer). Since we
    // don't care about saving that kind of stuff to disk, we can tell the network to forget
    // about that kind of transient data so that our file will be smaller. We do this by
    // "cleaning" the network before saving it.
    net.clean();
    serialize("mnist_network.dat") << net;
    // Now if we later wanted to recall the network from disk we can simply say:
    // deserialize("mnist_network.dat") >> net;


    // Now let's run the training images through the network. This statement runs all the
    // images through it and asks the loss layer to convert the network's raw output into
    // labels. In our case, these labels are the numbers between 0 and 9.
    std::vector<unsigned long> predicted_labels = net(training_images);
    int num_right = 0;
    int num_wrong = 0;
    // And then let's see if it classified them correctly.
    for (size_t i = 0; i < training_images.size(); ++i)
    {
        if (predicted_labels[i] == training_labels[i])
            ++num_right;
        else
            ++num_wrong;
    }
    cout << "training num_right: " << num_right << endl;
    cout << "training num_wrong: " << num_wrong << endl;
    cout << "training accuracy:  " << num_right/(double)(num_right+num_wrong) << endl;

    // Let's also see if the network can correctly classify the testing images. Since
    // MNIST is an easy dataset, we should see at least 99% accuracy.
    predicted_labels = net(testing_images);
    num_right = 0;
    num_wrong = 0;
    for (size_t i = 0; i < testing_images.size(); ++i)
    {
        if (predicted_labels[i] == testing_labels[i])
            ++num_right;
        else
            ++num_wrong;
    }
    cout << "testing num_right: " << num_right << endl;
    cout << "testing num_wrong: " << num_wrong << endl;
    cout << "testing accuracy:  " << num_right/(double)(num_right+num_wrong) << endl;


    // Finally, you can also save network parameters to XML files if you want to do
    // something with the network in another tool. For example, you could use dlib's
    // tools/convert_dlib_nets_to_caffe to convert the network to a caffe model.
    net_to_xml(net, "lenet.xml");
}
catch(std::exception& e)
{
    cout << e.what() << endl;
}
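
The serialize()/deserialize() comments in the example imply a second, inference-only program that reloads mnist_network.dat at a later time. Below is a minimal sketch of such a loader; it is illustrative only and not part of the committed file. It assumes the same net_type alias is repeated verbatim (deserializing into a different architecture generally fails) and that the mnist_network.dat file and the MNIST folder used above are available. The file name load_and_test_ex.cpp is hypothetical.

// load_and_test_ex.cpp (hypothetical): reload the network trained by
// dnn_introduction_ex.cpp and measure its accuracy on the MNIST test set.
#include <dlib/dnn.h>
#include <dlib/data_io.h>
#include <iostream>

using namespace std;
using namespace dlib;

// The architecture must match the one used when the network was serialized,
// so the alias from dnn_introduction_ex.cpp is repeated here verbatim.
using net_type = loss_multiclass_log<
                            fc<10,
                            relu<fc<84,
                            relu<fc<120,
                            max_pool<2,2,2,2,relu<con<16,5,5,1,1,
                            max_pool<2,2,2,2,relu<con<6,5,5,1,1,
                            input<matrix<unsigned char>>
                            >>>>>>>>>>>>;

int main(int argc, char** argv) try
{
    if (argc != 2)
    {
        cout << "Give the folder containing the decompressed MNIST files as the only argument." << endl;
        return 1;
    }

    // Load MNIST exactly as the training program does; only the test split is used here.
    std::vector<matrix<unsigned char>> training_images, testing_images;
    std::vector<unsigned long> training_labels, testing_labels;
    load_mnist_dataset(argv[1], training_images, training_labels, testing_images, testing_labels);

    // Reload the trained parameters saved by dnn_introduction_ex.cpp.
    net_type net;
    deserialize("mnist_network.dat") >> net;

    // Classify the test images and report the fraction labeled correctly.
    std::vector<unsigned long> predicted_labels = net(testing_images);
    size_t num_right = 0;
    for (size_t i = 0; i < testing_images.size(); ++i)
    {
        if (predicted_labels[i] == testing_labels[i])
            ++num_right;
    }
    cout << "testing accuracy: " << num_right/(double)testing_images.size() << endl;
}
catch(std::exception& e)
{
    cout << e.what() << endl;
}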