diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:19:22 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:19:22 +0000 |
commit | c21c3b0befeb46a51b6bf3758ffa30813bea0ff0 (patch) | |
tree | 9754ff1ca740f6346cf8483ec915d4054bc5da2d /ml/dlib/examples/dnn_imagenet_ex.cpp | |
parent | Adding upstream version 1.43.2. (diff) | |
download | netdata-upstream/1.44.3.tar.xz netdata-upstream/1.44.3.zip |
Adding upstream version 1.44.3. (upstream/1.44.3)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/dlib/examples/dnn_imagenet_ex.cpp')
-rw-r--r-- | ml/dlib/examples/dnn_imagenet_ex.cpp | 171 |
1 files changed, 171 insertions, 0 deletions
diff --git a/ml/dlib/examples/dnn_imagenet_ex.cpp b/ml/dlib/examples/dnn_imagenet_ex.cpp new file mode 100644 index 000000000..d1fa82823 --- /dev/null +++ b/ml/dlib/examples/dnn_imagenet_ex.cpp @@ -0,0 +1,171 @@ +// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt +/* + This example shows how to classify an image into one of the 1000 imagenet + categories using the deep learning tools from the dlib C++ Library. We will + use the pretrained ResNet34 model available on the dlib website. + + The ResNet34 architecture is from the paper Deep Residual Learning for Image + Recognition by He, Zhang, Ren, and Sun. The model file that comes with dlib + was trained using the dnn_imagenet_train_ex.cpp program on a Titan X for + about 2 weeks. This pretrained model has a top5 error of 7.572% on the 2012 + imagenet validation dataset. + + For an introduction to dlib's DNN module read the dnn_introduction_ex.cpp and + dnn_introduction2_ex.cpp example programs. + + + Finally, these tools will use CUDA and cuDNN to drastically accelerate + network training and testing. CMake should automatically find them if they + are installed and configure things appropriately. If not, the program will + still run but will be much slower to execute. 
+*/ + + + +#include <dlib/dnn.h> +#include <iostream> +#include <dlib/data_io.h> +#include <dlib/gui_widgets.h> +#include <dlib/image_transforms.h> + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------------------- + +// This block of statements defines the resnet-34 network + +template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> +using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>; + +template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> +using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>; + +template <int N, template <typename> class BN, int stride, typename SUBNET> +using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>; + +template <int N, typename SUBNET> using ares = relu<residual<block,N,affine,SUBNET>>; +template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>; + +template <typename SUBNET> using level1 = ares<512,ares<512,ares_down<512,SUBNET>>>; +template <typename SUBNET> using level2 = ares<256,ares<256,ares<256,ares<256,ares<256,ares_down<256,SUBNET>>>>>>; +template <typename SUBNET> using level3 = ares<128,ares<128,ares<128,ares_down<128,SUBNET>>>>; +template <typename SUBNET> using level4 = ares<64,ares<64,ares<64,SUBNET>>>; + +using anet_type = loss_multiclass_log<fc<1000,avg_pool_everything< + level1< + level2< + level3< + level4< + max_pool<3,3,2,2,relu<affine<con<64,7,7,2,2, + input_rgb_image_sized<227> + >>>>>>>>>>>; + +// ---------------------------------------------------------------------------------------- + +rectangle make_random_cropping_rect_resnet( + const matrix<rgb_pixel>& img, + dlib::rand& rnd +) +{ + // figure out what rectangle we want to crop from the image + double mins = 0.466666666, maxs = 0.875; + auto scale = 
mins + rnd.get_random_double()*(maxs-mins); + auto size = scale*std::min(img.nr(), img.nc()); + rectangle rect(size, size); + // randomly shift the box around + point offset(rnd.get_random_32bit_number()%(img.nc()-rect.width()), + rnd.get_random_32bit_number()%(img.nr()-rect.height())); + return move_rect(rect, offset); +} + +// ---------------------------------------------------------------------------------------- + +void randomly_crop_images ( + const matrix<rgb_pixel>& img, + dlib::array<matrix<rgb_pixel>>& crops, + dlib::rand& rnd, + long num_crops +) +{ + std::vector<chip_details> dets; + for (long i = 0; i < num_crops; ++i) + { + auto rect = make_random_cropping_rect_resnet(img, rnd); + dets.push_back(chip_details(rect, chip_dims(227,227))); + } + + extract_image_chips(img, dets, crops); + + for (auto&& img : crops) + { + // Also randomly flip the image + if (rnd.get_random_double() > 0.5) + img = fliplr(img); + + // And then randomly adjust the colors. + apply_random_color_offset(img, rnd); + } +} + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) try +{ + if (argc == 1) + { + cout << "Give this program image files as command line arguments.\n" << endl; + cout << "You will also need a copy of the file resnet34_1000_imagenet_classifier.dnn " << endl; + cout << "available at http://dlib.net/files/resnet34_1000_imagenet_classifier.dnn.bz2" << endl; + cout << endl; + return 1; + } + + std::vector<string> labels; + anet_type net; + deserialize("resnet34_1000_imagenet_classifier.dnn") >> net >> labels; + + // Make a network with softmax as the final layer. We don't have to do this + // if we just want to output the single best prediction, since the anet_type + // already does this. 
But if we instead want to get the probability of each + // class as output we need to replace the last layer of the network with a + // softmax layer, which we do as follows: + softmax<anet_type::subnet_type> snet; + snet.subnet() = net.subnet(); + + dlib::array<matrix<rgb_pixel>> images; + matrix<rgb_pixel> img, crop; + + dlib::rand rnd; + image_window win; + + // Read images from the command prompt and print the top 5 best labels for each. + for (int i = 1; i < argc; ++i) + { + load_image(img, argv[i]); + const int num_crops = 16; + // Grab 16 random crops from the image. We will run all of them through the + // network and average the results. + randomly_crop_images(img, images, rnd, num_crops); + // p(i) == the probability the image contains object of class i. + matrix<float,1,1000> p = sum_rows(mat(snet(images.begin(), images.end())))/num_crops; + + win.set_image(img); + // Print the 5 most probable labels + for (int k = 0; k < 5; ++k) + { + unsigned long predicted_label = index_of_max(p); + cout << p(predicted_label) << ": " << labels[predicted_label] << endl; + p(predicted_label) = 0; + } + + cout << "Hit enter to process the next image"; + cin.get(); + } + +} +catch(std::exception& e) +{ + cout << e.what() << endl; +} + |