From b485aab7e71c1625cfc27e0f92c9509f42378458 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 5 May 2024 13:19:16 +0200 Subject: Adding upstream version 1.45.3+dfsg. Signed-off-by: Daniel Baumann --- ml/dlib/examples/dnn_semantic_segmentation_ex.h | 200 ------------------------ 1 file changed, 200 deletions(-) delete mode 100644 ml/dlib/examples/dnn_semantic_segmentation_ex.h (limited to 'ml/dlib/examples/dnn_semantic_segmentation_ex.h') diff --git a/ml/dlib/examples/dnn_semantic_segmentation_ex.h b/ml/dlib/examples/dnn_semantic_segmentation_ex.h deleted file mode 100644 index 47fc102c9..000000000 --- a/ml/dlib/examples/dnn_semantic_segmentation_ex.h +++ /dev/null @@ -1,200 +0,0 @@ -// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt -/* - Semantic segmentation using the PASCAL VOC2012 dataset. - - In segmentation, the task is to assign each pixel of an input image - a label - for example, 'dog'. Then, the idea is that neighboring - pixels having the same label can be connected together to form a - larger region, representing a complete (or partially occluded) dog. - So technically, segmentation can be viewed as classification of - individual pixels (using the relevant context in the input images), - however the goal usually is to identify meaningful regions that - represent complete entities of interest (such as dogs). - - Instructions how to run the example: - 1. Download the PASCAL VOC2012 data, and untar it somewhere. - http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar - 2. Build the dnn_semantic_segmentation_train_ex example program. - 3. Run: - ./dnn_semantic_segmentation_train_ex /path/to/VOC2012 - 4. Wait while the network is being trained. - 5. Build the dnn_semantic_segmentation_ex example program. - 6. Run: - ./dnn_semantic_segmentation_ex /path/to/VOC2012-or-other-images - - An alternative to steps 2-4 above is to download a pre-trained network - from here: http://dlib.net/files/semantic_segmentation_voc2012net.dnn - - It would be a good idea to become familiar with dlib's DNN tooling before reading this - example. So you should read dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp - before reading this example program. -*/ - -#ifndef DLIB_DNn_SEMANTIC_SEGMENTATION_EX_H_ -#define DLIB_DNn_SEMANTIC_SEGMENTATION_EX_H_ - -#include - -// ---------------------------------------------------------------------------------------- - -inline bool operator == (const dlib::rgb_pixel& a, const dlib::rgb_pixel& b) -{ - return a.red == b.red && a.green == b.green && a.blue == b.blue; -} - -// ---------------------------------------------------------------------------------------- - -// The PASCAL VOC2012 dataset contains 20 ground-truth classes + background. Each class -// is represented using an RGB color value. We associate each class also to an index in the -// range [0, 20], used internally by the network. - -struct Voc2012class { - Voc2012class(uint16_t index, const dlib::rgb_pixel& rgb_label, const std::string& classlabel) - : index(index), rgb_label(rgb_label), classlabel(classlabel) - {} - - // The index of the class. In the PASCAL VOC 2012 dataset, indexes from 0 to 20 are valid. - const uint16_t index = 0; - - // The corresponding RGB representation of the class. - const dlib::rgb_pixel rgb_label; - - // The label of the class in plain text. - const std::string classlabel; -}; - -namespace { - constexpr int class_count = 21; // background + 20 classes - - const std::vector classes = { - Voc2012class(0, dlib::rgb_pixel(0, 0, 0), ""), // background - - // The cream-colored `void' label is used in border regions and to mask difficult objects - // (see http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/devkit_doc.html) - Voc2012class(dlib::loss_multiclass_log_per_pixel_::label_to_ignore, - dlib::rgb_pixel(224, 224, 192), "border"), - - Voc2012class(1, dlib::rgb_pixel(128, 0, 0), "aeroplane"), - Voc2012class(2, dlib::rgb_pixel( 0, 128, 0), "bicycle"), - Voc2012class(3, dlib::rgb_pixel(128, 128, 0), "bird"), - Voc2012class(4, dlib::rgb_pixel( 0, 0, 128), "boat"), - Voc2012class(5, dlib::rgb_pixel(128, 0, 128), "bottle"), - Voc2012class(6, dlib::rgb_pixel( 0, 128, 128), "bus"), - Voc2012class(7, dlib::rgb_pixel(128, 128, 128), "car"), - Voc2012class(8, dlib::rgb_pixel( 64, 0, 0), "cat"), - Voc2012class(9, dlib::rgb_pixel(192, 0, 0), "chair"), - Voc2012class(10, dlib::rgb_pixel( 64, 128, 0), "cow"), - Voc2012class(11, dlib::rgb_pixel(192, 128, 0), "diningtable"), - Voc2012class(12, dlib::rgb_pixel( 64, 0, 128), "dog"), - Voc2012class(13, dlib::rgb_pixel(192, 0, 128), "horse"), - Voc2012class(14, dlib::rgb_pixel( 64, 128, 128), "motorbike"), - Voc2012class(15, dlib::rgb_pixel(192, 128, 128), "person"), - Voc2012class(16, dlib::rgb_pixel( 0, 64, 0), "pottedplant"), - Voc2012class(17, dlib::rgb_pixel(128, 64, 0), "sheep"), - Voc2012class(18, dlib::rgb_pixel( 0, 192, 0), "sofa"), - Voc2012class(19, dlib::rgb_pixel(128, 192, 0), "train"), - Voc2012class(20, dlib::rgb_pixel( 0, 64, 128), "tvmonitor"), - }; -} - -template -const Voc2012class& find_voc2012_class(Predicate predicate) -{ - const auto i = std::find_if(classes.begin(), classes.end(), predicate); - - if (i != classes.end()) - { - return *i; - } - else - { - throw std::runtime_error("Unable to find a matching VOC2012 class"); - } -} - -// ---------------------------------------------------------------------------------------- - -// Introduce the building blocks used to define the segmentation network. -// The network first does residual downsampling (similar to the dnn_imagenet_(train_)ex -// example program), and then residual upsampling. The network could be improved e.g. -// by introducing skip connections from the input image, and/or the first layers, to the -// last layer(s). (See Long et al., Fully Convolutional Networks for Semantic Segmentation, -// https://people.eecs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf) - -template class BN, int stride, typename SUBNET> -using block = BN>>>>; - -template class BN, int stride, typename SUBNET> -using blockt = BN>>>>; - -template