summaryrefslogtreecommitdiffstats
path: root/ml/dlib/tools/imglab
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/tools/imglab')
-rw-r--r--ml/dlib/tools/imglab/CMakeLists.txt41
-rw-r--r--ml/dlib/tools/imglab/README.txt40
-rwxr-xr-xml/dlib/tools/imglab/convert_imglab_paths_to_relative24
-rwxr-xr-xml/dlib/tools/imglab/copy_imglab_dataset22
-rw-r--r--ml/dlib/tools/imglab/src/cluster.cpp260
-rw-r--r--ml/dlib/tools/imglab/src/cluster.h11
-rw-r--r--ml/dlib/tools/imglab/src/common.cpp60
-rw-r--r--ml/dlib/tools/imglab/src/common.h45
-rw-r--r--ml/dlib/tools/imglab/src/convert_idl.cpp184
-rw-r--r--ml/dlib/tools/imglab/src/convert_idl.h14
-rw-r--r--ml/dlib/tools/imglab/src/convert_pascal_v1.cpp177
-rw-r--r--ml/dlib/tools/imglab/src/convert_pascal_v1.h13
-rw-r--r--ml/dlib/tools/imglab/src/convert_pascal_xml.cpp239
-rw-r--r--ml/dlib/tools/imglab/src/convert_pascal_xml.h12
-rw-r--r--ml/dlib/tools/imglab/src/flip_dataset.cpp249
-rw-r--r--ml/dlib/tools/imglab/src/flip_dataset.h12
-rw-r--r--ml/dlib/tools/imglab/src/main.cpp1145
-rw-r--r--ml/dlib/tools/imglab/src/metadata_editor.cpp671
-rw-r--r--ml/dlib/tools/imglab/src/metadata_editor.h116
19 files changed, 3335 insertions, 0 deletions
diff --git a/ml/dlib/tools/imglab/CMakeLists.txt b/ml/dlib/tools/imglab/CMakeLists.txt
new file mode 100644
index 000000000..46c64fb3e
--- /dev/null
+++ b/ml/dlib/tools/imglab/CMakeLists.txt
@@ -0,0 +1,41 @@
+#
+# This is a CMake makefile. You can find the cmake utility and
+# information about it at http://www.cmake.org
+#
+
+cmake_minimum_required(VERSION 2.8.12)
+
+# create a variable called target_name and set it to the string "imglab"
+set (target_name imglab)
+
+PROJECT(${target_name})
+add_subdirectory(../../dlib dlib_build)
+
+# add all the cpp files we want to compile to this list. This tells
+# cmake that they are part of our target (which is the executable named imglab)
+add_executable(${target_name}
+ src/main.cpp
+ src/metadata_editor.h
+ src/metadata_editor.cpp
+ src/convert_pascal_xml.h
+ src/convert_pascal_xml.cpp
+ src/convert_pascal_v1.h
+ src/convert_pascal_v1.cpp
+ src/convert_idl.h
+ src/convert_idl.cpp
+ src/common.h
+ src/common.cpp
+ src/cluster.cpp
+ src/flip_dataset.cpp
+)
+
+
+# Tell cmake to link our target executable to dlib.
+target_link_libraries(${target_name} dlib::dlib )
+
+
+install(TARGETS ${target_name}
+ RUNTIME DESTINATION bin
+ )
+install(PROGRAMS convert_imglab_paths_to_relative copy_imglab_dataset DESTINATION bin )
+
diff --git a/ml/dlib/tools/imglab/README.txt b/ml/dlib/tools/imglab/README.txt
new file mode 100644
index 000000000..3f0ca92a1
--- /dev/null
+++ b/ml/dlib/tools/imglab/README.txt
@@ -0,0 +1,40 @@
+imglab is a simple graphical tool for annotating images with object bounding
+boxes and optionally their part locations. Generally, you use it when you want
+to train an object detector (e.g. a face detector) since it allows you to
+easily create the needed training dataset.
+
+You can compile imglab with the following commands:
+ cd dlib/tools/imglab
+ mkdir build
+ cd build
+ cmake ..
+ cmake --build . --config Release
+Note that you may need to install CMake (www.cmake.org) for this to work. On a
+unix system you can also install imglab into /usr/local/bin by running
+ sudo make install
+This will make running it more convenient.
+
+Next, to use it, let's assume you have a folder of images called /tmp/images.
+These images should contain examples of the objects you want to learn to
+detect. You will use the imglab tool to label these objects. Do this by
+typing the following command:
+ ./imglab -c mydataset.xml /tmp/images
+This will create a file called mydataset.xml which simply lists the images in
+/tmp/images. To add bounding boxes to the objects you run:
+ ./imglab mydataset.xml
+and a window will appear showing all the images. You can use the up and down
+arrow keys to cycle through the images and the mouse to label objects. In
+particular, holding the shift key, left clicking, and dragging the mouse will
+allow you to draw boxes around the objects you wish to detect.
+
+Once you finish labeling objects go to the file menu, click save, and then
+close the program. This will save the object boxes back to mydataset.xml. You
+can verify this by opening the tool again with:
+ ./imglab mydataset.xml
+and observing that the boxes are present.
+
+
+imglab can do a few additional things. To see these run:
+ imglab -h
+and also read the instructions in the About->Help menu.
+
diff --git a/ml/dlib/tools/imglab/convert_imglab_paths_to_relative b/ml/dlib/tools/imglab/convert_imglab_paths_to_relative
new file mode 100755
index 000000000..09c5ef7a5
--- /dev/null
+++ b/ml/dlib/tools/imglab/convert_imglab_paths_to_relative
@@ -0,0 +1,24 @@
+#!/usr/bin/perl
+
+use File::Spec;
+
+die "This script converts all the file names in an imglab XML file to have paths relative to the current folder. Call it like this: ./convert_imglab_paths_to_relative some_file.xml" if @ARGV != 1;
+
+# $ARGV[0] is the scalar element; @ARGV[0] would be a one-element array slice.
+$file = $ARGV[0];
+# Three-argument open so the file name is never interpreted as an open mode or a pipe.
+open(INFO, '<', $file) or die('Could not open file.');
+
+foreach $line (<INFO>)
+{
+ # Only lines carrying a file='...' attribute are rewritten. [^']* stops at the
+ # closing quote of that attribute, so other attributes on the same line are kept.
+ if ($line =~ /file='([^']*)'/)
+ {
+ $abspath = $1;
+ $relpath = File::Spec->abs2rel($abspath);
+ # \Q...\E makes the old path match literally even when it contains regex
+ # metacharacters such as '.', '+' or '('. Anchoring on file='...' keeps the
+ # pattern non-empty and limits the replacement to the attribute itself.
+ $line =~ s/file='\Q$abspath\E'/file='$relpath'/;
+ }
+ # Every line, rewritten or not, is echoed to stdout.
+ print $line;
+}
+
diff --git a/ml/dlib/tools/imglab/copy_imglab_dataset b/ml/dlib/tools/imglab/copy_imglab_dataset
new file mode 100755
index 000000000..8b44ed166
--- /dev/null
+++ b/ml/dlib/tools/imglab/copy_imglab_dataset
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+if [ "$#" -ne 2 ]; then
+ echo "This script copies an imglab XML file and its associated images to a new folder."
+ echo "Notably, it will avoid copying unnecessary images."
+ echo "Call this script like this:"
+ echo " ./copy_imglab_dataset some_file.xml dest_dir"
+ exit 1
+fi
+
+XML_FILE=$1
+DEST=$2
+
+# The cp commands below are generated as text and then re-parsed by a second bash, so
+# the destination has to be shell-escaped for that second parse.
+DEST_QUOTED=$(printf '%q' "$DEST")
+
+# Stop early if the destination folder can't be created.
+mkdir -p "$DEST" || exit 1
+
+# Get the list of files we need to copy, then build the cp statements with 1000 files at most in each statement, then tell bash to run them all.
+# NOTE: image paths containing whitespace or shell metacharacters are not handled by this pipeline.
+imglab --files "$XML_FILE" | xargs perl -e 'use File::Spec; foreach (@ARGV) {print File::Spec->abs2rel($_) . "\n"}' | sort | uniq | xargs -L1000 echo | xargs -I{} echo cp -a --parents {} "$DEST_QUOTED" | bash
+
+convert_imglab_paths_to_relative "$XML_FILE" > "$DEST/$(basename "$XML_FILE")"
+
diff --git a/ml/dlib/tools/imglab/src/cluster.cpp b/ml/dlib/tools/imglab/src/cluster.cpp
new file mode 100644
index 000000000..23b289a7f
--- /dev/null
+++ b/ml/dlib/tools/imglab/src/cluster.cpp
@@ -0,0 +1,260 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+
+#include "cluster.h"
+#include <dlib/console_progress_indicator.h>
+#include <dlib/image_io.h>
+#include <dlib/data_io.h>
+#include <dlib/image_transforms.h>
+#include <dlib/misc_api.h>
+#include <dlib/dir_nav.h>
+#include <dlib/clustering.h>
+#include <dlib/svm.h>
+
+// ----------------------------------------------------------------------------------------
+
+using namespace std;
+using namespace dlib;
+
+// ----------------------------------------------------------------------------
+
+struct assignment
+{
+ unsigned long c;
+ double dist;
+ unsigned long idx;
+
+ bool operator<(const assignment& item) const
+ { return dist < item.dist; }
+};
+
+std::vector<assignment> angular_cluster (
+ std::vector<matrix<double,0,1> > feats,
+ const unsigned long num_clusters
+)
+{
+ DLIB_CASSERT(feats.size() != 0, "The dataset can't be empty");
+ for (unsigned long i = 0; i < feats.size(); ++i)
+ {
+ DLIB_CASSERT(feats[i].size() == feats[0].size(), "All feature vectors must have the same length.");
+ }
+
+ // find the centroid of feats
+ matrix<double,0,1> m;
+ for (unsigned long i = 0; i < feats.size(); ++i)
+ m += feats[i];
+ m /= feats.size();
+
+ // Now center feats and then project onto the unit sphere. The reason for projecting
+ // onto the unit sphere is so pick_initial_centers() works in a sensible way.
+ for (unsigned long i = 0; i < feats.size(); ++i)
+ {
+ feats[i] -= m;
+ double len = length(feats[i]);
+ if (len != 0)
+ feats[i] /= len;
+ }
+
+ // now do angular clustering of the points
+ std::vector<matrix<double,0,1> > centers;
+ pick_initial_centers(num_clusters, centers, feats, linear_kernel<matrix<double,0,1> >(), 0.05);
+ find_clusters_using_angular_kmeans(feats, centers);
+
+ // and then report the resulting assignments
+ std::vector<assignment> assignments;
+ for (unsigned long i = 0; i < feats.size(); ++i)
+ {
+ assignment temp;
+ temp.c = nearest_center(centers, feats[i]);
+ temp.dist = length(feats[i] - centers[temp.c]);
+ temp.idx = i;
+ assignments.push_back(temp);
+ }
+ return assignments;
+}
+
+// ----------------------------------------------------------------------------------------
+
+bool compare_first (
+ const std::pair<double,image_dataset_metadata::image>& a,
+ const std::pair<double,image_dataset_metadata::image>& b
+)
+{
+ return a.first < b.first;
+}
+
+// ----------------------------------------------------------------------------------------
+
+double mean_aspect_ratio (
+ const image_dataset_metadata::dataset& data
+)
+{
+ double sum = 0;
+ double cnt = 0;
+ for (unsigned long i = 0; i < data.images.size(); ++i)
+ {
+ for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
+ {
+ rectangle rect = data.images[i].boxes[j].rect;
+ if (rect.area() == 0 || data.images[i].boxes[j].ignore)
+ continue;
+ sum += rect.width()/(double)rect.height();
+ ++cnt;
+ }
+ }
+
+ if (cnt != 0)
+ return sum/cnt;
+ else
+ return 0;
+}
+
+// ----------------------------------------------------------------------------------------
+
+bool has_non_ignored_boxes (const image_dataset_metadata::image& img)
+{
+ for (auto&& b : img.boxes)
+ {
+ if (!b.ignore)
+ return true;
+ }
+ return false;
+}
+
+// ----------------------------------------------------------------------------------------
+
+int cluster_dataset(
+ const dlib::command_line_parser& parser
+)
+{
+ // make sure the user entered an argument to this program
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --cluster option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ const unsigned long num_clusters = get_option(parser, "cluster", 2);
+ const unsigned long chip_size = get_option(parser, "size", 8000);
+
+ image_dataset_metadata::dataset data;
+
+ image_dataset_metadata::load_image_dataset_metadata(data, parser[0]);
+ set_current_dir(get_parent_directory(file(parser[0])));
+
+ const double aspect_ratio = mean_aspect_ratio(data);
+
+ dlib::array<array2d<rgb_pixel> > images;
+ std::vector<matrix<double,0,1> > feats;
+ console_progress_indicator pbar(data.images.size());
+ // extract all the object chips and HOG features.
+ cout << "Loading image data..." << endl;
+ for (unsigned long i = 0; i < data.images.size(); ++i)
+ {
+ pbar.print_status(i);
+ if (!has_non_ignored_boxes(data.images[i]))
+ continue;
+
+ array2d<rgb_pixel> img, chip;
+ load_image(img, data.images[i].filename);
+
+ for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
+ {
+ if (data.images[i].boxes[j].ignore || data.images[i].boxes[j].rect.area() < 10)
+ continue;
+ drectangle rect = data.images[i].boxes[j].rect;
+ rect = set_aspect_ratio(rect, aspect_ratio);
+ extract_image_chip(img, chip_details(rect, chip_size), chip);
+ feats.push_back(extract_fhog_features(chip));
+ images.push_back(chip);
+ }
+ }
+
+ if (feats.size() == 0)
+ {
+ cerr << "No non-ignored object boxes found in the XML dataset. You can't cluster an empty dataset." << endl;
+ return EXIT_FAILURE;
+ }
+
+ cout << "\nClustering objects..." << endl;
+ std::vector<assignment> assignments = angular_cluster(feats, num_clusters);
+
+
+ // Now output each cluster to disk as an XML file.
+ for (unsigned long c = 0; c < num_clusters; ++c)
+ {
+ // We are going to accumulate all the image metadata for cluster c. We put it
+ // into idata so we can sort the images such that images with central chips
+ // come before less central chips. The idea being to get the good chips to
+ // show up first in the listing, making it easy to manually remove bad ones if
+ // that is desired.
+ std::vector<std::pair<double,image_dataset_metadata::image> > idata(data.images.size());
+ unsigned long idx = 0;
+ for (unsigned long i = 0; i < data.images.size(); ++i)
+ {
+ idata[i].first = std::numeric_limits<double>::infinity();
+ idata[i].second.filename = data.images[i].filename;
+ if (!has_non_ignored_boxes(data.images[i]))
+ continue;
+
+ for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
+ {
+ idata[i].second.boxes.push_back(data.images[i].boxes[j]);
+
+ if (data.images[i].boxes[j].ignore || data.images[i].boxes[j].rect.area() < 10)
+ continue;
+
+ // If this box goes into cluster c then update the score for the whole
+ // image based on this box's score. Otherwise, mark the box as
+ // ignored.
+ if (assignments[idx].c == c)
+ idata[i].first = std::min(idata[i].first, assignments[idx].dist);
+ else
+ idata[i].second.boxes.back().ignore = true;
+
+ ++idx;
+ }
+ }
+
+ // now save idata to an xml file.
+ std::sort(idata.begin(), idata.end(), compare_first);
+ image_dataset_metadata::dataset cdata;
+ cdata.comment = data.comment + "\n\n This file contains objects which were clustered into group " +
+ cast_to_string(c+1) + " of " + cast_to_string(num_clusters) + " groups with a chip size of " +
+ cast_to_string(chip_size) + " by imglab.";
+ cdata.name = data.name;
+ for (unsigned long i = 0; i < idata.size(); ++i)
+ {
+ // if this image has non-ignored boxes in it then include it in the output.
+ if (idata[i].first != std::numeric_limits<double>::infinity())
+ cdata.images.push_back(idata[i].second);
+ }
+
+ string outfile = "cluster_"+pad_int_with_zeros(c+1, 3) + ".xml";
+ cout << "Saving " << outfile << endl;
+ save_image_dataset_metadata(cdata, outfile);
+ }
+
+ // Now output each cluster to disk as a big tiled jpeg file. Sort everything so, just
+ // like in the xml file above, the best objects come first in the tiling.
+ std::sort(assignments.begin(), assignments.end());
+ for (unsigned long c = 0; c < num_clusters; ++c)
+ {
+ dlib::array<array2d<rgb_pixel> > temp;
+ for (unsigned long i = 0; i < assignments.size(); ++i)
+ {
+ if (assignments[i].c == c)
+ temp.push_back(images[assignments[i].idx]);
+ }
+
+ string outfile = "cluster_"+pad_int_with_zeros(c+1, 3) + ".jpg";
+ cout << "Saving " << outfile << endl;
+ save_jpeg(tile_images(temp), outfile);
+ }
+
+
+ return EXIT_SUCCESS;
+}
+
+// ----------------------------------------------------------------------------------------
+
diff --git a/ml/dlib/tools/imglab/src/cluster.h b/ml/dlib/tools/imglab/src/cluster.h
new file mode 100644
index 000000000..6cb41a373
--- /dev/null
+++ b/ml/dlib/tools/imglab/src/cluster.h
@@ -0,0 +1,11 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_IMGLAB_ClUSTER_H_
+#define DLIB_IMGLAB_ClUSTER_H_
+
+#include <dlib/cmd_line_parser.h>
+
+int cluster_dataset(const dlib::command_line_parser& parser);
+
+#endif //DLIB_IMGLAB_ClUSTER_H_
+
diff --git a/ml/dlib/tools/imglab/src/common.cpp b/ml/dlib/tools/imglab/src/common.cpp
new file mode 100644
index 000000000..d9cc1dca4
--- /dev/null
+++ b/ml/dlib/tools/imglab/src/common.cpp
@@ -0,0 +1,60 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+
+#include "common.h"
+#include <fstream>
+#include <dlib/error.h>
+
+// ----------------------------------------------------------------------------------------
+
+// Returns str with the leading prefix removed, along with one '/' or '\' that directly
+// follows the prefix. If prefix is not a prefix of str then str is returned unchanged.
+std::string strip_path (
+ const std::string& str,
+ const std::string& prefix
+)
+{
+ // The whole of prefix has to match. This also covers a str that is shorter than
+ // prefix, which must be returned untouched rather than reduced to an empty string.
+ if (str.compare(0, prefix.size(), prefix) != 0)
+ return str;
+
+ // Skip a single path separator sitting right after the prefix.
+ std::string::size_type i = prefix.size();
+ if (i < str.size() && (str[i] == '/' || str[i] == '\\'))
+ ++i;
+
+ return str.substr(i);
+}
+
+// ----------------------------------------------------------------------------------------
+
+void make_empty_file (
+ const std::string& filename
+)
+{
+ std::ofstream fout(filename.c_str());
+ if (!fout)
+ throw dlib::error("ERROR: Unable to open " + filename + " for writing.");
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Returns filename with its extension replaced by ".png". Throws dlib::error if the
+// last path component of filename has no '.' in it.
+std::string to_png_name (const std::string& filename)
+{
+ // The extension dot must come after the last path separator, so a '.' inside a
+ // directory name (e.g. "data.v1/img") is never mistaken for the extension.
+ const std::string::size_type sep = filename.find_last_of("/\\");
+ const std::string::size_type pos = filename.find_last_of('.');
+ if (pos == std::string::npos || (sep != std::string::npos && pos < sep))
+ throw dlib::error("invalid filename: " + filename);
+ return filename.substr(0,pos) + ".png";
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Returns filename with its extension replaced by ".jpg". Throws dlib::error if the
+// last path component of filename has no '.' in it.
+std::string to_jpg_name (const std::string& filename)
+{
+ // The extension dot must come after the last path separator, so a '.' inside a
+ // directory name (e.g. "data.v1/img") is never mistaken for the extension.
+ const std::string::size_type sep = filename.find_last_of("/\\");
+ const std::string::size_type pos = filename.find_last_of('.');
+ if (pos == std::string::npos || (sep != std::string::npos && pos < sep))
+ throw dlib::error("invalid filename: " + filename);
+ return filename.substr(0,pos) + ".jpg";
+}
+
+// ----------------------------------------------------------------------------------------
+
diff --git a/ml/dlib/tools/imglab/src/common.h b/ml/dlib/tools/imglab/src/common.h
new file mode 100644
index 000000000..42e905bc3
--- /dev/null
+++ b/ml/dlib/tools/imglab/src/common.h
@@ -0,0 +1,45 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_IMGLAB_COmMON_H__
+#define DLIB_IMGLAB_COmMON_H__
+
+#include <string>
+
+// ----------------------------------------------------------------------------------------
+
+std::string strip_path (
+ const std::string& str,
+ const std::string& prefix
+);
+/*!
+ ensures
+ - if (prefix is a prefix of str) then
+ - returns the part of str after the prefix
+ (additionally, str will not begin with a / or \ character)
+ - else
+ - return str
+!*/
+
+// ----------------------------------------------------------------------------------------
+
+void make_empty_file (
+ const std::string& filename
+);
+/*!
+ ensures
+ - creates an empty file of the given name
+!*/
+
+// ----------------------------------------------------------------------------------------
+
+std::string to_png_name (const std::string& filename);
+std::string to_jpg_name (const std::string& filename);
+
+// ----------------------------------------------------------------------------------------
+
+const int JPEG_QUALITY = 90;
+
+// ----------------------------------------------------------------------------------------
+
+#endif // DLIB_IMGLAB_COmMON_H__
+
diff --git a/ml/dlib/tools/imglab/src/convert_idl.cpp b/ml/dlib/tools/imglab/src/convert_idl.cpp
new file mode 100644
index 000000000..7ff601d0c
--- /dev/null
+++ b/ml/dlib/tools/imglab/src/convert_idl.cpp
@@ -0,0 +1,184 @@
+
+#include "convert_idl.h"
+#include "dlib/data_io.h"
+#include <iostream>
+#include <string>
+#include <dlib/dir_nav.h>
+#include <dlib/time_this.h>
+#include <dlib/cmd_line_parser.h>
+
+using namespace std;
+using namespace dlib;
+
+namespace
+{
+ using namespace dlib::image_dataset_metadata;
+
+// ----------------------------------------------------------------------------------------
+
+ inline bool next_is_number(std::istream& in)
+ {
+ return ('0' <= in.peek() && in.peek() <= '9') || in.peek() == '-' || in.peek() == '+';
+ }
+
+ int read_int(std::istream& in)
+ {
+ bool is_neg = false;
+ if (in.peek() == '-')
+ {
+ is_neg = true;
+ in.get();
+ }
+ if (in.peek() == '+')
+ in.get();
+
+ int val = 0;
+ while ('0' <= in.peek() && in.peek() <= '9')
+ {
+ val = 10*val + in.get()-'0';
+ }
+
+ if (is_neg)
+ return -val;
+ else
+ return val;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ void parse_annotation_file(
+ const std::string& file,
+ dlib::image_dataset_metadata::dataset& data
+ )
+ {
+ ifstream fin(file.c_str());
+ if (!fin)
+ throw dlib::error("Unable to open file " + file);
+
+
+ bool in_quote = false;
+ int point_count = 0;
+ bool in_point_list = false;
+ bool saw_any_points = false;
+
+ image img;
+ string label;
+ point p1,p2;
+ while (fin.peek() != EOF)
+ {
+ if (in_point_list && next_is_number(fin))
+ {
+ const int val = read_int(fin);
+ switch (point_count)
+ {
+ case 0: p1.x() = val; break;
+ case 1: p1.y() = val; break;
+ case 2: p2.x() = val; break;
+ case 3: p2.y() = val; break;
+ default:
+ throw dlib::error("parse error in file " + file);
+ }
+
+ ++point_count;
+ }
+
+ char ch = fin.get();
+
+ if (ch == ':')
+ continue;
+
+ if (ch == '"')
+ {
+ in_quote = !in_quote;
+ continue;
+ }
+
+ if (in_quote)
+ {
+ img.filename += ch;
+ continue;
+ }
+
+
+ if (ch == '(')
+ {
+ in_point_list = true;
+ point_count = 0;
+ label.clear();
+ saw_any_points = true;
+ }
+ if (ch == ')')
+ {
+ in_point_list = false;
+
+ label.clear();
+ while (fin.peek() != EOF &&
+ fin.peek() != ';' &&
+ fin.peek() != ',')
+ {
+ char ch = fin.get();
+ if (ch == ':')
+ continue;
+
+ label += ch;
+ }
+ }
+
+ if (ch == ',' && !in_point_list)
+ {
+
+ box b;
+ b.rect = rectangle(p1,p2);
+ b.label = label;
+ img.boxes.push_back(b);
+ }
+
+
+ if (ch == ';')
+ {
+
+ if (saw_any_points)
+ {
+ box b;
+ b.rect = rectangle(p1,p2);
+ b.label = label;
+ img.boxes.push_back(b);
+ saw_any_points = false;
+ }
+ data.images.push_back(img);
+
+
+ img.filename.clear();
+ img.boxes.clear();
+ }
+
+
+ }
+
+
+
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+void convert_idl(
+ const command_line_parser& parser
+)
+{
+ cout << "Convert from IDL annotation format..." << endl;
+
+ dlib::image_dataset_metadata::dataset dataset;
+
+ for (unsigned long i = 0; i < parser.number_of_arguments(); ++i)
+ {
+ parse_annotation_file(parser[i], dataset);
+ }
+
+ const std::string filename = parser.option("c").argument();
+ save_image_dataset_metadata(dataset, filename);
+}
+
+
+
diff --git a/ml/dlib/tools/imglab/src/convert_idl.h b/ml/dlib/tools/imglab/src/convert_idl.h
new file mode 100644
index 000000000..d8c33d961
--- /dev/null
+++ b/ml/dlib/tools/imglab/src/convert_idl.h
@@ -0,0 +1,14 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_IMGLAB_CONVErT_IDL_H__
+#define DLIB_IMGLAB_CONVErT_IDL_H__
+
+#include "common.h"
+#include <dlib/cmd_line_parser.h>
+
+void convert_idl(const dlib::command_line_parser& parser);
+
+#endif // DLIB_IMGLAB_CONVErT_IDL_H__
+
+
+
diff --git a/ml/dlib/tools/imglab/src/convert_pascal_v1.cpp b/ml/dlib/tools/imglab/src/convert_pascal_v1.cpp
new file mode 100644
index 000000000..8eaf5e2bb
--- /dev/null
+++ b/ml/dlib/tools/imglab/src/convert_pascal_v1.cpp
@@ -0,0 +1,177 @@
+
+#include "convert_pascal_v1.h"
+#include "dlib/data_io.h"
+#include <iostream>
+#include <string>
+#include <dlib/dir_nav.h>
+#include <dlib/time_this.h>
+
+using namespace std;
+using namespace dlib;
+
+namespace
+{
+ using namespace dlib::image_dataset_metadata;
+
+// ----------------------------------------------------------------------------------------
+
+ std::string pick_out_quoted_string (
+ const std::string& str
+ )
+ {
+ std::string temp;
+ bool in_quotes = false;
+ for (unsigned long i = 0; i < str.size(); ++i)
+ {
+ if (str[i] == '"')
+ {
+ in_quotes = !in_quotes;
+ }
+ else if (in_quotes)
+ {
+ temp += str[i];
+ }
+ }
+
+ return temp;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ void parse_annotation_file(
+ const std::string& file,
+ dlib::image_dataset_metadata::image& img,
+ std::string& dataset_name
+ )
+ /*!
+ ensures
+ - Reads the annotation text file named by file line by line and fills img with
+ what it finds: the quoted string on the "Image filename" line, one box per
+ object announced on the "Objects with ..." line, and for each object its
+ bounding box, its original label, and a "head" part taken from the
+ "Center point" line.
+ - If a "Database" line is present then its quoted string is stored into
+ dataset_name. Otherwise dataset_name is left unchanged.
+ - Lines whose first word is "#" and lines with fewer than three words are skipped.
+ throws
+ - dlib::error if the file can't be opened, if the announced object count is
+ negative, or if a line refers to an object id outside of the announced range.
+ !*/
+ {
+ ifstream fin(file.c_str());
+ if (!fin)
+ throw dlib::error("Unable to open file " + file);
+
+ // start from a clean image so nothing from a previous file leaks through
+ img = dlib::image_dataset_metadata::image();
+
+ string line;
+ std::vector<string> words;
+ while (fin.peek() != EOF)
+ {
+ getline(fin, line);
+ words = split(line, " \r\n\t:(,-)\"");
+ if (words.size() > 2)
+ {
+ if (words[0] == "#")
+ continue;
+
+ if (words[0] == "Image" && words[1] == "filename")
+ {
+ img.filename = pick_out_quoted_string(line);
+ }
+ else if (words[0] == "Database")
+ {
+ dataset_name = pick_out_quoted_string(line);
+ }
+ else if (words[0] == "Objects" && words[1] == "with" && words.size() >= 5)
+ {
+ const int num = sa = words[4];
+ // a negative count would turn into a gigantic unsigned size inside resize()
+ if (num < 0)
+ throw dlib::error("Invalid object count of " + words[4]);
+ img.boxes.resize(num);
+ }
+ else if (words.size() > 4 && (words[2] == "for" || words[2] == "on") && words[3] == "object")
+ {
+ // object ids in the file are 1-based, boxes are stored 0-based
+ long idx = sa = words[4];
+ --idx;
+ // reject ids on both sides of the valid range; an id of 0 or below would
+ // otherwise index in front of the boxes array
+ if (idx < 0 || idx >= (long)img.boxes.size())
+ throw dlib::error("Invalid object id number of " + words[4]);
+
+ if (words[0] == "Center" && words[1] == "point" && words.size() > 9)
+ {
+ const long x = sa = words[8];
+ const long y = sa = words[9];
+ img.boxes[idx].parts["head"] = point(x,y);
+ }
+ else if (words[0] == "Bounding" && words[1] == "box" && words.size() > 13)
+ {
+ img.boxes[idx].rect.left() = sa = words[10];
+ img.boxes[idx].rect.top() = sa = words[11];
+ img.boxes[idx].rect.right() = sa = words[12];
+ img.boxes[idx].rect.bottom() = sa = words[13];
+ }
+ else if (words[0] == "Original" && words[1] == "label" && words.size() > 6)
+ {
+ img.boxes[idx].label = words[6];
+ }
+ }
+ }
+
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ std::string figure_out_full_path_to_image (
+ const std::string& annotation_file,
+ const std::string& image_name
+ )
+ {
+ directory parent = get_parent_directory(file(annotation_file));
+
+
+ string temp;
+ while (true)
+ {
+ if (parent.is_root())
+ temp = parent.full_name() + image_name;
+ else
+ temp = parent.full_name() + directory::get_separator() + image_name;
+
+ if (file_exists(temp))
+ return temp;
+
+ if (parent.is_root())
+ throw dlib::error("Can't figure out where the file " + image_name + " is located.");
+ parent = get_parent_directory(parent);
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+void convert_pascal_v1(
+ const command_line_parser& parser
+)
+{
+ cout << "Convert from PASCAL v1.00 annotation format..." << endl;
+
+ dlib::image_dataset_metadata::dataset dataset;
+
+ std::string name;
+ dlib::image_dataset_metadata::image img;
+
+ const std::string filename = parser.option("c").argument();
+ // make sure the file exists so we can use the get_parent_directory() command to
+ // figure out its parent directory.
+ make_empty_file(filename);
+ const std::string parent_dir = get_parent_directory(file(filename)).full_name();
+
+ for (unsigned long i = 0; i < parser.number_of_arguments(); ++i)
+ {
+ try
+ {
+ parse_annotation_file(parser[i], img, name);
+
+ dataset.name = name;
+ img.filename = strip_path(figure_out_full_path_to_image(parser[i], img.filename), parent_dir);
+ dataset.images.push_back(img);
+
+ }
+ catch (exception& )
+ {
+ cout << "Error while processing file " << parser[i] << endl << endl;
+ throw;
+ }
+ }
+
+ save_image_dataset_metadata(dataset, filename);
+}
+
+
diff --git a/ml/dlib/tools/imglab/src/convert_pascal_v1.h b/ml/dlib/tools/imglab/src/convert_pascal_v1.h
new file mode 100644
index 000000000..3553d03a7
--- /dev/null
+++ b/ml/dlib/tools/imglab/src/convert_pascal_v1.h
@@ -0,0 +1,13 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_IMGLAB_CONVERT_PASCAl_V1_H__
+#define DLIB_IMGLAB_CONVERT_PASCAl_V1_H__
+
+#include "common.h"
+#include <dlib/cmd_line_parser.h>
+
+void convert_pascal_v1(const dlib::command_line_parser& parser);
+
+#endif // DLIB_IMGLAB_CONVERT_PASCAl_V1_H__
+
+
diff --git a/ml/dlib/tools/imglab/src/convert_pascal_xml.cpp b/ml/dlib/tools/imglab/src/convert_pascal_xml.cpp
new file mode 100644
index 000000000..c699d7777
--- /dev/null
+++ b/ml/dlib/tools/imglab/src/convert_pascal_xml.cpp
@@ -0,0 +1,239 @@
+
+#include "convert_pascal_xml.h"
+#include "dlib/data_io.h"
+#include <iostream>
+#include <dlib/xml_parser.h>
+#include <string>
+#include <dlib/dir_nav.h>
+#include <dlib/cmd_line_parser.h>
+
+using namespace std;
+using namespace dlib;
+
+namespace
+{
+ using namespace dlib::image_dataset_metadata;
+
+// ----------------------------------------------------------------------------------------
+
+ class doc_handler : public document_handler
+ {
+ image& temp_image;
+ std::string& dataset_name;
+
+ std::vector<std::string> ts;
+ box temp_box;
+
+ public:
+
+ doc_handler(
+ image& temp_image_,
+ std::string& dataset_name_
+ ):
+ temp_image(temp_image_),
+ dataset_name(dataset_name_)
+ {}
+
+
+ virtual void start_document (
+ )
+ {
+ ts.clear();
+ temp_image = image();
+ temp_box = box();
+ dataset_name.clear();
+ }
+
+ virtual void end_document (
+ )
+ {
+ }
+
+ virtual void start_element (
+ const unsigned long ,
+ const std::string& name,
+ const dlib::attribute_list&
+ )
+ {
+ if (ts.size() == 0 && name != "annotation")
+ {
+ std::ostringstream sout;
+ sout << "Invalid XML document. Root tag must be <annotation>. Found <" << name << "> instead.";
+ throw dlib::error(sout.str());
+ }
+
+
+ ts.push_back(name);
+ }
+
+ virtual void end_element (
+ const unsigned long ,
+ const std::string& name
+ )
+ {
+ ts.pop_back();
+ if (ts.size() == 0)
+ return;
+
+ if (name == "object" && ts.back() == "annotation")
+ {
+ temp_image.boxes.push_back(temp_box);
+ temp_box = box();
+ }
+ }
+
+ virtual void characters (
+ const std::string& data
+ )
+ {
+ if (ts.size() == 2 && ts[1] == "filename")
+ {
+ temp_image.filename = trim(data);
+ }
+ else if (ts.size() == 3 && ts[2] == "database" && ts[1] == "source")
+ {
+ dataset_name = trim(data);
+ }
+ else if (ts.size() >= 3)
+ {
+ if (ts[ts.size()-2] == "bndbox" && ts[ts.size()-3] == "object")
+ {
+ if (ts.back() == "xmin") temp_box.rect.left() = string_cast<double>(data);
+ else if (ts.back() == "ymin") temp_box.rect.top() = string_cast<double>(data);
+ else if (ts.back() == "xmax") temp_box.rect.right() = string_cast<double>(data);
+ else if (ts.back() == "ymax") temp_box.rect.bottom() = string_cast<double>(data);
+ }
+ else if (ts.back() == "name" && ts[ts.size()-2] == "object")
+ {
+ temp_box.label = trim(data);
+ }
+ else if (ts.back() == "difficult" && ts[ts.size()-2] == "object")
+ {
+ if (trim(data) == "0" || trim(data) == "false")
+ {
+ temp_box.difficult = false;
+ }
+ else
+ {
+ temp_box.difficult = true;
+ }
+ }
+ else if (ts.back() == "truncated" && ts[ts.size()-2] == "object")
+ {
+ if (trim(data) == "0" || trim(data) == "false")
+ {
+ temp_box.truncated = false;
+ }
+ else
+ {
+ temp_box.truncated = true;
+ }
+ }
+ else if (ts.back() == "occluded" && ts[ts.size()-2] == "object")
+ {
+ if (trim(data) == "0" || trim(data) == "false")
+ {
+ temp_box.occluded = false;
+ }
+ else
+ {
+ temp_box.occluded = true;
+ }
+ }
+
+ }
+ }
+
+ virtual void processing_instruction (
+ const unsigned long ,
+ const std::string& ,
+ const std::string&
+ )
+ {
+ }
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ class xml_error_handler : public error_handler
+ {
+ public:
+ virtual void error (
+ const unsigned long
+ ) { }
+
+ virtual void fatal_error (
+ const unsigned long line_number
+ )
+ {
+ std::ostringstream sout;
+ sout << "There is a fatal error on line " << line_number << " so parsing will now halt.";
+ throw dlib::error(sout.str());
+ }
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ void parse_annotation_file(
+ const std::string& file,
+ dlib::image_dataset_metadata::image& img,
+ std::string& dataset_name
+ )
+ {
+ doc_handler dh(img, dataset_name);
+ xml_error_handler eh;
+
+ xml_parser::kernel_1a parser;
+ parser.add_document_handler(dh);
+ parser.add_error_handler(eh);
+
+ ifstream fin(file.c_str());
+ if (!fin)
+ throw dlib::error("Unable to open file " + file);
+ parser.parse(fin);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+void convert_pascal_xml(
+ const command_line_parser& parser
+)
+{
+ cout << "Convert from PASCAL XML annotation format..." << endl;
+
+ dlib::image_dataset_metadata::dataset dataset;
+
+ std::string name;
+ dlib::image_dataset_metadata::image img;
+
+ const std::string filename = parser.option("c").argument();
+ // make sure the file exists so we can use the get_parent_directory() command to
+ // figure out its parent directory.
+ make_empty_file(filename);
+ const std::string parent_dir = get_parent_directory(file(filename)).full_name();
+
+ for (unsigned long i = 0; i < parser.number_of_arguments(); ++i)
+ {
+ try
+ {
+ parse_annotation_file(parser[i], img, name);
+ const string root = get_parent_directory(get_parent_directory(file(parser[i]))).full_name();
+ const string img_path = root + directory::get_separator() + "JPEGImages" + directory::get_separator();
+
+ dataset.name = name;
+ img.filename = strip_path(img_path + img.filename, parent_dir);
+ dataset.images.push_back(img);
+
+ }
+ catch (exception& )
+ {
+ cout << "Error while processing file " << parser[i] << endl << endl;
+ throw;
+ }
+ }
+
+ save_image_dataset_metadata(dataset, filename);
+}
+
diff --git a/ml/dlib/tools/imglab/src/convert_pascal_xml.h b/ml/dlib/tools/imglab/src/convert_pascal_xml.h
new file mode 100644
index 000000000..01ee1e82f
--- /dev/null
+++ b/ml/dlib/tools/imglab/src/convert_pascal_xml.h
@@ -0,0 +1,12 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_IMGLAB_CONVERT_PASCAl_XML_H__
+#define DLIB_IMGLAB_CONVERT_PASCAl_XML_H__
+
+#include "common.h"
+#include <dlib/cmd_line_parser.h>
+
+// Converts the PASCAL XML annotation files given as the parser's positional arguments
+// into a single imglab dataset, which is saved to the file named by the "c" option.
+void convert_pascal_xml(const dlib::command_line_parser& parser);
+
+#endif // DLIB_IMGLAB_CONVERT_PASCAl_XML_H__
+
diff --git a/ml/dlib/tools/imglab/src/flip_dataset.cpp b/ml/dlib/tools/imglab/src/flip_dataset.cpp
new file mode 100644
index 000000000..e072dc790
--- /dev/null
+++ b/ml/dlib/tools/imglab/src/flip_dataset.cpp
@@ -0,0 +1,249 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+
+#include "flip_dataset.h"
+#include <dlib/data_io.h>
+#include <dlib/dir_nav.h>
+#include <string>
+#include "common.h"
+#include <dlib/image_transforms.h>
+#include <dlib/optimization.h>
+#include <dlib/image_processing.h>
+
+using namespace dlib;
+using namespace std;
+
+// ----------------------------------------------------------------------------------------
+
+std::vector<long> align_points(
+    const std::vector<dpoint>& from,
+    const std::vector<dpoint>& to,
+    double min_angle = -90*pi/180.0,
+    double max_angle = 90*pi/180.0,
+    long num_angles = 181
+)
+/*!
+    requires
+        - from.size() == to.size()
+    ensures
+        - Figures out how to align the points in from with the points in to.  Returns an
+          assignment array A that indicates that from[i] matches with to[A[i]].
+        - If there are fewer than two points the trivial assignment is returned without
+          any search.
+        - Prints a warning to cout when, after fitting a similarity transform to the
+          matched points, the mean residual distance is larger than 0.05.
+
+        We use the Hungarian algorithm with a search over reasonable angles
+        (num_angles values between min_angle and max_angle, in radians).  This method
+        works because we just need to account for a translation and a mild rotation and
+        nothing else.  If there is any other more complex mapping then you probably don't
+        have landmarks that make sense to flip.
+!*/
+{
+    DLIB_CASSERT(from.size() == to.size());
+
+    // Zero or one point can only be matched one way.  The code below is not usable for
+    // these sizes (it takes the max of the distance matrix and fits a similarity
+    // transform to the matched points), so return the trivial assignment right away.
+    if (from.size() < 2)
+        return std::vector<long>(from.size(), 0);
+
+    std::vector<long> best_assignment;
+    double best_assignment_cost = std::numeric_limits<double>::infinity();
+
+    matrix<double> dists(from.size(), to.size());
+    matrix<long long> idists;
+
+    for (auto angle : linspace(min_angle, max_angle, num_angles))
+    {
+        auto rot = rotation_matrix(angle);
+        for (long r = 0; r < dists.nr(); ++r)
+        {
+            for (long c = 0; c < dists.nc(); ++c)
+            {
+                dists(r,c) = length_squared(rot*from[r]-to[c]);
+            }
+        }
+
+        // The assignment solver maximizes an integer cost, so rescale the distances to
+        // the full long long range and negate them: small distance == large cost.
+        idists = matrix_cast<long long>(-round(std::numeric_limits<long long>::max()*(dists/max(dists))));
+
+        auto assignment = max_cost_assignment(idists);
+        auto cost = assignment_cost(dists, assignment);
+        if (cost < best_assignment_cost)
+        {
+            best_assignment_cost = cost;
+            best_assignment = std::move(assignment);
+        }
+    }
+
+
+    // Now compute the alignment error in terms of average distance moved by each part.  We
+    // do this so we can give the user a warning if it's impossible to make a good
+    // alignment.
+    running_stats<double> rs;
+    // matched[i] is the point in to that from[i] was assigned to, i.e. to[A[i]].  (Writing
+    // to[i] into slot A[i] instead would apply the inverse permutation, which only
+    // coincides with this when the assignment is its own inverse.)
+    std::vector<dpoint> matched(from.size());
+    for (size_t i = 0; i < from.size(); ++i)
+        matched[i] = to[best_assignment[i]];
+    auto tform = find_similarity_transform(from, matched);
+    for (size_t i = 0; i < from.size(); ++i)
+        rs.add(length(tform(from[i])-matched[i]));
+    if (rs.mean() > 0.05)
+    {
+        cout << "WARNING, your dataset has object part annotations and you asked imglab to " << endl;
+        cout << "flip the data. Imglab tried to adjust the part labels so that the average" << endl;
+        cout << "part layout in the flipped dataset is the same as the source dataset. " << endl;
+        cout << "However, the part annotation scheme doesn't seem to be left-right symmetric." << endl;
+        cout << "You should manually review the output to make sure the part annotations are " << endl;
+        cout << "labeled as you expect." << endl;
+    }
+
+
+    return best_assignment;
+}
+
+// ----------------------------------------------------------------------------------------
+
+std::map<string,dpoint> normalized_parts (
+    const image_dataset_metadata::box& b
+)
+/*!
+    ensures
+        - returns a copy of b's parts, keyed by the same labels, where every part
+          location has been mapped through dlib::impl::normalizing_tform(b.rect).
+!*/
+{
+    std::map<string,dpoint> result;
+    auto to_normalized = dlib::impl::normalizing_tform(b.rect);
+    for (auto& part : b.parts)
+    {
+        result[part.first] = to_normalized(part.second);
+    }
+    return result;
+}
+
+// ----------------------------------------------------------------------------------------
+
+std::map<string,dpoint> average_parts (
+    const image_dataset_metadata::dataset& data
+)
+/*!
+    ensures
+        - returns, for every part label that occurs in data, the mean of that part's
+          normalized location (see normalized_parts()) over all boxes of all images.
+!*/
+{
+    // mean_layout first accumulates per-label sums; num_seen counts how many boxes
+    // contributed to each label.
+    std::map<string,dpoint> mean_layout;
+    std::map<string,double> num_seen;
+
+    for (const auto& img : data.images)
+    {
+        for (const auto& obj : img.boxes)
+        {
+            for (const auto& part : normalized_parts(obj))
+            {
+                mean_layout[part.first] += part.second;
+                num_seen[part.first] += 1;
+            }
+        }
+    }
+
+    // turn the sums into averages
+    for (auto& entry : mean_layout)
+        entry.second /= num_seen[entry.first];
+
+    return mean_layout;
+}
+
+// ----------------------------------------------------------------------------------------
+
+void make_part_labeling_match_target_dataset (
+    const image_dataset_metadata::dataset& target,
+    image_dataset_metadata::dataset& data
+)
+/*!
+    Relabels the parts in data so that data's average part layout lines up with
+    target's average part layout.  Part positions are never touched; only the string
+    labels attached to them change.  This is meant for the case where data is a
+    left-right flipped copy of a dataset of symmetric objects and the flipped parts
+    should follow the annotation scheme of the unflipped data.
+!*/
+{
+    const auto target_layout = average_parts(target);
+    const auto data_layout = average_parts(data);
+
+    // align_points() works on plain point arrays, so split each layout into parallel
+    // arrays of labels and positions.
+    std::vector<string> from_labels, to_labels;
+    std::vector<dpoint> from, to;
+    for (const auto& entry : target_layout)
+    {
+        from_labels.push_back(entry.first);
+        from.push_back(entry.second);
+    }
+    for (const auto& entry : data_layout)
+    {
+        to_labels.push_back(entry.first);
+        to.push_back(entry.second);
+    }
+
+    // matches[i] == j means from_labels[i] is the new name for to_labels[j]
+    const auto matches = align_points(from, to);
+    std::map<string,string> label_mapping;
+    for (size_t i = 0; i < matches.size(); ++i)
+    {
+        const string& old_label = to_labels[matches[i]];
+        label_mapping[old_label] = from_labels[i];
+    }
+
+    // rewrite the part labels of every box using the mapping
+    for (auto& img : data.images)
+    {
+        for (auto& obj : img.boxes)
+        {
+            std::map<string,point> relabeled;
+            for (auto& part : obj.parts)
+                relabeled[label_mapping[part.first]] = part.second;
+            obj.parts = std::move(relabeled);
+        }
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+
+void flip_dataset(const command_line_parser& parser)
+/*!
+    ensures
+        - Loads the dataset named by the "flip" option (or "flip-basic" when "flip" was
+          not given), writes a left-right mirrored copy of every image next to the
+          original with a "flipped_" prefix (png, or jpg when the "jpg" option is
+          present), mirrors all boxes and parts, and saves the new metadata as
+          flipped_<name> in the same folder as the source metadata file.
+        - Unless "flip-basic" was given, the part labels of the flipped data are
+          remapped with make_part_labeling_match_target_dataset().
+        - The process' current directory is left set to the folder of the metadata file.
+!*/
+{
+    image_dataset_metadata::dataset metadata, orig_metadata;
+    string datasource;
+    if (parser.option("flip"))
+        datasource = parser.option("flip").argument();
+    else
+        datasource = parser.option("flip-basic").argument();
+    load_image_dataset_metadata(metadata,datasource);
+    orig_metadata = metadata;
+
+    // Resolve the metadata file exactly once, and do it before the working directory
+    // changes.  datasource may be a relative path; looking it up again after the
+    // set_current_dir() call below would resolve it against the wrong folder.
+    const file datasource_file(datasource);
+    const directory datasource_dir = get_parent_directory(datasource_file);
+    const string metadata_filename = datasource_dir.full_name() +
+        directory::get_separator() + "flipped_" + datasource_file.name();
+
+    // Set the current directory to be the one that contains the
+    // metadata file. We do this because the file might contain
+    // file paths which are relative to this folder.
+    set_current_dir(datasource_dir.full_name());
+
+
+    array2d<rgb_pixel> img, temp;
+    for (unsigned long i = 0; i < metadata.images.size(); ++i)
+    {
+        file f(metadata.images[i].filename);
+        string filename = get_parent_directory(f).full_name() + directory::get_separator() + "flipped_" + to_png_name(f.name());
+
+        load_image(img, metadata.images[i].filename);
+        flip_image_left_right(img, temp);
+        if (parser.option("jpg"))
+        {
+            filename = to_jpg_name(filename);
+            save_jpeg(temp, filename,JPEG_QUALITY);
+        }
+        else
+        {
+            save_png(temp, filename);
+        }
+
+        for (unsigned long j = 0; j < metadata.images[i].boxes.size(); ++j)
+        {
+            metadata.images[i].boxes[j].rect = impl::flip_rect_left_right(metadata.images[i].boxes[j].rect, get_rect(img));
+
+            // flip all the object parts.  A part is a single point, so it is mirrored
+            // by flipping the one-pixel rectangle that sits on it.
+            for (auto& part : metadata.images[i].boxes[j].parts)
+            {
+                part.second = impl::flip_rect_left_right(rectangle(part.second,part.second), get_rect(img)).tl_corner();
+            }
+        }
+
+        metadata.images[i].filename = filename;
+    }
+
+    if (!parser.option("flip-basic"))
+        make_part_labeling_match_target_dataset(orig_metadata, metadata);
+
+    save_image_dataset_metadata(metadata, metadata_filename);
+}
+
+// ----------------------------------------------------------------------------------------
+
diff --git a/ml/dlib/tools/imglab/src/flip_dataset.h b/ml/dlib/tools/imglab/src/flip_dataset.h
new file mode 100644
index 000000000..8ac5db3e8
--- /dev/null
+++ b/ml/dlib/tools/imglab/src/flip_dataset.h
@@ -0,0 +1,12 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_IMGLAB_FLIP_DaTASET_H__
+#define DLIB_IMGLAB_FLIP_DaTASET_H__
+
+
+#include <dlib/cmd_line_parser.h>
+
+// Writes a left-right flipped copy of the dataset named by the parser's "flip" (or
+// "flip-basic") option: flipped image files plus a flipped_<name> metadata file.
+void flip_dataset(const dlib::command_line_parser& parser);
+
+#endif // DLIB_IMGLAB_FLIP_DaTASET_H__
+
diff --git a/ml/dlib/tools/imglab/src/main.cpp b/ml/dlib/tools/imglab/src/main.cpp
new file mode 100644
index 000000000..060c2c870
--- /dev/null
+++ b/ml/dlib/tools/imglab/src/main.cpp
@@ -0,0 +1,1145 @@
+
+#include "dlib/data_io.h"
+#include "dlib/string.h"
+#include "metadata_editor.h"
+#include "convert_pascal_xml.h"
+#include "convert_pascal_v1.h"
+#include "convert_idl.h"
+#include "cluster.h"
+#include "flip_dataset.h"
+#include <dlib/cmd_line_parser.h>
+#include <dlib/image_transforms.h>
+#include <dlib/svm.h>
+#include <dlib/console_progress_indicator.h>
+#include <dlib/md5.h>
+
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <set>
+
+#include <dlib/dir_nav.h>
+
+
+const char* VERSION = "1.13";
+
+
+
+using namespace std;
+using namespace dlib;
+
+// ----------------------------------------------------------------------------------------
+
+void create_new_dataset (
+    const command_line_parser& parser
+)
+/*!
+    ensures
+        - Creates the dataset file named by the "c" option.  Every positional argument
+          is either an image file, which is added directly, or a directory, whose image
+          files (matched by extension) are added in sorted order.
+        - Directories are searched to a depth of 30 when the "r" option is given and to
+          a depth of 0 otherwise.
+        - Image paths are stored after being passed through strip_path() with the
+          output file's directory.
+!*/
+{
+    using namespace dlib::image_dataset_metadata;
+
+    const std::string filename = parser.option("c").argument();
+    // get_parent_directory() only works on a file that exists, so create the output
+    // file (empty) before looking up the directory it lives in.
+    make_empty_file(filename);
+    const std::string parent_dir = get_parent_directory(file(filename));
+
+    const unsigned long depth = parser.option("r") ? 30 : 0;
+
+    dataset meta;
+    meta.name = "imglab dataset";
+    meta.comment = "Created by imglab tool.";
+
+    const unsigned long num_args = parser.number_of_arguments();
+    for (unsigned long arg = 0; arg < num_args; ++arg)
+    {
+        try
+        {
+            meta.images.push_back(image(strip_path(file(parser[arg]), parent_dir)));
+        }
+        catch (dlib::file::file_not_found&)
+        {
+            // Not a file, so parser[arg] should be a directory.
+            std::vector<file> found = get_files_in_directory_tree(parser[arg],
+                match_endings(".png .PNG .jpeg .JPEG .jpg .JPG .bmp .BMP .dng .DNG .gif .GIF"),
+                depth);
+            sort(found.begin(), found.end());
+
+            for (const auto& image_file : found)
+                meta.images.push_back(image(strip_path(image_file, parent_dir)));
+        }
+    }
+
+    save_image_dataset_metadata(meta, filename);
+}
+
+// ----------------------------------------------------------------------------------------
+
+int split_dataset (
+    const command_line_parser& parser
+)
+/*!
+    ensures
+        - Splits the dataset given as the single positional argument into two datasets:
+          one holding the images that contain at least one box labeled with the
+          argument of the "split" option, and one holding all other images.  Both keep
+          the name and comment of the source dataset.
+        - The results are saved as <base>_with_<label>.xml and <base>_without_<label>.xml,
+          where <base> is the input path cut at the first '.' of its file name.
+        - returns EXIT_SUCCESS, or EXIT_FAILURE (after printing a message to cerr) when
+          the number of positional arguments is not exactly one.
+!*/
+{
+    if (parser.number_of_arguments() != 1)
+    {
+        cerr << "The --split option requires you to give one XML file on the command line." << endl;
+        return EXIT_FAILURE;
+    }
+
+    const std::string label = parser.option("split").argument();
+
+    dlib::image_dataset_metadata::dataset data, data_with, data_without;
+    load_image_dataset_metadata(data, parser[0]);
+
+    data_with.name = data.name;
+    data_with.comment = data.comment;
+    data_without.name = data.name;
+    data_without.comment = data.comment;
+
+    for (const auto& img : data.images)
+    {
+        // check for the label we are looking for
+        bool has_the_label = false;
+        for (const auto& b : img.boxes)
+        {
+            if (b.label == label)
+            {
+                has_the_label = true;
+                break;
+            }
+        }
+
+        if (has_the_label)
+            data_with.images.push_back(img);
+        else
+            data_without.images.push_back(img);
+    }
+
+    // Build the output base name by dropping everything from the first '.' of the file
+    // name onward.  The search must start after the last path separator: a '.' in the
+    // directory part (e.g. "./data.xml" or "../set/data.xml") is not an extension and
+    // cutting there would throw away the whole path.
+    const std::string xml_path = parser[0];
+    const std::string::size_type last_sep = xml_path.find_last_of("/\\");
+    const std::string::size_type name_start = (last_sep == std::string::npos) ? 0 : last_sep + 1;
+    const std::string base_name = xml_path.substr(0, xml_path.find('.', name_start));
+
+    save_image_dataset_metadata(data_with, base_name + "_with_"+label + ".xml");
+    save_image_dataset_metadata(data_without, base_name + "_without_"+label + ".xml");
+
+    return EXIT_SUCCESS;
+}
+
+// ----------------------------------------------------------------------------------------
+
+void print_all_labels (
+    const dlib::image_dataset_metadata::dataset& data
+)
+/*!
+    ensures
+        - prints each distinct, non-empty box label found in data to cout, one per line,
+          in sorted order.
+!*/
+{
+    std::set<std::string> labels;
+    for (const auto& img : data.images)
+    {
+        for (const auto& b : img.boxes)
+            labels.insert(b.label);
+    }
+
+    for (const auto& label : labels)
+    {
+        // unlabeled boxes have an empty label and are not listed
+        if (label.empty())
+            continue;
+        cout << label << endl;
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+
+void print_all_label_stats (
+    const dlib::image_dataset_metadata::dataset& data
+)
+/*!
+    ensures
+        - prints summary statistics about data to cout: the number of images, distinct
+          labels and non-ignored boxes, followed by, for each label (the empty label is
+          reported as "Unlabeled Boxes"), the number of images it appears in, how often
+          it occurs, and statistics on box area and width/height ratio.
+!*/
+{
+    std::map<std::string, running_stats<double> > area_stats, aspect_ratio;
+    std::map<std::string, int> image_hits;
+    std::set<std::string> labels;
+    unsigned long num_unignored_boxes = 0;
+
+    for (const auto& img : data.images)
+    {
+        // labels that occur at least once in this particular image
+        std::set<std::string> labels_in_image;
+        for (const auto& b : img.boxes)
+        {
+            labels.insert(b.label);
+            labels_in_image.insert(b.label);
+
+            area_stats[b.label].add(b.rect.area());
+            aspect_ratio[b.label].add(b.rect.width()/
+                static_cast<double>(b.rect.height()));
+
+            if (!b.ignore)
+                ++num_unignored_boxes;
+        }
+
+        // count the number of images for each label
+        for (const auto& seen : labels_in_image)
+            image_hits[seen] += 1;
+    }
+
+    cout << "Number of images: "<< data.images.size() << endl;
+    cout << "Number of different labels: "<< labels.size() << endl;
+    cout << "Number of non-ignored boxes: " << num_unignored_boxes << endl << endl;
+
+    for (const auto& label : labels)
+    {
+        if (label.empty())
+            cout << "Unlabeled Boxes:" << endl;
+        else
+            cout << "Label: "<< label << endl;
+        cout << " number of images: " << image_hits[label] << endl;
+        cout << " number of occurrences: " << area_stats[label].current_n() << endl;
+        cout << " min box area: " << area_stats[label].min() << endl;
+        cout << " max box area: " << area_stats[label].max() << endl;
+        cout << " mean box area: " << area_stats[label].mean() << endl;
+        cout << " stddev box area: " << area_stats[label].stddev() << endl;
+        cout << " mean width/height ratio: " << aspect_ratio[label].mean() << endl;
+        cout << " stddev width/height ratio: " << aspect_ratio[label].stddev() << endl;
+        cout << endl;
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+
+void rename_labels (
+    dlib::image_dataset_metadata::dataset& data,
+    const std::string& from,
+    const std::string& to
+)
+/*!
+    ensures
+        - every box in data whose label equals from has its label replaced by to.
+!*/
+{
+    for (auto& img : data.images)
+    {
+        for (auto& b : img.boxes)
+        {
+            if (b.label == from)
+                b.label = to;
+        }
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+
+void ignore_labels (
+    dlib::image_dataset_metadata::dataset& data,
+    const std::string& label
+)
+/*!
+    ensures
+        - every box in data whose label equals label has its ignore flag set to true.
+!*/
+{
+    for (auto& img : data.images)
+    {
+        for (auto& b : img.boxes)
+        {
+            if (b.label == label)
+                b.ignore = true;
+        }
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+
+void merge_metadata_files (
+    const command_line_parser& parser
+)
+/*!
+    ensures
+        - Loads the two datasets named by the arguments of the "add" option and saves
+          their union to merged.xml.  Images are keyed by filename; when both datasets
+          contain the same filename the entry from the first argument wins.
+        - The merged dataset keeps all other fields of the second argument's dataset
+          and lists its images in filename order.
+!*/
+{
+    image_dataset_metadata::dataset src, dest;
+    load_image_dataset_metadata(src, parser.option("add").argument(0));
+    load_image_dataset_metadata(dest, parser.option("add").argument(1));
+
+    // Insert dest first and src second so that src overwrites duplicate filenames.
+    std::map<string,image_dataset_metadata::image> by_filename;
+    for (const auto& img : dest.images)
+        by_filename[img.filename] = img;
+    for (const auto& img : src.images)
+        by_filename[img.filename] = img;
+
+    // replace dest's image list with the merged one
+    dest.images.clear();
+    for (const auto& entry : by_filename)
+        dest.images.push_back(entry.second);
+
+    save_image_dataset_metadata(dest, "merged.xml");
+}
+
+// ----------------------------------------------------------------------------------------
+
+void rotate_dataset(const command_line_parser& parser)
+/*!
+    ensures
+        - Loads the dataset given as the first positional argument, rotates every image
+          by the number of degrees given to the "rotate" option, and writes each result
+          next to the original with a "rotated_<angle>_" prefix (png, or jpg when the
+          "jpg" option is present).
+        - Every box is replaced by a rectangle centered on the bounding box of its
+          rotated corners and rescaled to keep the original box's area.  Every part is
+          moved by the same rotation.
+        - The new metadata is saved as rotated_<angle>_<name> in the same folder as the
+          source metadata file.
+        - The process' current directory is left set to the folder of the metadata file.
+!*/
+{
+    image_dataset_metadata::dataset metadata;
+    const string datasource = parser[0];
+    load_image_dataset_metadata(metadata,datasource);
+
+    // NOTE(review): the default value 0 is an int; confirm whether get_option() then
+    // also parses the argument as an int, which would reject fractional angles.
+    double angle = get_option(parser, "rotate", 0);
+
+    // Resolve the metadata file exactly once, and do it before the working directory
+    // changes.  datasource may be a relative path; looking it up again after the
+    // set_current_dir() call below would resolve it against the wrong folder.
+    const file datasource_file(datasource);
+    const directory datasource_dir = get_parent_directory(datasource_file);
+
+    const string file_prefix = "rotated_"+ cast_to_string(angle) + "_";
+    const string metadata_filename = datasource_dir.full_name() +
+        directory::get_separator() + file_prefix + datasource_file.name();
+
+    // Set the current directory to be the one that contains the
+    // metadata file. We do this because the file might contain
+    // file paths which are relative to this folder.
+    set_current_dir(datasource_dir.full_name());
+
+
+    array2d<rgb_pixel> img, temp;
+    for (unsigned long i = 0; i < metadata.images.size(); ++i)
+    {
+        file f(metadata.images[i].filename);
+        string filename = get_parent_directory(f).full_name() + directory::get_separator() + file_prefix + to_png_name(f.name());
+
+        load_image(img, metadata.images[i].filename);
+        // tran maps points of the source image to their location in the rotated image
+        const point_transform_affine tran = rotate_image(img, temp, angle*pi/180);
+        if (parser.option("jpg"))
+        {
+            filename = to_jpg_name(filename);
+            save_jpeg(temp, filename,JPEG_QUALITY);
+        }
+        else
+        {
+            save_png(temp, filename);
+        }
+
+        for (unsigned long j = 0; j < metadata.images[i].boxes.size(); ++j)
+        {
+            const rectangle rect = metadata.images[i].boxes[j].rect;
+            // grow an initially empty rectangle until it holds all four rotated corners
+            rectangle newrect;
+            newrect += tran(rect.tl_corner());
+            newrect += tran(rect.tr_corner());
+            newrect += tran(rect.bl_corner());
+            newrect += tran(rect.br_corner());
+            // now make newrect have the same area as the starting rect.
+            double ratio = std::sqrt(rect.area()/(double)newrect.area());
+            newrect = centered_rect(newrect, newrect.width()*ratio, newrect.height()*ratio);
+            metadata.images[i].boxes[j].rect = newrect;
+
+            // rotate all the object parts
+            for (auto& part : metadata.images[i].boxes[j].parts)
+            {
+                part.second = tran(part.second);
+            }
+        }
+
+        metadata.images[i].filename = filename;
+    }
+
+    save_image_dataset_metadata(metadata, metadata_filename);
+}
+
+// ----------------------------------------------------------------------------------------
+
+int resample_dataset(const command_line_parser& parser)
+/*!
+    ensures
+        - Loads the dataset given as the single positional argument and, for every
+          non-ignored box that has at least "min-object-size" pixels of area, crops a
+          square image chip centered on that box.  Each chip is saved beside the source
+          image as <image>_RESAMPLED_<hash>.png (or .jpg when the "jpg" option is
+          present) and the chips are listed in a new dataset saved as
+          <input>.RESAMPLED.xml.
+        - The new dataset's name and comment are derived from the source dataset's name
+          and comment, with the resampling parameters appended to the comment.
+        - returns EXIT_SUCCESS, or EXIT_FAILURE (after printing a message to cerr) when
+          the number of positional arguments is not exactly one.
+!*/
+{
+    if (parser.number_of_arguments() != 1)
+    {
+        cerr << "The --resample option requires you to give one XML file on the command line." << endl;
+        return EXIT_FAILURE;
+    }
+
+    const size_t obj_size = get_option(parser,"cropped-object-size",100*100);
+    const double margin_scale = get_option(parser,"crop-size",2.5); // cropped image will be this times wider than the object.
+    const unsigned long min_object_size = get_option(parser,"min-object-size",1);
+    const bool one_object_per_image = parser.option("one-object-per-image");
+
+    dlib::image_dataset_metadata::dataset data, resampled_data;
+    // The source dataset has to be loaded before its name and comment are copied
+    // below, otherwise both are still empty and get lost in the output.
+    load_image_dataset_metadata(data, parser[0]);
+
+    std::ostringstream sout;
+    sout << "\nThe --resample parameters which generated this dataset were:" << endl;
+    sout << " cropped-object-size: "<< obj_size << endl;
+    sout << " crop-size: "<< margin_scale << endl;
+    sout << " min-object-size: "<< min_object_size << endl;
+    if (one_object_per_image)
+        sout << " one_object_per_image: true" << endl;
+    resampled_data.comment = data.comment + sout.str();
+    resampled_data.name = data.name + " RESAMPLED";
+
+    // Image paths in the dataset may be relative to the folder of the XML file, so work
+    // from there while loading and saving images.
+    locally_change_current_dir chdir(get_parent_directory(file(parser[0])));
+    dlib::rand rnd;
+
+    // side length of the square chips, chosen so an object of obj_size pixels ends up
+    // margin_scale times narrower than the chip
+    const size_t image_size = std::round(std::sqrt(obj_size*margin_scale*margin_scale));
+    const chip_dims cdims(image_size, image_size);
+
+    console_progress_indicator pbar(data.images.size());
+    for (unsigned long i = 0; i < data.images.size(); ++i)
+    {
+        // don't even bother loading images that don't have objects.
+        if (data.images[i].boxes.size() == 0)
+            continue;
+
+        pbar.print_status(i);
+        array2d<rgb_pixel> img, chip;
+        load_image(img, data.images[i].filename);
+
+
+        // figure out what chips we want to take from this image
+        for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
+        {
+            const rectangle rect = data.images[i].boxes[j].rect;
+            if (data.images[i].boxes[j].ignore || rect.area() < min_object_size)
+                continue;
+
+            const auto max_dim = std::max(rect.width(), rect.height());
+
+            // randomly perturb the crop scale a little (a factor between 0.85 and 1.15
+            // if get_random_double() yields values in [0,1) -- TODO confirm)
+            const double rand_scale_perturb = 1 - 0.3*(rnd.get_random_double()-0.5);
+            const rectangle crop_rect = centered_rect(rect, max_dim*margin_scale*rand_scale_perturb, max_dim*margin_scale*rand_scale_perturb);
+
+            const rectangle_transform tform = get_mapping_to_chip(chip_details(crop_rect, cdims));
+            extract_image_chip(img, chip_details(crop_rect, cdims), chip);
+
+            image_dataset_metadata::image dimg;
+            // Now transform the boxes to the crop and also mark them as ignored if they
+            // have already been cropped out or are outside the crop.
+            for (size_t k = 0; k < data.images[i].boxes.size(); ++k)
+            {
+                image_dataset_metadata::box box = data.images[i].boxes[k];
+                // ignore boxes outside the cropped image
+                if (crop_rect.intersect(box.rect).area() == 0)
+                    continue;
+
+                // A box that lies fully inside the crop (with a 10 pixel margin) is kept
+                // as a real object in this chip, and the *source* box is flagged as
+                // ignored so later chips don't present it as an object again.  Any other
+                // box (partially outside, or not the central object when only one object
+                // per image is wanted) is flagged as ignored in this chip only.
+                if (crop_rect.contains(grow_rect(box.rect,10)) && (!one_object_per_image || k==j))
+                    data.images[i].boxes[k].ignore = true;
+                else
+                    box.ignore = true;
+
+                if (box.rect.area() < min_object_size)
+                    box.ignore = true;
+
+                box.rect = tform(box.rect);
+                for (auto&& p : box.parts)
+                    p.second = tform.get_tform()(p.second);
+                dimg.boxes.push_back(box);
+            }
+            // Put a 64bit hash of the image data into the name to make sure there are no
+            // file name conflicts.
+            std::ostringstream hash_str;
+            hash_str << hex << murmur_hash3_128bit(&chip[0][0], chip.size()*sizeof(chip[0][0])).second;
+            dimg.filename = data.images[i].filename + "_RESAMPLED_"+hash_str.str()+".png";
+
+            if (parser.option("jpg"))
+            {
+                dimg.filename = to_jpg_name(dimg.filename);
+                save_jpeg(chip,dimg.filename, JPEG_QUALITY);
+            }
+            else
+            {
+                save_png(chip,dimg.filename);
+            }
+            resampled_data.images.push_back(dimg);
+        }
+    }
+
+    // Return to the original working directory before writing the XML file: parser[0]
+    // may be a relative path, which only names the intended location from there.
+    chdir.revert();
+
+    save_image_dataset_metadata(resampled_data, parser[0] + ".RESAMPLED.xml");
+
+    return EXIT_SUCCESS;
+}
+
+// ----------------------------------------------------------------------------------------
+
+int tile_dataset(const command_line_parser& parser)
+/*!
+    ensures
+        - Loads the dataset given as the single positional argument, extracts an image
+          chip for every non-ignored box, and saves all chips tiled into one image
+          whose file name is the argument of the "tile" option.
+        - The "size" option (default 8000) is passed to chip_details as the chip size.
+        - returns EXIT_SUCCESS, or EXIT_FAILURE (after printing a message to cerr) when
+          the number of positional arguments is not exactly one or the output file name
+          does not end in .png or .jpg.
+!*/
+{
+    if (parser.number_of_arguments() != 1)
+    {
+        cerr << "The --tile option requires you to give one XML file on the command line." << endl;
+        return EXIT_FAILURE;
+    }
+
+    string out_image = parser.option("tile").argument();
+    string ext = right_substr(out_image,".");
+    const bool write_png = (ext == "png");
+    if (!write_png && ext != "jpg")
+    {
+        cerr << "The output image file must have either .png or .jpg extension." << endl;
+        return EXIT_FAILURE;
+    }
+
+    const unsigned long chip_size = get_option(parser, "size", 8000);
+
+    dlib::image_dataset_metadata::dataset data;
+    load_image_dataset_metadata(data, parser[0]);
+    // work from the folder of the XML file while the images are being loaded
+    locally_change_current_dir chdir(get_parent_directory(file(parser[0])));
+    dlib::array<array2d<rgb_pixel> > images;
+    console_progress_indicator pbar(data.images.size());
+    for (unsigned long i = 0; i < data.images.size(); ++i)
+    {
+        const auto& entry = data.images[i];
+
+        // images without any boxes contribute nothing, so skip loading them
+        if (entry.boxes.size() == 0)
+            continue;
+
+        pbar.print_status(i);
+        array2d<rgb_pixel> img;
+        load_image(img, entry.filename);
+
+        // collect one chip request per non-ignored box
+        std::vector<chip_details> dets;
+        for (unsigned long j = 0; j < entry.boxes.size(); ++j)
+        {
+            if (!entry.boxes[j].ignore)
+            {
+                rectangle rect = entry.boxes[j].rect;
+                dets.push_back(chip_details(rect, chip_size));
+            }
+        }
+
+        // extract all of this image's chips in one call and append them to the output
+        dlib::array<array2d<rgb_pixel> > chips;
+        extract_image_chips(img, dets, chips);
+        for (unsigned long j = 0; j < chips.size(); ++j)
+            images.push_back(chips[j]);
+    }
+
+    // restore the original working directory before writing the output image
+    chdir.revert();
+
+    if (write_png)
+        save_png(tile_images(images), out_image);
+    else
+        save_jpeg(tile_images(images), out_image);
+
+    return EXIT_SUCCESS;
+}
+
+
+// ----------------------------------------------------------------------------------------
+
+int main(int argc, char** argv)
+{
+ try
+ {
+
+ command_line_parser parser;
+
+ parser.add_option("h","Displays this information.");
+ parser.add_option("v","Display version.");
+
+ parser.set_group_name("Creating XML files");
+ parser.add_option("c","Create an XML file named <arg> listing a set of images.",1);
+ parser.add_option("r","Search directories recursively for images.");
+ parser.add_option("convert","Convert foreign image Annotations from <arg> format to the imglab format. "
+ "Supported formats: pascal-xml, pascal-v1, idl.",1);
+
+ parser.set_group_name("Viewing XML files");
+ parser.add_option("tile","Chip out all the objects and save them as one big image called <arg>.",1);
+ parser.add_option("size","When using --tile or --cluster, make each extracted object contain "
+ "about <arg> pixels (default 8000).",1);
+ parser.add_option("l","List all the labels in the given XML file.");
+ parser.add_option("stats","List detailed statistics on the object labels in the given XML file.");
+ parser.add_option("files","List all the files in the given XML file.");
+
+ parser.set_group_name("Editing/Transforming XML datasets");
+ parser.add_option("rename", "Rename all labels of <arg1> to <arg2>.",2);
+ parser.add_option("parts","The display will allow image parts to be labeled. The set of allowable parts "
+ "is defined by <arg> which should be a space separated list of parts.",1);
+ parser.add_option("rmempty","Remove all images that don't contain non-ignored annotations and save the results to a new XML file.");
+ parser.add_option("rmdupes","Remove duplicate images from the dataset. This is done by comparing "
+ "the md5 hash of each image file and removing duplicate images. " );
+ parser.add_option("rmdiff","Set the ignored flag to true for boxes marked as difficult.");
+ parser.add_option("rmtrunc","Set the ignored flag to true for boxes that are partially outside the image.");
+ parser.add_option("sort-num-objects","Sort the images listed an XML file so images with many objects are listed first.");
+ parser.add_option("sort","Alphabetically sort the images in an XML file.");
+ parser.add_option("shuffle","Randomly shuffle the order of the images listed in an XML file.");
+ parser.add_option("seed", "When using --shuffle, set the random seed to the string <arg>.",1);
+ parser.add_option("split", "Split the contents of an XML file into two separate files. One containing the "
+ "images with objects labeled <arg> and another file with all the other images. ",1);
+ parser.add_option("add", "Add the image metadata from <arg1> into <arg2>. If any of the image "
+ "tags are in both files then the ones in <arg2> are deleted and replaced with the "
+ "image tags from <arg1>. The results are saved into merged.xml and neither <arg1> or "
+ "<arg2> files are modified.",2);
+ parser.add_option("flip", "Read an XML image dataset from the <arg> XML file and output a left-right flipped "
+ "version of the dataset and an accompanying flipped XML file named flipped_<arg>. "
+ "We also adjust object part labels after flipping so that the new flipped dataset "
+ "has the same average part layout as the source dataset." ,1);
+ parser.add_option("flip-basic", "This option is just like --flip, except we don't adjust any object part labels after flipping. "
+ "The parts are instead simply mirrored to the flipped dataset.", 1);
+ parser.add_option("rotate", "Read an XML image dataset and output a copy that is rotated counter clockwise by <arg> degrees. "
+ "The output is saved to an XML file prefixed with rotated_<arg>.",1);
+ parser.add_option("cluster", "Cluster all the objects in an XML file into <arg> different clusters and save "
+ "the results as cluster_###.xml and cluster_###.jpg files.",1);
+ parser.add_option("ignore", "Mark boxes labeled as <arg> as ignored. The resulting XML file is output as a separate file and the original is not modified.",1);
+ parser.add_option("rmlabel","Remove all boxes labeled <arg> and save the results to a new XML file.",1);
+ parser.add_option("rm-other-labels","Remove all boxes not labeled <arg> and save the results to a new XML file.",1);
+ parser.add_option("rmignore","Remove all boxes marked ignore and save the results to a new XML file.");
+ parser.add_option("rm-if-overlaps","Remove all boxes labeled <arg> if they overlap any box not labeled <arg> and save the results to a new XML file.",1);
+ parser.add_option("jpg", "When saving images to disk, write them as jpg files instead of png.");
+
+ parser.set_group_name("Cropping sub images");
+ parser.add_option("resample", "Crop out images that are centered on each object in the dataset. "
+ "The output is a new XML dataset.");
+ parser.add_option("cropped-object-size", "When doing --resample, make the cropped objects contain about <arg> pixels (default 10000).",1);
+ parser.add_option("min-object-size", "When doing --resample, skip objects that have fewer than <arg> pixels in them (default 1).",1);
+ parser.add_option("crop-size", "When doing --resample, the entire cropped image will be <arg> times wider than the object (default 2.5).",1);
+ parser.add_option("one-object-per-image", "When doing --resample, only include one non-ignored object per image (i.e. the central object).");
+
+
+
+ parser.parse(argc, argv);
+
+ const char* singles[] = {"h","c","r","l","files","convert","parts","rmdiff", "rmtrunc", "rmdupes", "seed", "shuffle", "split", "add",
+ "flip-basic", "flip", "rotate", "tile", "size", "cluster", "resample", "min-object-size", "rmempty",
+ "crop-size", "cropped-object-size", "rmlabel", "rm-other-labels", "rm-if-overlaps", "sort-num-objects",
+ "one-object-per-image", "jpg", "rmignore", "sort"};
+ parser.check_one_time_options(singles);
+ const char* c_sub_ops[] = {"r", "convert"};
+ parser.check_sub_options("c", c_sub_ops);
+ parser.check_sub_option("shuffle", "seed");
+ const char* resample_sub_ops[] = {"min-object-size", "crop-size", "cropped-object-size", "one-object-per-image"};
+ parser.check_sub_options("resample", resample_sub_ops);
+ const char* size_parent_ops[] = {"tile", "cluster"};
+ parser.check_sub_options(size_parent_ops, "size");
+ parser.check_incompatible_options("c", "l");
+ parser.check_incompatible_options("c", "files");
+ parser.check_incompatible_options("c", "rmdiff");
+ parser.check_incompatible_options("c", "rmempty");
+ parser.check_incompatible_options("c", "rmlabel");
+ parser.check_incompatible_options("c", "rm-other-labels");
+ parser.check_incompatible_options("c", "rmignore");
+ parser.check_incompatible_options("c", "rm-if-overlaps");
+ parser.check_incompatible_options("c", "rmdupes");
+ parser.check_incompatible_options("c", "rmtrunc");
+ parser.check_incompatible_options("c", "add");
+ parser.check_incompatible_options("c", "flip");
+ parser.check_incompatible_options("c", "flip-basic");
+ parser.check_incompatible_options("flip", "flip-basic");
+ parser.check_incompatible_options("c", "rotate");
+ parser.check_incompatible_options("c", "rename");
+ parser.check_incompatible_options("c", "ignore");
+ parser.check_incompatible_options("c", "parts");
+ parser.check_incompatible_options("c", "tile");
+ parser.check_incompatible_options("c", "cluster");
+ parser.check_incompatible_options("c", "resample");
+ parser.check_incompatible_options("l", "rename");
+ parser.check_incompatible_options("l", "ignore");
+ parser.check_incompatible_options("l", "add");
+ parser.check_incompatible_options("l", "parts");
+ parser.check_incompatible_options("l", "flip");
+ parser.check_incompatible_options("l", "flip-basic");
+ parser.check_incompatible_options("l", "rotate");
+ parser.check_incompatible_options("files", "rename");
+ parser.check_incompatible_options("files", "ignore");
+ parser.check_incompatible_options("files", "add");
+ parser.check_incompatible_options("files", "parts");
+ parser.check_incompatible_options("files", "flip");
+ parser.check_incompatible_options("files", "flip-basic");
+ parser.check_incompatible_options("files", "rotate");
+ parser.check_incompatible_options("add", "flip");
+ parser.check_incompatible_options("add", "flip-basic");
+ parser.check_incompatible_options("add", "rotate");
+ parser.check_incompatible_options("add", "tile");
+ parser.check_incompatible_options("flip", "tile");
+ parser.check_incompatible_options("flip-basic", "tile");
+ parser.check_incompatible_options("rotate", "tile");
+ parser.check_incompatible_options("cluster", "tile");
+ parser.check_incompatible_options("resample", "tile");
+ parser.check_incompatible_options("flip", "cluster");
+ parser.check_incompatible_options("flip-basic", "cluster");
+ parser.check_incompatible_options("rotate", "cluster");
+ parser.check_incompatible_options("add", "cluster");
+ parser.check_incompatible_options("flip", "resample");
+ parser.check_incompatible_options("flip-basic", "resample");
+ parser.check_incompatible_options("rotate", "resample");
+ parser.check_incompatible_options("add", "resample");
+ parser.check_incompatible_options("shuffle", "tile");
+ parser.check_incompatible_options("sort-num-objects", "tile");
+ parser.check_incompatible_options("sort", "tile");
+ parser.check_incompatible_options("convert", "l");
+ parser.check_incompatible_options("convert", "files");
+ parser.check_incompatible_options("convert", "rename");
+ parser.check_incompatible_options("convert", "ignore");
+ parser.check_incompatible_options("convert", "parts");
+ parser.check_incompatible_options("convert", "cluster");
+ parser.check_incompatible_options("convert", "resample");
+ parser.check_incompatible_options("rmdiff", "rename");
+ parser.check_incompatible_options("rmdiff", "ignore");
+ parser.check_incompatible_options("rmempty", "ignore");
+ parser.check_incompatible_options("rmempty", "rename");
+ parser.check_incompatible_options("rmlabel", "ignore");
+ parser.check_incompatible_options("rmlabel", "rename");
+ parser.check_incompatible_options("rm-other-labels", "ignore");
+ parser.check_incompatible_options("rm-other-labels", "rename");
+ parser.check_incompatible_options("rmignore", "ignore");
+ parser.check_incompatible_options("rmignore", "rename");
+ parser.check_incompatible_options("rm-if-overlaps", "ignore");
+ parser.check_incompatible_options("rm-if-overlaps", "rename");
+ parser.check_incompatible_options("rmdupes", "rename");
+ parser.check_incompatible_options("rmdupes", "ignore");
+ parser.check_incompatible_options("rmtrunc", "rename");
+ parser.check_incompatible_options("rmtrunc", "ignore");
+ const char* convert_args[] = {"pascal-xml","pascal-v1","idl"};
+ parser.check_option_arg_range("convert", convert_args);
+ parser.check_option_arg_range("cluster", 2, 999);
+ parser.check_option_arg_range("rotate", -360, 360);
+ parser.check_option_arg_range("size", 10*10, 1000*1000);
+ parser.check_option_arg_range("min-object-size", 1, 10000*10000);
+ parser.check_option_arg_range("cropped-object-size", 4, 10000*10000);
+ parser.check_option_arg_range("crop-size", 1.0, 100.0);
+
+ if (parser.option("h"))
+ {
+ cout << "Usage: imglab [options] <image files/directories or XML file>\n";
+ parser.print_options(cout);
+ cout << endl << endl;
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("add"))
+ {
+ merge_metadata_files(parser);
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("flip") || parser.option("flip-basic"))
+ {
+ flip_dataset(parser);
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("rotate"))
+ {
+ rotate_dataset(parser);
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("v"))
+ {
+ cout << "imglab v" << VERSION
+ << "\nCompiled: " << __TIME__ << " " << __DATE__
+ << "\nWritten by Davis King\n";
+ cout << "Check for updates at http://dlib.net\n\n";
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("tile"))
+ {
+ return tile_dataset(parser);
+ }
+
+ if (parser.option("cluster"))
+ {
+ return cluster_dataset(parser);
+ }
+
+ if (parser.option("resample"))
+ {
+ return resample_dataset(parser);
+ }
+
+ if (parser.option("c"))
+ {
+ if (parser.option("convert"))
+ {
+ if (parser.option("convert").argument() == "pascal-xml")
+ convert_pascal_xml(parser);
+ else if (parser.option("convert").argument() == "pascal-v1")
+ convert_pascal_v1(parser);
+ else if (parser.option("convert").argument() == "idl")
+ convert_idl(parser);
+ }
+ else
+ {
+ create_new_dataset(parser);
+ }
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("rmdiff"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --rmdiff option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data;
+ load_image_dataset_metadata(data, parser[0]);
+ for (unsigned long i = 0; i < data.images.size(); ++i)
+ {
+ for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
+ {
+ if (data.images[i].boxes[j].difficult)
+ data.images[i].boxes[j].ignore = true;
+ }
+ }
+ save_image_dataset_metadata(data, parser[0]);
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("rmempty"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --rmempty option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data, data2;
+ load_image_dataset_metadata(data, parser[0]);
+
+ data2 = data;
+ data2.images.clear();
+ for (unsigned long i = 0; i < data.images.size(); ++i)
+ {
+ bool has_label = false;
+ for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
+ {
+ if (!data.images[i].boxes[j].ignore)
+ has_label = true;
+ }
+ if (has_label)
+ data2.images.push_back(data.images[i]);
+ }
+ save_image_dataset_metadata(data2, parser[0] + ".rmempty.xml");
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("rmlabel"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --rmlabel option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data;
+ load_image_dataset_metadata(data, parser[0]);
+
+ const auto label = parser.option("rmlabel").argument();
+
+ for (auto&& img : data.images)
+ {
+ std::vector<dlib::image_dataset_metadata::box> boxes;
+ for (auto&& b : img.boxes)
+ {
+ if (b.label != label)
+ boxes.push_back(b);
+ }
+ img.boxes = boxes;
+ }
+
+ save_image_dataset_metadata(data, parser[0] + ".rmlabel-"+label+".xml");
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("rm-other-labels"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --rm-other-labels option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data;
+ load_image_dataset_metadata(data, parser[0]);
+
+ const auto labels = parser.option("rm-other-labels").argument();
+ // replace comma by dash to form the file name
+ std::string strlabels = labels;
+ std::replace(strlabels.begin(), strlabels.end(), ',', '-');
+ std::vector<string> all_labels = split(labels, ",");
+ for (auto&& img : data.images)
+ {
+ std::vector<dlib::image_dataset_metadata::box> boxes;
+ for (auto&& b : img.boxes)
+ {
+ if (std::find(all_labels.begin(), all_labels.end(), b.label) != all_labels.end())
+ boxes.push_back(b);
+ }
+ img.boxes = boxes;
+ }
+
+ save_image_dataset_metadata(data, parser[0] + ".rm-other-labels-"+ strlabels +".xml");
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("rmignore"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --rmignore option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data;
+ load_image_dataset_metadata(data, parser[0]);
+
+ for (auto&& img : data.images)
+ {
+ std::vector<dlib::image_dataset_metadata::box> boxes;
+ for (auto&& b : img.boxes)
+ {
+ if (!b.ignore)
+ boxes.push_back(b);
+ }
+ img.boxes = boxes;
+ }
+
+ save_image_dataset_metadata(data, parser[0] + ".rmignore.xml");
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("rm-if-overlaps"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --rm-if-overlaps option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data;
+ load_image_dataset_metadata(data, parser[0]);
+
+ const auto label = parser.option("rm-if-overlaps").argument();
+
+ test_box_overlap overlaps(0.5);
+
+ for (auto&& img : data.images)
+ {
+ std::vector<dlib::image_dataset_metadata::box> boxes;
+ for (auto&& b : img.boxes)
+ {
+ if (b.label != label)
+ {
+ boxes.push_back(b);
+ }
+ else
+ {
+ bool has_overlap = false;
+ for (auto&& b2 : img.boxes)
+ {
+ if (b2.label != label && overlaps(b2.rect, b.rect))
+ {
+ has_overlap = true;
+ break;
+ }
+ }
+ if (!has_overlap)
+ boxes.push_back(b);
+ }
+ }
+ img.boxes = boxes;
+ }
+
+ save_image_dataset_metadata(data, parser[0] + ".rm-if-overlaps-"+label+".xml");
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("rmdupes"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --rmdupes option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data, data_out;
+ std::set<std::string> hashes;
+ load_image_dataset_metadata(data, parser[0]);
+ data_out = data;
+ data_out.images.clear();
+
+ for (unsigned long i = 0; i < data.images.size(); ++i)
+ {
+ ifstream fin(data.images[i].filename.c_str(), ios::binary);
+ string hash = md5(fin);
+ if (hashes.count(hash) == 0)
+ {
+ hashes.insert(hash);
+ data_out.images.push_back(data.images[i]);
+ }
+ }
+ save_image_dataset_metadata(data_out, parser[0]);
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("rmtrunc"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --rmtrunc option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data;
+ load_image_dataset_metadata(data, parser[0]);
+ {
+ locally_change_current_dir chdir(get_parent_directory(file(parser[0])));
+ for (unsigned long i = 0; i < data.images.size(); ++i)
+ {
+ array2d<unsigned char> img;
+ load_image(img, data.images[i].filename);
+ const rectangle area = get_rect(img);
+ for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
+ {
+ if (!area.contains(data.images[i].boxes[j].rect))
+ data.images[i].boxes[j].ignore = true;
+ }
+ }
+ }
+ save_image_dataset_metadata(data, parser[0]);
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("l"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The -l option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data;
+ load_image_dataset_metadata(data, parser[0]);
+ print_all_labels(data);
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("files"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --files option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data;
+ load_image_dataset_metadata(data, parser[0]);
+ for (size_t i = 0; i < data.images.size(); ++i)
+ cout << data.images[i].filename << "\n";
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("split"))
+ {
+ return split_dataset(parser);
+ }
+
+ if (parser.option("shuffle"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --shuffle option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data;
+ load_image_dataset_metadata(data, parser[0]);
+ const string default_seed = cast_to_string(time(0));
+ const string seed = get_option(parser, "seed", default_seed);
+ dlib::rand rnd(seed);
+ randomize_samples(data.images, rnd);
+ save_image_dataset_metadata(data, parser[0]);
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("sort-num-objects"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --sort-num-objects option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data;
+ load_image_dataset_metadata(data, parser[0]);
+ std::sort(data.images.rbegin(), data.images.rend(),
+ [](const image_dataset_metadata::image& a, const image_dataset_metadata::image& b) { return a.boxes.size() < b.boxes.size(); });
+ save_image_dataset_metadata(data, parser[0]);
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("sort"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --sort option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data;
+ load_image_dataset_metadata(data, parser[0]);
+ std::sort(data.images.begin(), data.images.end(),
+ [](const image_dataset_metadata::image& a, const image_dataset_metadata::image& b) { return a.filename < b.filename; });
+ save_image_dataset_metadata(data, parser[0]);
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("stats"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --stats option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data;
+ load_image_dataset_metadata(data, parser[0]);
+ print_all_label_stats(data);
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("rename"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --rename option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data;
+ load_image_dataset_metadata(data, parser[0]);
+ for (unsigned long i = 0; i < parser.option("rename").count(); ++i)
+ {
+ rename_labels(data, parser.option("rename").argument(0,i), parser.option("rename").argument(1,i));
+ }
+ save_image_dataset_metadata(data, parser[0]);
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.option("ignore"))
+ {
+ if (parser.number_of_arguments() != 1)
+ {
+ cerr << "The --ignore option requires you to give one XML file on the command line." << endl;
+ return EXIT_FAILURE;
+ }
+
+ dlib::image_dataset_metadata::dataset data;
+ load_image_dataset_metadata(data, parser[0]);
+ for (unsigned long i = 0; i < parser.option("ignore").count(); ++i)
+ {
+ ignore_labels(data, parser.option("ignore").argument());
+ }
+ save_image_dataset_metadata(data, parser[0]+".ignored.xml");
+ return EXIT_SUCCESS;
+ }
+
+ if (parser.number_of_arguments() == 1)
+ {
+ metadata_editor editor(parser[0]);
+ if (parser.option("parts"))
+ {
+ std::vector<string> parts = split(parser.option("parts").argument());
+ for (unsigned long i = 0; i < parts.size(); ++i)
+ {
+ editor.add_labelable_part_name(parts[i]);
+ }
+ }
+ editor.wait_until_closed();
+ return EXIT_SUCCESS;
+ }
+
+ cout << "Invalid command, give -h to see options." << endl;
+ return EXIT_FAILURE;
+ }
+ catch (exception& e)
+ {
+ cerr << e.what() << endl;
+ return EXIT_FAILURE;
+ }
+}
+
+// ----------------------------------------------------------------------------------------
+
diff --git a/ml/dlib/tools/imglab/src/metadata_editor.cpp b/ml/dlib/tools/imglab/src/metadata_editor.cpp
new file mode 100644
index 000000000..76177e893
--- /dev/null
+++ b/ml/dlib/tools/imglab/src/metadata_editor.cpp
@@ -0,0 +1,671 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+
+#include "metadata_editor.h"
+#include <dlib/array.h>
+#include <dlib/queue.h>
+#include <dlib/static_set.h>
+#include <dlib/misc_api.h>
+#include <dlib/image_io.h>
+#include <dlib/array2d.h>
+#include <dlib/pixel.h>
+#include <dlib/image_transforms.h>
+#include <dlib/image_processing.h>
+#include <sstream>
+#include <ctime>
+
+using namespace std;
+using namespace dlib;
+
+extern const char* VERSION;
+
+// ----------------------------------------------------------------------------------------
+
+// Builds the editor window for the dataset stored in filename_.  The constructor loads
+// the metadata, fills the image list box, wires up the event handlers and menus, sizes
+// the window around the first image and finally centers and shows the window.
+metadata_editor::
+metadata_editor(
+    const std::string& filename_
+) :
+    mbar(*this),
+    lb_images(*this),
+    image_pos(0),
+    display(*this),
+    overlay_label_name(*this),
+    overlay_label(*this),
+    keyboard_jump_pos(0),
+    last_keyboard_jump_pos_update(0)
+{
+    file metadata_file(filename_);
+    filename = metadata_file.full_name();
+    // Make our current directory be the one that contains the metadata file.  We
+    // do this because that file might contain relative paths to the image files
+    // we are supposed to be loading.
+    set_current_dir(get_parent_directory(metadata_file).full_name());
+
+    load_image_dataset_metadata(metadata, filename);
+
+    // Show one list box entry per image in the dataset.
+    dlib::array<std::string>::expand_1a files;
+    files.resize(metadata.images.size());
+    for (unsigned long i = 0; i < metadata.images.size(); ++i)
+    {
+        files[i] = metadata.images[i].filename;
+    }
+    lb_images.load(files);
+    lb_images.enable_multiple_select();
+
+    lb_images.set_click_handler(*this, &metadata_editor::on_lb_images_clicked);
+
+    overlay_label_name.set_text("Next Label: ");
+    overlay_label.set_width(200);
+
+    display.set_image_clicked_handler(*this, &metadata_editor::on_image_clicked);
+    display.set_overlay_rects_changed_handler(*this, &metadata_editor::on_overlay_rects_changed);
+    display.set_overlay_rect_selected_handler(*this, &metadata_editor::on_overlay_rect_selected);
+    overlay_label.set_text_modified_handler(*this, &metadata_editor::on_overlay_label_changed);
+
+    mbar.set_number_of_menus(2);
+    mbar.set_menu_name(0,"File",'F');
+    mbar.set_menu_name(1,"Help",'H');
+
+
+    mbar.menu(0).add_menu_item(menu_item_text("Save",*this,&metadata_editor::file_save,'S'));
+    mbar.menu(0).add_menu_item(menu_item_text("Save As",*this,&metadata_editor::file_save_as,'A'));
+    mbar.menu(0).add_menu_item(menu_item_separator());
+    mbar.menu(0).add_menu_item(menu_item_text("Remove Selected Images",*this,&metadata_editor::remove_selected_images,'R'));
+    mbar.menu(0).add_menu_item(menu_item_separator());
+    mbar.menu(0).add_menu_item(menu_item_text("Exit",static_cast<base_window&>(*this),&drawable_window::close_window,'x'));
+
+    mbar.menu(1).add_menu_item(menu_item_text("About",*this,&metadata_editor::display_about,'A'));
+
+    // set the size of this window.  The widgets are laid out once before the first
+    // image is loaded and once more after the window has been resized around it.
+    on_window_resized();
+    load_image_and_set_size(0);
+    on_window_resized();
+    if (image_pos < lb_images.size() )
+        lb_images.select(image_pos);
+
+    // make sure the window is centered on the screen.  All of these quantities are
+    // unsigned, so when the window is at least as large as the screen we use an offset
+    // of 0 rather than letting the subtraction wrap around to a huge position.
+    unsigned long width, height;
+    get_size(width, height);
+    unsigned long screen_width, screen_height;
+    get_display_size(screen_width, screen_height);
+    const unsigned long x_offset = (screen_width > width) ? (screen_width-width)/2 : 0;
+    const unsigned long y_offset = (screen_height > height) ? (screen_height-height)/2 : 0;
+    set_pos(x_offset, y_offset);
+
+    show();
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Closes the window as part of tearing the editor down.
+metadata_editor::
+~metadata_editor(
+)
+{
+    this->close_window();
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Forwards a part name to the image display widget so it is registered there as a
+// labelable part.
+void metadata_editor::
+add_labelable_part_name (
+    const std::string& name
+)
+{
+    this->display.add_labelable_part_name(name);
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Menu handler for "Save": writes the metadata back to the file it was loaded from.
+void metadata_editor::
+file_save()
+{
+    this->save_metadata_to_file(this->filename);
+}
+
+// ----------------------------------------------------------------------------------------
+
+void metadata_editor::
+save_metadata_to_file (
+ const std::string& file
+)
+{
+ try
+ {
+ save_image_dataset_metadata(metadata, file);
+ }
+ catch (dlib::error& e)
+ {
+ message_box("Error saving file", e.what());
+ }
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Menu handler for "Save As": opens a save-file dialog and hands it
+// save_metadata_to_file() as the handler.
+void metadata_editor::
+file_save_as()
+{
+    save_file_box(*this, &metadata_editor::save_metadata_to_file);
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Drops every image currently selected in the list box from the dataset, rebuilds the
+// list box and then selects the entry just before the first removed one.
+void metadata_editor::
+remove_selected_images()
+{
+    // Gather the selected indices, clearing each selection as we go and remembering
+    // the smallest index.  When nothing is selected the smallest index stays at the
+    // number of entries in the list box.
+    dlib::queue<unsigned long>::kernel_1a selected;
+    lb_images.get_selected(selected);
+    unsigned long first_removed = lb_images.size();
+    for (selected.reset(); selected.move_next(); )
+    {
+        const unsigned long idx = selected.element();
+        lb_images.unselect(idx);
+        first_removed = std::min(first_removed, idx);
+    }
+
+
+    // Keep only the images whose index is not in the removal set.
+    dlib::static_set<unsigned long>::kernel_1a doomed;
+    doomed.load(selected);
+    std::vector<dlib::image_dataset_metadata::image> survivors;
+    for (unsigned long i = 0; i < metadata.images.size(); ++i)
+    {
+        if (!doomed.is_member(i))
+            survivors.push_back(metadata.images[i]);
+    }
+    metadata.images.swap(survivors);
+
+
+    // Refill the list box with the file names of the remaining images.
+    dlib::array<std::string>::expand_1a names;
+    names.resize(metadata.images.size());
+    for (unsigned long i = 0; i < metadata.images.size(); ++i)
+        names[i] = metadata.images[i].filename;
+    lb_images.load(names);
+
+
+    // Step back one entry from the first removed index, unless it was already 0.
+    select_image(first_removed != 0 ? first_removed-1 : 0);
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Lays the widgets out again whenever the window size changes: the image list runs down
+// the left edge under the menu bar, the label caption and text field sit to its right,
+// and the image display takes the remaining space.
+void metadata_editor::
+on_window_resized(
+)
+{
+    drawable_window::on_window_resized();
+
+    unsigned long win_width, win_height;
+    get_size(win_width, win_height);
+
+    // Image list: fixed width of 180, directly below the menu bar.
+    lb_images.set_pos(0, mbar.bottom()+1);
+    lb_images.set_size(180, win_height - mbar.height());
+
+    // Caption, vertically centered against the text field next to it.
+    overlay_label_name.set_pos(
+        lb_images.right()+10,
+        mbar.bottom() + (overlay_label.height()-overlay_label_name.height())/2+1
+    );
+    overlay_label.set_pos(overlay_label_name.right(), mbar.bottom()+1);
+
+    // The display is positioned first because its size is derived from its own
+    // left and top coordinates.
+    display.set_pos(lb_images.right(), overlay_label.bottom()+3);
+    display.set_size(win_width - display.left(), win_height - display.top());
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Copies every box of image `prev` into image `next`, moving each box to the position a
+// correlation tracker reports after being started on the box in image `prev` and updated
+// with image `next`.  All other box fields are copied unchanged.
+//
+// Does nothing when prev == next or when either index is outside data.images.  Both
+// image files are loaded from disk through dlib::load_image.
+void propagate_boxes(
+    dlib::image_dataset_metadata::dataset& data,
+    unsigned long prev,
+    unsigned long next
+)
+{
+    // Validate both indices: prev is used to index data.images just like next is.
+    const unsigned long num_images = data.images.size();
+    if (prev == next || prev >= num_images || next >= num_images)
+        return;
+
+    array2d<rgb_pixel> img1, img2;
+    dlib::load_image(img1, data.images[prev].filename);
+    dlib::load_image(img2, data.images[next].filename);
+    for (unsigned long i = 0; i < data.images[prev].boxes.size(); ++i)
+    {
+        // A fresh tracker per box: start it on the box in the first image and let it
+        // locate the same content in the second image.
+        correlation_tracker tracker;
+        tracker.start_track(img1, data.images[prev].boxes[i].rect);
+        tracker.update(img2);
+        dlib::image_dataset_metadata::box box = data.images[prev].boxes[i];
+        box.rect = tracker.get_position();
+        data.images[next].boxes.push_back(box);
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+
+// For every box in image `prev` that carries `label`, finds the box in image `next` that
+// overlaps it the most and, if the overlap exceeds 0.5 and that box has no label yet,
+// gives it `label`.
+//
+// The overlap is the area of the intersection divided by the area of `cur+next_box`.
+// Does nothing when prev == next or when either index is outside data.images.
+void propagate_labels(
+    const std::string& label,
+    dlib::image_dataset_metadata::dataset& data,
+    unsigned long prev,
+    unsigned long next
+)
+{
+    // Validate both indices: prev is used to index data.images just like next is.
+    const unsigned long num_images = data.images.size();
+    if (prev == next || prev >= num_images || next >= num_images)
+        return;
+
+
+    for (unsigned long i = 0; i < data.images[prev].boxes.size(); ++i)
+    {
+        if (data.images[prev].boxes[i].label != label)
+            continue;
+
+        // figure out which box in the next image matches the current one the best
+        const rectangle cur = data.images[prev].boxes[i].rect;
+        double best_overlap = 0;
+        unsigned long best_idx = 0;
+        for (unsigned long j = 0; j < data.images[next].boxes.size(); ++j)
+        {
+            const rectangle next_box = data.images[next].boxes[j].rect;
+            const double overlap = cur.intersect(next_box).area()/(double)(cur+next_box).area();
+            if (overlap > best_overlap)
+            {
+                best_overlap = overlap;
+                best_idx = j;
+            }
+        }
+
+        // If we found a matching rectangle in the next image and the best match doesn't
+        // already have a label.  (best_overlap stays 0 when the next image has no boxes,
+        // so best_idx is only used when it refers to a real box.)
+        if (best_overlap > 0.5 && data.images[next].boxes[best_idx].label == "")
+        {
+            data.images[next].boxes[best_idx].label = label;
+        }
+    }
+
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Returns true when `label` is empty, when some box in `img` carries `label`, or when
+// no box in `img` has an empty label (which includes an image without any boxes).
+bool has_label_or_all_boxes_labeled (
+    const std::string& label,
+    const dlib::image_dataset_metadata::image& img
+)
+{
+    if (label.empty())
+        return true;
+
+    bool saw_unlabeled_box = false;
+    for (const auto& candidate : img.boxes)
+    {
+        if (candidate.label == label)
+            return true;
+        if (candidate.label.empty())
+            saw_unlabeled_box = true;
+    }
+
+    return !saw_unlabeled_box;
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Keyboard handler for the editor window.
+//
+// Printable keys:
+//   - tab gives the label text field input focus and selects its text.
+//   - digits (while the label field does not have focus) accumulate into an image
+//     index and jump to it; the accumulated number is reset once 2 or more seconds
+//     have passed since the last digit.
+//   - alt+d removes the selected images.
+//   - e toggles display of the histogram equalized image.
+//   - w/W and s/S act like the up and down arrow keys.
+// Up/down arrows move to the previous/next image.  With ctrl held the current default
+// label is propagated to the neighbouring image first, and with ctrl+shift the boxes of
+// the current image are propagated to the neighbouring image.
+void metadata_editor::
+on_keydown (
+    unsigned long key,
+    bool is_printable,
+    unsigned long state
+)
+{
+    drawable_window::on_keydown(key, is_printable, state);
+
+    if (is_printable)
+    {
+        if (key == '\t')
+        {
+            overlay_label.give_input_focus();
+            overlay_label.select_all_text();
+        }
+
+        // If the user types a number then jump to that image.
+        if ('0' <= key && key <= '9' && metadata.images.size() != 0 && !overlay_label.has_input_focus())
+        {
+            time_t curtime = time(0);
+            // If it's been a while since the user typed numbers then forget the last jump
+            // position and start accumulating numbers over again.
+            if (curtime-last_keyboard_jump_pos_update >= 2)
+                keyboard_jump_pos = 0;
+            last_keyboard_jump_pos_update = curtime;
+
+            keyboard_jump_pos *= 10;
+            keyboard_jump_pos += key-'0';
+            if (keyboard_jump_pos >= metadata.images.size())
+                keyboard_jump_pos = metadata.images.size()-1;
+
+            image_pos = keyboard_jump_pos;
+            select_image(image_pos);
+        }
+        else
+        {
+            last_keyboard_jump_pos_update = 0;
+        }
+
+        if (key == 'd' && (state&base_window::KBD_MOD_ALT))
+        {
+            remove_selected_images();
+        }
+
+        if (key == 'e' && !overlay_label.has_input_focus())
+        {
+            display_equialized_image = !display_equialized_image;
+            select_image(image_pos);
+        }
+
+        // Make 'w' and 's' act like KEY_UP and KEY_DOWN
+        if ((key == 'w' || key == 'W') && !overlay_label.has_input_focus())
+        {
+            key = base_window::KEY_UP;
+        }
+        else if ((key == 's' || key == 'S') && !overlay_label.has_input_focus())
+        {
+            key = base_window::KEY_DOWN;
+        }
+        else
+        {
+            return;
+        }
+    }
+
+    // The propagation branches below index metadata.images[image_pos], which is only
+    // valid while image_pos refers to an existing image.  That is not the case when the
+    // dataset is empty (e.g. after every image has been removed).
+    const bool have_current_image = image_pos < metadata.images.size();
+
+    if (key == base_window::KEY_UP)
+    {
+        if ((state&KBD_MOD_CONTROL) && (state&KBD_MOD_SHIFT))
+        {
+            // Don't do anything if there are no boxes in the current image.
+            if (!have_current_image || metadata.images[image_pos].boxes.size() == 0)
+                return;
+            // Also don't do anything if there *are* boxes in the previous image.  This
+            // must include image 0, hence the comparison against 0.
+            if (image_pos > 0 && metadata.images[image_pos-1].boxes.size() != 0)
+                return;
+
+            // When image_pos is 0 the index image_pos-1 wraps around to a huge value,
+            // which propagate_boxes() and select_image() both reject.
+            propagate_boxes(metadata, image_pos, image_pos-1);
+        }
+        else if (state&base_window::KBD_MOD_CONTROL)
+        {
+            // If the label we are supposed to propagate doesn't exist in the current image
+            // then don't advance.
+            if (!have_current_image ||
+                !has_label_or_all_boxes_labeled(display.get_default_overlay_rect_label(),metadata.images[image_pos]))
+                return;
+
+            // if the previous image is going to be empty then fast forward past it.  The
+            // loop stops at index 1 so that select_image() below still has a valid
+            // image (index 0) to show.
+            while (image_pos > 1 && metadata.images[image_pos-1].boxes.size() == 0)
+                --image_pos;
+
+            propagate_labels(display.get_default_overlay_rect_label(), metadata, image_pos, image_pos-1);
+        }
+        select_image(image_pos-1);
+    }
+    else if (key == base_window::KEY_DOWN)
+    {
+        if ((state&KBD_MOD_CONTROL) && (state&KBD_MOD_SHIFT))
+        {
+            // Don't do anything if there are no boxes in the current image.
+            if (!have_current_image || metadata.images[image_pos].boxes.size() == 0)
+                return;
+            // Also don't do anything if there *are* boxes in the next image.
+            if (image_pos+1 < metadata.images.size() && metadata.images[image_pos+1].boxes.size() != 0)
+                return;
+
+            propagate_boxes(metadata, image_pos, image_pos+1);
+        }
+        else if (state&base_window::KBD_MOD_CONTROL)
+        {
+            // If the label we are supposed to propagate doesn't exist in the current image
+            // then don't advance.
+            if (!have_current_image ||
+                !has_label_or_all_boxes_labeled(display.get_default_overlay_rect_label(),metadata.images[image_pos]))
+                return;
+
+            // if the next image is going to be empty then fast forward to the next one
+            while (image_pos+1 < metadata.images.size() && metadata.images[image_pos+1].boxes.size() == 0)
+                ++image_pos;
+
+            propagate_labels(display.get_default_overlay_rect_label(), metadata, image_pos, image_pos+1);
+        }
+        select_image(image_pos+1);
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Makes idx the only selected entry in the list box and loads that image.  When idx is
+// out of range nothing happens, unless the list box is empty, in which case the display
+// is cleared and given an empty image.
+void metadata_editor::
+select_image(
+    unsigned long idx
+)
+{
+    const unsigned long num_entries = lb_images.size();
+
+    if (idx >= num_entries)
+    {
+        if (num_entries == 0)
+        {
+            display.clear_overlay();
+            array2d<unsigned char> blank;
+            display.set_image(blank);
+        }
+        return;
+    }
+
+    // unselect all currently selected images
+    dlib::queue<unsigned long>::kernel_1a selected;
+    lb_images.get_selected(selected);
+    for (selected.reset(); selected.move_next(); )
+        lb_images.unselect(selected.element());
+
+
+    lb_images.select(idx);
+    load_image(idx);
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Click handler for the image list box: loads the image at the clicked index.
+void metadata_editor::
+on_lb_images_clicked(
+    unsigned long idx
+)
+{
+    this->load_image(idx);
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Converts the boxes of one dataset image into overlay rectangles for the display.  The
+// rect, label and parts are copied, a box's ignore flag becomes crossed_out, and the
+// color is looked up from the box label through string_to_color.
+std::vector<dlib::image_display::overlay_rect> get_overlays (
+    const dlib::image_dataset_metadata::image& data,
+    color_mapper& string_to_color
+)
+{
+    std::vector<dlib::image_display::overlay_rect> overlays(data.boxes.size());
+    for (unsigned long k = 0; k < overlays.size(); ++k)
+    {
+        const dlib::image_dataset_metadata::box& src = data.boxes[k];
+        dlib::image_display::overlay_rect& dst = overlays[k];
+
+        dst.rect = src.rect;
+        dst.label = src.label;
+        dst.parts = src.parts;
+        dst.crossed_out = src.ignore;
+        dst.color = string_to_color(src.label);
+    }
+    return overlays;
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Makes idx the current image and shows it together with its boxes.  Out of range
+// indices are ignored.  If the image cannot be loaded the error is reported in a message
+// box and the display is given whatever is in the (then unloaded) image object; the
+// boxes are still added as overlays.
+void metadata_editor::
+load_image(
+    unsigned long idx
+)
+{
+    if (idx >= metadata.images.size())
+        return;
+
+    image_pos = idx;
+
+    array2d<rgb_pixel> pixels;
+    display.clear_overlay();
+    try
+    {
+        const std::string& image_file = metadata.images[idx].filename;
+        dlib::load_image(pixels, image_file);
+        // The title is only updated once the image has loaded successfully.
+        set_title(metadata.name + " #"+cast_to_string(idx)+": " +image_file);
+    }
+    catch (exception& load_failure)
+    {
+        message_box("Error loading image", load_failure.what());
+    }
+
+    if (display_equialized_image)
+    {
+        equalize_histogram(pixels);
+    }
+    display.set_image(pixels);
+    display.add_overlay(get_overlays(metadata.images[idx], string_to_color));
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Same as load_image(), but additionally resizes the window so that the image fits:
+// the window is made at least 300x300 and is kept 100 pixels smaller than the screen in
+// each dimension.  Out of range indices are ignored.  If the image cannot be loaded the
+// error is reported in a message box and the window is sized as for an empty image.
+void metadata_editor::
+load_image_and_set_size(
+    unsigned long idx
+)
+{
+    if (idx >= metadata.images.size())
+        return;
+
+    image_pos = idx;
+
+    array2d<rgb_pixel> img;
+    display.clear_overlay();
+    try
+    {
+        dlib::load_image(img, metadata.images[idx].filename);
+        set_title(metadata.name + " #"+cast_to_string(idx)+": " +metadata.images[idx].filename);
+    }
+    catch (exception& e)
+    {
+        message_box("Error loading image", e.what());
+    }
+
+
+    unsigned long screen_width, screen_height;
+    get_display_size(screen_width, screen_height);
+
+
+    // Room for the widgets left of/above the display, the image itself and a small border.
+    unsigned long needed_width = display.left() + img.nc() + 4;
+    unsigned long needed_height = display.top() + img.nr() + 4;
+    if (needed_width < 300) needed_width = 300;
+    if (needed_height < 300) needed_height = 300;
+
+    // Never let the window come closer than `margin` pixels to the screen size.  The
+    // comparison is written as needed+margin > screen so that a window that is merely
+    // a little larger than the screen is clamped as well, and the screen > margin test
+    // keeps the unsigned subtraction from wrapping around on a very small screen.
+    const unsigned long margin = 100;
+    if (screen_width > margin && needed_width + margin > screen_width)
+        needed_width = screen_width - margin;
+    if (screen_height > margin && needed_height + margin > screen_height)
+        needed_height = screen_height - margin;
+
+    set_size(needed_width, needed_height);
+
+
+    if (display_equialized_image)
+        equalize_histogram(img);
+    display.set_image(img);
+    display.add_overlay(get_overlays(metadata.images[idx], string_to_color));
+}
+
+// ----------------------------------------------------------------------------------------
+
+void metadata_editor::
+on_overlay_rects_changed(
+)
+{
+    // Rebuilds the stored boxes of the current image from the rectangles that
+    // the display widget currently holds.
+    using namespace dlib::image_dataset_metadata;
+
+    // Nothing to synchronize when image_pos does not refer to a dataset image.
+    if (image_pos >= metadata.images.size())
+        return;
+
+    const std::vector<image_display::overlay_rect>& overlay_rects = display.get_overlay_rects();
+    std::vector<box>& stored = metadata.images[image_pos].boxes;
+
+    stored.clear();
+    for (const auto& shown : overlay_rects)
+    {
+        box converted;
+        converted.label = shown.label;
+        converted.rect = shown.rect;
+        converted.parts = shown.parts;
+        // A crossed out rectangle in the display is an ignored box in the metadata.
+        converted.ignore = shown.crossed_out;
+        stored.push_back(converted);
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+
+void metadata_editor::
+on_image_clicked(
+    const point& /*p*/, bool /*is_double_click*/, unsigned long /*btn*/
+)
+{
+    // The click details are not used.  The default overlay rectangle color is
+    // set to the color mapped to the trimmed text of the label field.
+    const std::string next_label = trim(overlay_label.text());
+    display.set_default_overlay_rect_color(string_to_color(next_label));
+}
+
+// ----------------------------------------------------------------------------------------
+
+void metadata_editor::
+on_overlay_label_changed(
+)
+{
+    // Makes the trimmed text of the label field the default label for overlay
+    // rectangles in the display.
+    const std::string next_label = trim(overlay_label.text());
+    display.set_default_overlay_rect_label(next_label);
+}
+
+// ----------------------------------------------------------------------------------------
+
+void metadata_editor::
+on_overlay_rect_selected(
+    const image_display::overlay_rect& orect
+)
+{
+    // Adopts the label of the selected rectangle: it is copied into the label
+    // field and becomes the display's default label and default color.
+    const std::string& selected_label = orect.label;
+
+    overlay_label.set_text(selected_label);
+    display.set_default_overlay_rect_label(selected_label);
+    display.set_default_overlay_rect_color(string_to_color(selected_label));
+}
+
+// ----------------------------------------------------------------------------------------
+
+void metadata_editor::
+display_about(
+)
+{
+    // Builds the help text one paragraph at a time, wraps each paragraph with
+    // wrap_string() and shows the result in a message box.
+    const std::string version_line = "Image Labeler v" + string(VERSION) + ".";
+
+    const char* const overview =
+        "This program is a tool for labeling images with rectangles. ";
+
+    const char* const rectangles_help =
+        "You can add a new rectangle by holding the shift key, left clicking "
+        "the mouse, and dragging it. New rectangles are given the label from the \"Next Label\" "
+        "field at the top of the application. You can quickly edit the contents of the Next Label field "
+        "by hitting the tab key. Double clicking "
+        "a rectangle selects it and the delete key removes it. You can also mark "
+        "a rectangle as ignored by hitting the i or END keys when it is selected. Ignored "
+        "rectangles are visually displayed with an X through them. You can remove an image "
+        "entirely by selecting it in the list on the left and pressing alt+d.";
+
+    const char* const parts_help =
+        "It is also possible to label object parts by selecting a rectangle and "
+        "then right clicking. A popup menu will appear and you can select a part label. "
+        "Note that you must define the allowable part labels by giving --parts on the "
+        "command line. An example would be '--parts \"leye reye nose mouth\"'.";
+
+    const char* const navigation_help =
+        "Press the down or s key to select the next image in the list and the up or w "
+        "key to select the previous one.";
+
+    const char* const advanced_help =
+        "Additionally, you can hold ctrl and then scroll the mouse wheel to zoom. A normal left click "
+        "and drag allows you to navigate around the image. Holding ctrl and "
+        "left clicking a rectangle will give it the label from the Next Label field. "
+        "Holding shift + right click and then dragging allows you to move things around. "
+        "Holding ctrl and pressing the up or down keyboard keys will propagate "
+        "rectangle labels from one image to the next and also skip empty images. "
+        "Similarly, holding ctrl+shift will propagate entire boxes via a visual tracking "
+        "algorithm from one image to the next. "
+        "Finally, typing a number on the keyboard will jump you to a specific image.";
+
+    const char* const equalize_help =
+        "You can also toggle image histogram equalization by pressing the e key.";
+
+    // Paragraphs are separated by a blank line; the last one ends with a
+    // single newline.
+    std::ostringstream sout;
+    sout << wrap_string(version_line,0,0) << "\n\n";
+    sout << wrap_string(overview,0,0) << "\n\n";
+    sout << wrap_string(rectangles_help,0,0) << "\n\n";
+    sout << wrap_string(parts_help,0,0) << "\n\n";
+    sout << wrap_string(navigation_help,0,0) << "\n\n";
+    sout << wrap_string(advanced_help,0,0) << "\n\n";
+    sout << wrap_string(equalize_help,0,0) << "\n";
+
+    message_box("About Image Labeler",sout.str());
+}
+
+// ----------------------------------------------------------------------------------------
+
diff --git a/ml/dlib/tools/imglab/src/metadata_editor.h b/ml/dlib/tools/imglab/src/metadata_editor.h
new file mode 100644
index 000000000..71aa14ace
--- /dev/null
+++ b/ml/dlib/tools/imglab/src/metadata_editor.h
@@ -0,0 +1,116 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_METADATA_EdITOR_H__
+#define DLIB_METADATA_EdITOR_H__
+
+#include <dlib/gui_widgets.h>
+#include <dlib/data_io.h>
+#include <dlib/pixel.h>
+#include <map>
+
+// ----------------------------------------------------------------------------------------
+
+class color_mapper
+{
+    // Function object that hands out one color per distinct string.  A string
+    // that was seen before gets the color it was given the first time; a new
+    // string gets a fresh hue and is remembered.
+public:
+
+    dlib::rgb_alpha_pixel operator() (
+        const std::string& str
+    )
+    {
+        const auto known = colors.find(str);
+        if (known != colors.end())
+            return known->second;
+
+        // New string: derive the hue from how many colors exist so far, with
+        // fixed saturation and intensity.
+        dlib::hsi_pixel hsi;
+        hsi.h = reverse(colors.size());
+        hsi.s = 255;
+        hsi.i = 150;
+
+        dlib::rgb_alpha_pixel rgba;
+        dlib::assign_pixel(rgba, hsi);
+        colors[str] = rgba;
+        return rgba;
+    }
+
+private:
+
+    // Reverses the order of the bits in b.  Feeding consecutive counts through
+    // a bit reversal spreads the hues evenly as they are allocated: the first
+    // colors are maximally different, later ones become interleaved and
+    // progressively more similar.
+    unsigned char reverse(unsigned char b)
+    {
+        b = ((b * 0x0802LU & 0x22110LU) | (b * 0x8020LU & 0x88440LU)) * 0x10101LU >> 16;
+        return b;
+    }
+
+    // Colors handed out so far, keyed by string.
+    std::map<std::string, dlib::rgb_alpha_pixel> colors;
+};
+
+// ----------------------------------------------------------------------------------------
+
+// Window (derived from dlib::drawable_window) for viewing and editing the
+// box annotations of an image dataset held in a
+// dlib::image_dataset_metadata::dataset.
+class metadata_editor : public dlib::drawable_window
+{
+public:
+ // NOTE(review): filename_ is presumably the dataset metadata file to open;
+ // the constructor body is not visible here -- confirm.
+ metadata_editor(
+ const std::string& filename_
+ );
+
+ ~metadata_editor();
+
+ // NOTE(review): presumably registers a part label that can be chosen for
+ // rectangles (the about text mentions --parts) -- confirm against the
+ // implementation.
+ void add_labelable_part_name (
+ const std::string& name
+ );
+
+private:
+
+ void file_save();
+ void file_save_as();
+ void remove_selected_images();
+
+ virtual void on_window_resized();
+ virtual void on_keydown (
+ unsigned long key,
+ bool is_printable,
+ unsigned long state
+ );
+
+ void on_lb_images_clicked(unsigned long idx);
+ void select_image(unsigned long idx);
+ void save_metadata_to_file (const std::string& file);
+ // Shows metadata.images[idx] and its boxes in the display; does nothing when
+ // idx is out of range.
+ void load_image(unsigned long idx);
+ // Same as load_image() but also resizes the window to fit the image.
+ void load_image_and_set_size(unsigned long idx);
+ // Sets the display's default overlay rect color from the label field text;
+ // the click arguments are unused.
+ void on_image_clicked(const dlib::point& p, bool is_double_click, unsigned long btn);
+ // Copies the display's overlay rects into the boxes of the current image.
+ void on_overlay_rects_changed();
+ // Sets the display's default overlay rect label from the label field text.
+ void on_overlay_label_changed();
+ // Copies the selected rect's label into the label field and makes it the
+ // display's default label and color.
+ void on_overlay_rect_selected(const dlib::image_display::overlay_rect& orect);
+
+ // Shows the "About Image Labeler" help text in a message box.
+ void display_about();
+
+ // NOTE(review): presumably the path the metadata is loaded from / saved to -- confirm.
+ std::string filename;
+ // The dataset being edited (image list and per-image boxes).
+ dlib::image_dataset_metadata::dataset metadata;
+
+ dlib::menu_bar mbar;
+ dlib::list_box lb_images;
+ // Index into metadata.images of the image most recently passed to
+ // load_image() / load_image_and_set_size().
+ unsigned long image_pos;
+
+ // Widget that shows the current image and its overlay rectangles.
+ dlib::image_display display;
+ dlib::label overlay_label_name;
+ // Text field whose trimmed text is used as the default label for rectangles.
+ dlib::text_field overlay_label;
+
+ // NOTE(review): presumably state for the "type a number to jump to an image"
+ // feature described in the about text -- confirm in on_keydown.
+ unsigned long keyboard_jump_pos;
+ time_t last_keyboard_jump_pos_update;
+ // When true, images are histogram equalized before being displayed.
+ bool display_equialized_image = false;
+ // Maps a label string to the color used to draw rectangles with that label.
+ color_mapper string_to_color;
+};
+
+// ----------------------------------------------------------------------------------------
+
+
+#endif // DLIB_METADATA_EdITOR_H__
+