From c21c3b0befeb46a51b6bf3758ffa30813bea0ff0 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 9 Mar 2024 14:19:22 +0100 Subject: Adding upstream version 1.44.3. Signed-off-by: Daniel Baumann --- ml/dlib/tools/imglab/src/cluster.cpp | 260 +++++ ml/dlib/tools/imglab/src/cluster.h | 11 + ml/dlib/tools/imglab/src/common.cpp | 60 ++ ml/dlib/tools/imglab/src/common.h | 45 + ml/dlib/tools/imglab/src/convert_idl.cpp | 184 ++++ ml/dlib/tools/imglab/src/convert_idl.h | 14 + ml/dlib/tools/imglab/src/convert_pascal_v1.cpp | 177 ++++ ml/dlib/tools/imglab/src/convert_pascal_v1.h | 13 + ml/dlib/tools/imglab/src/convert_pascal_xml.cpp | 239 +++++ ml/dlib/tools/imglab/src/convert_pascal_xml.h | 12 + ml/dlib/tools/imglab/src/flip_dataset.cpp | 249 +++++ ml/dlib/tools/imglab/src/flip_dataset.h | 12 + ml/dlib/tools/imglab/src/main.cpp | 1145 +++++++++++++++++++++++ ml/dlib/tools/imglab/src/metadata_editor.cpp | 671 +++++++++++++ ml/dlib/tools/imglab/src/metadata_editor.h | 116 +++ 15 files changed, 3208 insertions(+) create mode 100644 ml/dlib/tools/imglab/src/cluster.cpp create mode 100644 ml/dlib/tools/imglab/src/cluster.h create mode 100644 ml/dlib/tools/imglab/src/common.cpp create mode 100644 ml/dlib/tools/imglab/src/common.h create mode 100644 ml/dlib/tools/imglab/src/convert_idl.cpp create mode 100644 ml/dlib/tools/imglab/src/convert_idl.h create mode 100644 ml/dlib/tools/imglab/src/convert_pascal_v1.cpp create mode 100644 ml/dlib/tools/imglab/src/convert_pascal_v1.h create mode 100644 ml/dlib/tools/imglab/src/convert_pascal_xml.cpp create mode 100644 ml/dlib/tools/imglab/src/convert_pascal_xml.h create mode 100644 ml/dlib/tools/imglab/src/flip_dataset.cpp create mode 100644 ml/dlib/tools/imglab/src/flip_dataset.h create mode 100644 ml/dlib/tools/imglab/src/main.cpp create mode 100644 ml/dlib/tools/imglab/src/metadata_editor.cpp create mode 100644 ml/dlib/tools/imglab/src/metadata_editor.h (limited to 'ml/dlib/tools/imglab/src') diff --git a/ml/dlib/tools/imglab/src/cluster.cpp b/ml/dlib/tools/imglab/src/cluster.cpp new file mode 100644 index 000000000..23b289a7f --- /dev/null +++ b/ml/dlib/tools/imglab/src/cluster.cpp @@ -0,0 +1,260 @@ +// Copyright (C) 2015 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. + +#include "cluster.h" +#include +#include +#include +#include +#include +#include +#include +#include + +// ---------------------------------------------------------------------------------------- + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------- + +struct assignment +{ + unsigned long c; + double dist; + unsigned long idx; + + bool operator<(const assignment& item) const + { return dist < item.dist; } +}; + +std::vector angular_cluster ( + std::vector > feats, + const unsigned long num_clusters +) +{ + DLIB_CASSERT(feats.size() != 0, "The dataset can't be empty"); + for (unsigned long i = 0; i < feats.size(); ++i) + { + DLIB_CASSERT(feats[i].size() == feats[0].size(), "All feature vectors must have the same length."); + } + + // find the centroid of feats + matrix m; + for (unsigned long i = 0; i < feats.size(); ++i) + m += feats[i]; + m /= feats.size(); + + // Now center feats and then project onto the unit sphere. The reason for projecting + // onto the unit sphere is so pick_initial_centers() works in a sensible way. + for (unsigned long i = 0; i < feats.size(); ++i) + { + feats[i] -= m; + double len = length(feats[i]); + if (len != 0) + feats[i] /= len; + } + + // now do angular clustering of the points + std::vector > centers; + pick_initial_centers(num_clusters, centers, feats, linear_kernel >(), 0.05); + find_clusters_using_angular_kmeans(feats, centers); + + // and then report the resulting assignments + std::vector assignments; + for (unsigned long i = 0; i < feats.size(); ++i) + { + assignment temp; + temp.c = nearest_center(centers, feats[i]); + temp.dist = length(feats[i] - centers[temp.c]); + temp.idx = i; + assignments.push_back(temp); + } + return assignments; +} + +// ---------------------------------------------------------------------------------------- + +bool compare_first ( + const std::pair& a, + const std::pair& b +) +{ + return a.first < b.first; +} + +// ---------------------------------------------------------------------------------------- + +double mean_aspect_ratio ( + const image_dataset_metadata::dataset& data +) +{ + double sum = 0; + double cnt = 0; + for (unsigned long i = 0; i < data.images.size(); ++i) + { + for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) + { + rectangle rect = data.images[i].boxes[j].rect; + if (rect.area() == 0 || data.images[i].boxes[j].ignore) + continue; + sum += rect.width()/(double)rect.height(); + ++cnt; + } + } + + if (cnt != 0) + return sum/cnt; + else + return 0; +} + +// ---------------------------------------------------------------------------------------- + +bool has_non_ignored_boxes (const image_dataset_metadata::image& img) +{ + for (auto&& b : img.boxes) + { + if (!b.ignore) + return true; + } + return false; +} + +// ---------------------------------------------------------------------------------------- + +int cluster_dataset( + const dlib::command_line_parser& parser +) +{ + // make sure the user entered an argument to this program + if (parser.number_of_arguments() != 1) + { + cerr << "The --cluster option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + const unsigned long num_clusters = get_option(parser, "cluster", 2); + const unsigned long chip_size = get_option(parser, "size", 8000); + + image_dataset_metadata::dataset data; + + image_dataset_metadata::load_image_dataset_metadata(data, parser[0]); + set_current_dir(get_parent_directory(file(parser[0]))); + + const double aspect_ratio = mean_aspect_ratio(data); + + dlib::array > images; + std::vector > feats; + console_progress_indicator pbar(data.images.size()); + // extract all the object chips and HOG features. + cout << "Loading image data..." << endl; + for (unsigned long i = 0; i < data.images.size(); ++i) + { + pbar.print_status(i); + if (!has_non_ignored_boxes(data.images[i])) + continue; + + array2d img, chip; + load_image(img, data.images[i].filename); + + for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) + { + if (data.images[i].boxes[j].ignore || data.images[i].boxes[j].rect.area() < 10) + continue; + drectangle rect = data.images[i].boxes[j].rect; + rect = set_aspect_ratio(rect, aspect_ratio); + extract_image_chip(img, chip_details(rect, chip_size), chip); + feats.push_back(extract_fhog_features(chip)); + images.push_back(chip); + } + } + + if (feats.size() == 0) + { + cerr << "No non-ignored object boxes found in the XML dataset. You can't cluster an empty dataset." << endl; + return EXIT_FAILURE; + } + + cout << "\nClustering objects..." << endl; + std::vector assignments = angular_cluster(feats, num_clusters); + + + // Now output each cluster to disk as an XML file. + for (unsigned long c = 0; c < num_clusters; ++c) + { + // We are going to accumulate all the image metadata for cluster c. We put it + // into idata so we can sort the images such that images with central chips + // come before less central chips. The idea being to get the good chips to + // show up first in the listing, making it easy to manually remove bad ones if + // that is desired. + std::vector > idata(data.images.size()); + unsigned long idx = 0; + for (unsigned long i = 0; i < data.images.size(); ++i) + { + idata[i].first = std::numeric_limits::infinity(); + idata[i].second.filename = data.images[i].filename; + if (!has_non_ignored_boxes(data.images[i])) + continue; + + for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) + { + idata[i].second.boxes.push_back(data.images[i].boxes[j]); + + if (data.images[i].boxes[j].ignore || data.images[i].boxes[j].rect.area() < 10) + continue; + + // If this box goes into cluster c then update the score for the whole + // image based on this boxes' score. Otherwise, mark the box as + // ignored. + if (assignments[idx].c == c) + idata[i].first = std::min(idata[i].first, assignments[idx].dist); + else + idata[i].second.boxes.back().ignore = true; + + ++idx; + } + } + + // now save idata to an xml file. + std::sort(idata.begin(), idata.end(), compare_first); + image_dataset_metadata::dataset cdata; + cdata.comment = data.comment + "\n\n This file contains objects which were clustered into group " + + cast_to_string(c+1) + " of " + cast_to_string(num_clusters) + " groups with a chip size of " + + cast_to_string(chip_size) + " by imglab."; + cdata.name = data.name; + for (unsigned long i = 0; i < idata.size(); ++i) + { + // if this image has non-ignored boxes in it then include it in the output. + if (idata[i].first != std::numeric_limits::infinity()) + cdata.images.push_back(idata[i].second); + } + + string outfile = "cluster_"+pad_int_with_zeros(c+1, 3) + ".xml"; + cout << "Saving " << outfile << endl; + save_image_dataset_metadata(cdata, outfile); + } + + // Now output each cluster to disk as a big tiled jpeg file. Sort everything so, just + // like in the xml file above, the best objects come first in the tiling. + std::sort(assignments.begin(), assignments.end()); + for (unsigned long c = 0; c < num_clusters; ++c) + { + dlib::array > temp; + for (unsigned long i = 0; i < assignments.size(); ++i) + { + if (assignments[i].c == c) + temp.push_back(images[assignments[i].idx]); + } + + string outfile = "cluster_"+pad_int_with_zeros(c+1, 3) + ".jpg"; + cout << "Saving " << outfile << endl; + save_jpeg(tile_images(temp), outfile); + } + + + return EXIT_SUCCESS; +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/tools/imglab/src/cluster.h b/ml/dlib/tools/imglab/src/cluster.h new file mode 100644 index 000000000..6cb41a373 --- /dev/null +++ b/ml/dlib/tools/imglab/src/cluster.h @@ -0,0 +1,11 @@ +// Copyright (C) 2015 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_IMGLAB_ClUSTER_H_ +#define DLIB_IMGLAB_ClUSTER_H_ + +#include + +int cluster_dataset(const dlib::command_line_parser& parser); + +#endif //DLIB_IMGLAB_ClUSTER_H_ + diff --git a/ml/dlib/tools/imglab/src/common.cpp b/ml/dlib/tools/imglab/src/common.cpp new file mode 100644 index 000000000..d9cc1dca4 --- /dev/null +++ b/ml/dlib/tools/imglab/src/common.cpp @@ -0,0 +1,60 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. + +#include "common.h" +#include +#include + +// ---------------------------------------------------------------------------------------- + +std::string strip_path ( + const std::string& str, + const std::string& prefix +) +{ + unsigned long i; + for (i = 0; i < str.size() && i < prefix.size(); ++i) + { + if (str[i] != prefix[i]) + return str; + } + + if (i < str.size() && (str[i] == '/' || str[i] == '\\')) + ++i; + + return str.substr(i); +} + +// ---------------------------------------------------------------------------------------- + +void make_empty_file ( + const std::string& filename +) +{ + std::ofstream fout(filename.c_str()); + if (!fout) + throw dlib::error("ERROR: Unable to open " + filename + " for writing."); +} + +// ---------------------------------------------------------------------------------------- + +std::string to_png_name (const std::string& filename) +{ + std::string::size_type pos = filename.find_last_of("."); + if (pos == std::string::npos) + throw dlib::error("invalid filename: " + filename); + return filename.substr(0,pos) + ".png"; +} + +// ---------------------------------------------------------------------------------------- + +std::string to_jpg_name (const std::string& filename) +{ + std::string::size_type pos = filename.find_last_of("."); + if (pos == std::string::npos) + throw dlib::error("invalid filename: " + filename); + return filename.substr(0,pos) + ".jpg"; +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/tools/imglab/src/common.h b/ml/dlib/tools/imglab/src/common.h new file mode 100644 index 000000000..42e905bc3 --- /dev/null +++ b/ml/dlib/tools/imglab/src/common.h @@ -0,0 +1,45 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_IMGLAB_COmMON_H__ +#define DLIB_IMGLAB_COmMON_H__ + +#include + +// ---------------------------------------------------------------------------------------- + +std::string strip_path ( + const std::string& str, + const std::string& prefix +); +/*! + ensures + - if (prefix is a prefix of str) then + - returns the part of str after the prefix + (additionally, str will not begin with a / or \ character) + - else + - return str +!*/ + +// ---------------------------------------------------------------------------------------- + +void make_empty_file ( + const std::string& filename +); +/*! + ensures + - creates an empty file of the given name +!*/ + +// ---------------------------------------------------------------------------------------- + +std::string to_png_name (const std::string& filename); +std::string to_jpg_name (const std::string& filename); + +// ---------------------------------------------------------------------------------------- + +const int JPEG_QUALITY = 90; + +// ---------------------------------------------------------------------------------------- + +#endif // DLIB_IMGLAB_COmMON_H__ + diff --git a/ml/dlib/tools/imglab/src/convert_idl.cpp b/ml/dlib/tools/imglab/src/convert_idl.cpp new file mode 100644 index 000000000..7ff601d0c --- /dev/null +++ b/ml/dlib/tools/imglab/src/convert_idl.cpp @@ -0,0 +1,184 @@ + +#include "convert_idl.h" +#include "dlib/data_io.h" +#include +#include +#include +#include +#include + +using namespace std; +using namespace dlib; + +namespace +{ + using namespace dlib::image_dataset_metadata; + +// ---------------------------------------------------------------------------------------- + + inline bool next_is_number(std::istream& in) + { + return ('0' <= in.peek() && in.peek() <= '9') || in.peek() == '-' || in.peek() == '+'; + } + + int read_int(std::istream& in) + { + bool is_neg = false; + if (in.peek() == '-') + { + is_neg = true; + in.get(); + } + if (in.peek() == '+') + in.get(); + + int val = 0; + while ('0' <= in.peek() && in.peek() <= '9') + { + val = 10*val + in.get()-'0'; + } + + if (is_neg) + return -val; + else + return val; + } + +// ---------------------------------------------------------------------------------------- + + void parse_annotation_file( + const std::string& file, + dlib::image_dataset_metadata::dataset& data + ) + { + ifstream fin(file.c_str()); + if (!fin) + throw dlib::error("Unable to open file " + file); + + + bool in_quote = false; + int point_count = 0; + bool in_point_list = false; + bool saw_any_points = false; + + image img; + string label; + point p1,p2; + while (fin.peek() != EOF) + { + if (in_point_list && next_is_number(fin)) + { + const int val = read_int(fin); + switch (point_count) + { + case 0: p1.x() = val; break; + case 1: p1.y() = val; break; + case 2: p2.x() = val; break; + case 3: p2.y() = val; break; + default: + throw dlib::error("parse error in file " + file); + } + + ++point_count; + } + + char ch = fin.get(); + + if (ch == ':') + continue; + + if (ch == '"') + { + in_quote = !in_quote; + continue; + } + + if (in_quote) + { + img.filename += ch; + continue; + } + + + if (ch == '(') + { + in_point_list = true; + point_count = 0; + label.clear(); + saw_any_points = true; + } + if (ch == ')') + { + in_point_list = false; + + label.clear(); + while (fin.peek() != EOF && + fin.peek() != ';' && + fin.peek() != ',') + { + char ch = fin.get(); + if (ch == ':') + continue; + + label += ch; + } + } + + if (ch == ',' && !in_point_list) + { + + box b; + b.rect = rectangle(p1,p2); + b.label = label; + img.boxes.push_back(b); + } + + + if (ch == ';') + { + + if (saw_any_points) + { + box b; + b.rect = rectangle(p1,p2); + b.label = label; + img.boxes.push_back(b); + saw_any_points = false; + } + data.images.push_back(img); + + + img.filename.clear(); + img.boxes.clear(); + } + + + } + + + + } + +// ---------------------------------------------------------------------------------------- + +} + +void convert_idl( + const command_line_parser& parser +) +{ + cout << "Convert from IDL annotation format..." << endl; + + dlib::image_dataset_metadata::dataset dataset; + + for (unsigned long i = 0; i < parser.number_of_arguments(); ++i) + { + parse_annotation_file(parser[i], dataset); + } + + const std::string filename = parser.option("c").argument(); + save_image_dataset_metadata(dataset, filename); +} + + + diff --git a/ml/dlib/tools/imglab/src/convert_idl.h b/ml/dlib/tools/imglab/src/convert_idl.h new file mode 100644 index 000000000..d8c33d961 --- /dev/null +++ b/ml/dlib/tools/imglab/src/convert_idl.h @@ -0,0 +1,14 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_IMGLAB_CONVErT_IDL_H__ +#define DLIB_IMGLAB_CONVErT_IDL_H__ + +#include "common.h" +#include + +void convert_idl(const dlib::command_line_parser& parser); + +#endif // DLIB_IMGLAB_CONVErT_IDL_H__ + + + diff --git a/ml/dlib/tools/imglab/src/convert_pascal_v1.cpp b/ml/dlib/tools/imglab/src/convert_pascal_v1.cpp new file mode 100644 index 000000000..8eaf5e2bb --- /dev/null +++ b/ml/dlib/tools/imglab/src/convert_pascal_v1.cpp @@ -0,0 +1,177 @@ + +#include "convert_pascal_v1.h" +#include "dlib/data_io.h" +#include +#include +#include +#include + +using namespace std; +using namespace dlib; + +namespace +{ + using namespace dlib::image_dataset_metadata; + +// ---------------------------------------------------------------------------------------- + + std::string pick_out_quoted_string ( + const std::string& str + ) + { + std::string temp; + bool in_quotes = false; + for (unsigned long i = 0; i < str.size(); ++i) + { + if (str[i] == '"') + { + in_quotes = !in_quotes; + } + else if (in_quotes) + { + temp += str[i]; + } + } + + return temp; + } + +// ---------------------------------------------------------------------------------------- + + void parse_annotation_file( + const std::string& file, + dlib::image_dataset_metadata::image& img, + std::string& dataset_name + ) + { + ifstream fin(file.c_str()); + if (!fin) + throw dlib::error("Unable to open file " + file); + + img = dlib::image_dataset_metadata::image(); + + string str, line; + std::vector words; + while (fin.peek() != EOF) + { + getline(fin, line); + words = split(line, " \r\n\t:(,-)\""); + if (words.size() > 2) + { + if (words[0] == "#") + continue; + + if (words[0] == "Image" && words[1] == "filename") + { + img.filename = pick_out_quoted_string(line); + } + else if (words[0] == "Database") + { + dataset_name = pick_out_quoted_string(line); + } + else if (words[0] == "Objects" && words[1] == "with" && words.size() >= 5) + { + const int num = sa = words[4]; + img.boxes.resize(num); + } + else if (words.size() > 4 && (words[2] == "for" || words[2] == "on") && words[3] == "object") + { + long idx = sa = words[4]; + --idx; + if (idx >= (long)img.boxes.size()) + throw dlib::error("Invalid object id number of " + words[4]); + + if (words[0] == "Center" && words[1] == "point" && words.size() > 9) + { + const long x = sa = words[8]; + const long y = sa = words[9]; + img.boxes[idx].parts["head"] = point(x,y); + } + else if (words[0] == "Bounding" && words[1] == "box" && words.size() > 13) + { + rectangle rect; + img.boxes[idx].rect.left() = sa = words[10]; + img.boxes[idx].rect.top() = sa = words[11]; + img.boxes[idx].rect.right() = sa = words[12]; + img.boxes[idx].rect.bottom() = sa = words[13]; + } + else if (words[0] == "Original" && words[1] == "label" && words.size() > 6) + { + img.boxes[idx].label = words[6]; + } + } + } + + } + } + +// ---------------------------------------------------------------------------------------- + + std::string figure_out_full_path_to_image ( + const std::string& annotation_file, + const std::string& image_name + ) + { + directory parent = get_parent_directory(file(annotation_file)); + + + string temp; + while (true) + { + if (parent.is_root()) + temp = parent.full_name() + image_name; + else + temp = parent.full_name() + directory::get_separator() + image_name; + + if (file_exists(temp)) + return temp; + + if (parent.is_root()) + throw dlib::error("Can't figure out where the file " + image_name + " is located."); + parent = get_parent_directory(parent); + } + } + +// ---------------------------------------------------------------------------------------- + +} + +void convert_pascal_v1( + const command_line_parser& parser +) +{ + cout << "Convert from PASCAL v1.00 annotation format..." << endl; + + dlib::image_dataset_metadata::dataset dataset; + + std::string name; + dlib::image_dataset_metadata::image img; + + const std::string filename = parser.option("c").argument(); + // make sure the file exists so we can use the get_parent_directory() command to + // figure out it's parent directory. + make_empty_file(filename); + const std::string parent_dir = get_parent_directory(file(filename)).full_name(); + + for (unsigned long i = 0; i < parser.number_of_arguments(); ++i) + { + try + { + parse_annotation_file(parser[i], img, name); + + dataset.name = name; + img.filename = strip_path(figure_out_full_path_to_image(parser[i], img.filename), parent_dir); + dataset.images.push_back(img); + + } + catch (exception& ) + { + cout << "Error while processing file " << parser[i] << endl << endl; + throw; + } + } + + save_image_dataset_metadata(dataset, filename); +} + + diff --git a/ml/dlib/tools/imglab/src/convert_pascal_v1.h b/ml/dlib/tools/imglab/src/convert_pascal_v1.h new file mode 100644 index 000000000..3553d03a7 --- /dev/null +++ b/ml/dlib/tools/imglab/src/convert_pascal_v1.h @@ -0,0 +1,13 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_IMGLAB_CONVERT_PASCAl_V1_H__ +#define DLIB_IMGLAB_CONVERT_PASCAl_V1_H__ + +#include "common.h" +#include + +void convert_pascal_v1(const dlib::command_line_parser& parser); + +#endif // DLIB_IMGLAB_CONVERT_PASCAl_V1_H__ + + diff --git a/ml/dlib/tools/imglab/src/convert_pascal_xml.cpp b/ml/dlib/tools/imglab/src/convert_pascal_xml.cpp new file mode 100644 index 000000000..c699d7777 --- /dev/null +++ b/ml/dlib/tools/imglab/src/convert_pascal_xml.cpp @@ -0,0 +1,239 @@ + +#include "convert_pascal_xml.h" +#include "dlib/data_io.h" +#include +#include +#include +#include +#include + +using namespace std; +using namespace dlib; + +namespace +{ + using namespace dlib::image_dataset_metadata; + +// ---------------------------------------------------------------------------------------- + + class doc_handler : public document_handler + { + image& temp_image; + std::string& dataset_name; + + std::vector ts; + box temp_box; + + public: + + doc_handler( + image& temp_image_, + std::string& dataset_name_ + ): + temp_image(temp_image_), + dataset_name(dataset_name_) + {} + + + virtual void start_document ( + ) + { + ts.clear(); + temp_image = image(); + temp_box = box(); + dataset_name.clear(); + } + + virtual void end_document ( + ) + { + } + + virtual void start_element ( + const unsigned long , + const std::string& name, + const dlib::attribute_list& + ) + { + if (ts.size() == 0 && name != "annotation") + { + std::ostringstream sout; + sout << "Invalid XML document. Root tag must be . Found <" << name << "> instead."; + throw dlib::error(sout.str()); + } + + + ts.push_back(name); + } + + virtual void end_element ( + const unsigned long , + const std::string& name + ) + { + ts.pop_back(); + if (ts.size() == 0) + return; + + if (name == "object" && ts.back() == "annotation") + { + temp_image.boxes.push_back(temp_box); + temp_box = box(); + } + } + + virtual void characters ( + const std::string& data + ) + { + if (ts.size() == 2 && ts[1] == "filename") + { + temp_image.filename = trim(data); + } + else if (ts.size() == 3 && ts[2] == "database" && ts[1] == "source") + { + dataset_name = trim(data); + } + else if (ts.size() >= 3) + { + if (ts[ts.size()-2] == "bndbox" && ts[ts.size()-3] == "object") + { + if (ts.back() == "xmin") temp_box.rect.left() = string_cast(data); + else if (ts.back() == "ymin") temp_box.rect.top() = string_cast(data); + else if (ts.back() == "xmax") temp_box.rect.right() = string_cast(data); + else if (ts.back() == "ymax") temp_box.rect.bottom() = string_cast(data); + } + else if (ts.back() == "name" && ts[ts.size()-2] == "object") + { + temp_box.label = trim(data); + } + else if (ts.back() == "difficult" && ts[ts.size()-2] == "object") + { + if (trim(data) == "0" || trim(data) == "false") + { + temp_box.difficult = false; + } + else + { + temp_box.difficult = true; + } + } + else if (ts.back() == "truncated" && ts[ts.size()-2] == "object") + { + if (trim(data) == "0" || trim(data) == "false") + { + temp_box.truncated = false; + } + else + { + temp_box.truncated = true; + } + } + else if (ts.back() == "occluded" && ts[ts.size()-2] == "object") + { + if (trim(data) == "0" || trim(data) == "false") + { + temp_box.occluded = false; + } + else + { + temp_box.occluded = true; + } + } + + } + } + + virtual void processing_instruction ( + const unsigned long , + const std::string& , + const std::string& + ) + { + } + }; + +// ---------------------------------------------------------------------------------------- + + class xml_error_handler : public error_handler + { + public: + virtual void error ( + const unsigned long + ) { } + + virtual void fatal_error ( + const unsigned long line_number + ) + { + std::ostringstream sout; + sout << "There is a fatal error on line " << line_number << " so parsing will now halt."; + throw dlib::error(sout.str()); + } + }; + +// ---------------------------------------------------------------------------------------- + + void parse_annotation_file( + const std::string& file, + dlib::image_dataset_metadata::image& img, + std::string& dataset_name + ) + { + doc_handler dh(img, dataset_name); + xml_error_handler eh; + + xml_parser::kernel_1a parser; + parser.add_document_handler(dh); + parser.add_error_handler(eh); + + ifstream fin(file.c_str()); + if (!fin) + throw dlib::error("Unable to open file " + file); + parser.parse(fin); + } + +// ---------------------------------------------------------------------------------------- + +} + +void convert_pascal_xml( + const command_line_parser& parser +) +{ + cout << "Convert from PASCAL XML annotation format..." << endl; + + dlib::image_dataset_metadata::dataset dataset; + + std::string name; + dlib::image_dataset_metadata::image img; + + const std::string filename = parser.option("c").argument(); + // make sure the file exists so we can use the get_parent_directory() command to + // figure out it's parent directory. + make_empty_file(filename); + const std::string parent_dir = get_parent_directory(file(filename)).full_name(); + + for (unsigned long i = 0; i < parser.number_of_arguments(); ++i) + { + try + { + parse_annotation_file(parser[i], img, name); + const string root = get_parent_directory(get_parent_directory(file(parser[i]))).full_name(); + const string img_path = root + directory::get_separator() + "JPEGImages" + directory::get_separator(); + + dataset.name = name; + img.filename = strip_path(img_path + img.filename, parent_dir); + dataset.images.push_back(img); + + } + catch (exception& ) + { + cout << "Error while processing file " << parser[i] << endl << endl; + throw; + } + } + + save_image_dataset_metadata(dataset, filename); +} + diff --git a/ml/dlib/tools/imglab/src/convert_pascal_xml.h b/ml/dlib/tools/imglab/src/convert_pascal_xml.h new file mode 100644 index 000000000..01ee1e82f --- /dev/null +++ b/ml/dlib/tools/imglab/src/convert_pascal_xml.h @@ -0,0 +1,12 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_IMGLAB_CONVERT_PASCAl_XML_H__ +#define DLIB_IMGLAB_CONVERT_PASCAl_XML_H__ + +#include "common.h" +#include + +void convert_pascal_xml(const dlib::command_line_parser& parser); + +#endif // DLIB_IMGLAB_CONVERT_PASCAl_XML_H__ + diff --git a/ml/dlib/tools/imglab/src/flip_dataset.cpp b/ml/dlib/tools/imglab/src/flip_dataset.cpp new file mode 100644 index 000000000..e072dc790 --- /dev/null +++ b/ml/dlib/tools/imglab/src/flip_dataset.cpp @@ -0,0 +1,249 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. + +#include "flip_dataset.h" +#include +#include +#include +#include "common.h" +#include +#include +#include + +using namespace dlib; +using namespace std; + +// ---------------------------------------------------------------------------------------- + +std::vector align_points( + const std::vector& from, + const std::vector& to, + double min_angle = -90*pi/180.0, + double max_angle = 90*pi/180.0, + long num_angles = 181 +) +/*! + ensures + - Figures out how to align the points in from with the points in to. Returns an + assignment array A that indicates that from[i] matches with to[A[i]]. + + We use the Hungarian algorithm with a search over reasonable angles. This method + works because we just need to account for a translation and a mild rotation and + nothing else. If there is any other more complex mapping then you probably don't + have landmarks that make sense to flip. +!*/ +{ + DLIB_CASSERT(from.size() == to.size()); + + std::vector best_assignment; + double best_assignment_cost = std::numeric_limits::infinity(); + + matrix dists(from.size(), to.size()); + matrix idists; + + for (auto angle : linspace(min_angle, max_angle, num_angles)) + { + auto rot = rotation_matrix(angle); + for (long r = 0; r < dists.nr(); ++r) + { + for (long c = 0; c < dists.nc(); ++c) + { + dists(r,c) = length_squared(rot*from[r]-to[c]); + } + } + + idists = matrix_cast(-round(std::numeric_limits::max()*(dists/max(dists)))); + + auto assignment = max_cost_assignment(idists); + auto cost = assignment_cost(dists, assignment); + if (cost < best_assignment_cost) + { + best_assignment_cost = cost; + best_assignment = std::move(assignment); + } + } + + + // Now compute the alignment error in terms of average distance moved by each part. We + // do this so we can give the user a warning if it's impossible to make a good + // alignment. + running_stats rs; + std::vector tmp(to.size()); + for (size_t i = 0; i < to.size(); ++i) + tmp[best_assignment[i]] = to[i]; + auto tform = find_similarity_transform(from, tmp); + for (size_t i = 0; i < from.size(); ++i) + rs.add(length(tform(from[i])-tmp[i])); + if (rs.mean() > 0.05) + { + cout << "WARNING, your dataset has object part annotations and you asked imglab to " << endl; + cout << "flip the data. Imglab tried to adjust the part labels so that the average" << endl; + cout << "part layout in the flipped dataset is the same as the source dataset. " << endl; + cout << "However, the part annotation scheme doesn't seem to be left-right symmetric." << endl; + cout << "You should manually review the output to make sure the part annotations are " << endl; + cout << "labeled as you expect." << endl; + } + + + return best_assignment; +} + +// ---------------------------------------------------------------------------------------- + +std::map normalized_parts ( + const image_dataset_metadata::box& b +) +{ + auto tform = dlib::impl::normalizing_tform(b.rect); + std::map temp; + for (auto& p : b.parts) + temp[p.first] = tform(p.second); + return temp; +} + +// ---------------------------------------------------------------------------------------- + +std::map average_parts ( + const image_dataset_metadata::dataset& data +) +/*! + ensures + - returns the average part layout over all objects in data. This is done by + centering the parts inside their rects and then averaging all the objects. +!*/ +{ + std::map psum; + std::map pcnt; + for (auto& image : data.images) + { + for (auto& box : image.boxes) + { + for (auto& p : normalized_parts(box)) + { + psum[p.first] += p.second; + pcnt[p.first] += 1; + } + } + } + + // make into an average + for (auto& p : psum) + p.second /= pcnt[p.first]; + + return psum; +} + +// ---------------------------------------------------------------------------------------- + +void make_part_labeling_match_target_dataset ( + const image_dataset_metadata::dataset& target, + image_dataset_metadata::dataset& data +) +/*! + This function tries to adjust the part labels in data so that the average part layout + in data is the same as target, according to the string labels. Therefore, it doesn't + adjust part positions, instead it changes the string labels on the parts to achieve + this. This really only makes sense when you flipped a dataset that contains left-right + symmetric objects and you want to remap the part labels of the flipped data so that + they match the unflipped data's annotation scheme. +!*/ +{ + auto target_parts = average_parts(target); + auto data_parts = average_parts(data); + + // Convert to a form align_points() understands. We also need to keep track of the + // labels for later. + std::vector from, to; + std::vector from_labels, to_labels; + for (auto& p : target_parts) + { + from_labels.emplace_back(p.first); + from.emplace_back(p.second); + } + for (auto& p : data_parts) + { + to_labels.emplace_back(p.first); + to.emplace_back(p.second); + } + + auto assignment = align_points(from, to); + // so now we know that from_labels[i] should replace to_labels[assignment[i]] + std::map label_mapping; + for (size_t i = 0; i < assignment.size(); ++i) + label_mapping[to_labels[assignment[i]]] = from_labels[i]; + + // now apply the label mapping to the dataset + for (auto& image : data.images) + { + for (auto& box : image.boxes) + { + std::map temp; + for (auto& p : box.parts) + temp[label_mapping[p.first]] = p.second; + box.parts = std::move(temp); + } + } +} + +// ---------------------------------------------------------------------------------------- + +void flip_dataset(const command_line_parser& parser) +{ + image_dataset_metadata::dataset metadata, orig_metadata; + string datasource; + if (parser.option("flip")) + datasource = parser.option("flip").argument(); + else + datasource = parser.option("flip-basic").argument(); + load_image_dataset_metadata(metadata,datasource); + orig_metadata = metadata; + + // Set the current directory to be the one that contains the + // metadata file. We do this because the file might contain + // file paths which are relative to this folder. + set_current_dir(get_parent_directory(file(datasource))); + + const string metadata_filename = get_parent_directory(file(datasource)).full_name() + + directory::get_separator() + "flipped_" + file(datasource).name(); + + + array2d img, temp; + for (unsigned long i = 0; i < metadata.images.size(); ++i) + { + file f(metadata.images[i].filename); + string filename = get_parent_directory(f).full_name() + directory::get_separator() + "flipped_" + to_png_name(f.name()); + + load_image(img, metadata.images[i].filename); + flip_image_left_right(img, temp); + if (parser.option("jpg")) + { + filename = to_jpg_name(filename); + save_jpeg(temp, filename,JPEG_QUALITY); + } + else + { + save_png(temp, filename); + } + + for (unsigned long j = 0; j < metadata.images[i].boxes.size(); ++j) + { + metadata.images[i].boxes[j].rect = impl::flip_rect_left_right(metadata.images[i].boxes[j].rect, get_rect(img)); + + // flip all the object parts + for (auto& part : metadata.images[i].boxes[j].parts) + { + part.second = impl::flip_rect_left_right(rectangle(part.second,part.second), get_rect(img)).tl_corner(); + } + } + + metadata.images[i].filename = filename; + } + + if (!parser.option("flip-basic")) + make_part_labeling_match_target_dataset(orig_metadata, metadata); + + save_image_dataset_metadata(metadata, metadata_filename); +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/tools/imglab/src/flip_dataset.h b/ml/dlib/tools/imglab/src/flip_dataset.h new file mode 100644 index 000000000..8ac5db3e8 --- /dev/null +++ b/ml/dlib/tools/imglab/src/flip_dataset.h @@ -0,0 +1,12 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_IMGLAB_FLIP_DaTASET_H__ +#define DLIB_IMGLAB_FLIP_DaTASET_H__ + + +#include + +void flip_dataset(const dlib::command_line_parser& parser); + +#endif // DLIB_IMGLAB_FLIP_DaTASET_H__ + diff --git a/ml/dlib/tools/imglab/src/main.cpp b/ml/dlib/tools/imglab/src/main.cpp new file mode 100644 index 000000000..060c2c870 --- /dev/null +++ b/ml/dlib/tools/imglab/src/main.cpp @@ -0,0 +1,1145 @@ + +#include "dlib/data_io.h" +#include "dlib/string.h" +#include "metadata_editor.h" +#include "convert_pascal_xml.h" +#include "convert_pascal_v1.h" +#include "convert_idl.h" +#include "cluster.h" +#include "flip_dataset.h" +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + + +const char* VERSION = "1.13"; + + + +using namespace std; +using namespace dlib; + +// ---------------------------------------------------------------------------------------- + +void create_new_dataset ( + const command_line_parser& parser +) +{ + using namespace dlib::image_dataset_metadata; + + const std::string filename = parser.option("c").argument(); + // make sure the file exists so we can use the get_parent_directory() command to + // figure out it's parent directory. + make_empty_file(filename); + const std::string parent_dir = get_parent_directory(file(filename)); + + unsigned long depth = 0; + if (parser.option("r")) + depth = 30; + + dataset meta; + meta.name = "imglab dataset"; + meta.comment = "Created by imglab tool."; + for (unsigned long i = 0; i < parser.number_of_arguments(); ++i) + { + try + { + const string temp = strip_path(file(parser[i]), parent_dir); + meta.images.push_back(image(temp)); + } + catch (dlib::file::file_not_found&) + { + // then parser[i] should be a directory + + std::vector files = get_files_in_directory_tree(parser[i], + match_endings(".png .PNG .jpeg .JPEG .jpg .JPG .bmp .BMP .dng .DNG .gif .GIF"), + depth); + sort(files.begin(), files.end()); + + for (unsigned long j = 0; j < files.size(); ++j) + { + meta.images.push_back(image(strip_path(files[j], parent_dir))); + } + } + } + + save_image_dataset_metadata(meta, filename); +} + +// ---------------------------------------------------------------------------------------- + +int split_dataset ( + const command_line_parser& parser +) +{ + if (parser.number_of_arguments() != 1) + { + cerr << "The --split option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + const std::string label = parser.option("split").argument(); + + dlib::image_dataset_metadata::dataset data, data_with, data_without; + load_image_dataset_metadata(data, parser[0]); + + data_with.name = data.name; + data_with.comment = data.comment; + data_without.name = data.name; + data_without.comment = data.comment; + + for (unsigned long i = 0; i < data.images.size(); ++i) + { + auto&& temp = data.images[i]; + + bool has_the_label = false; + // check for the label we are looking for + for (unsigned long j = 0; j < temp.boxes.size(); ++j) + { + if (temp.boxes[j].label == label) + { + has_the_label = true; + break; + } + } + + if (has_the_label) + data_with.images.push_back(temp); + else + data_without.images.push_back(temp); + } + + + save_image_dataset_metadata(data_with, left_substr(parser[0],".") + "_with_"+label + ".xml"); + save_image_dataset_metadata(data_without, left_substr(parser[0],".") + "_without_"+label + ".xml"); + + return EXIT_SUCCESS; +} + +// ---------------------------------------------------------------------------------------- + +void print_all_labels ( + const dlib::image_dataset_metadata::dataset& data +) +{ + std::set labels; + for (unsigned long i = 0; i < data.images.size(); ++i) + { + for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) + { + labels.insert(data.images[i].boxes[j].label); + } + } + + for (std::set::iterator i = labels.begin(); i != labels.end(); ++i) + { + if (i->size() != 0) + { + cout << *i << endl; + } + } +} + +// ---------------------------------------------------------------------------------------- + +void print_all_label_stats ( + const dlib::image_dataset_metadata::dataset& data +) +{ + std::map > area_stats, aspect_ratio; + std::map image_hits; + std::set labels; + unsigned long num_unignored_boxes = 0; + for (unsigned long i = 0; i < data.images.size(); ++i) + { + std::set temp; + for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) + { + labels.insert(data.images[i].boxes[j].label); + temp.insert(data.images[i].boxes[j].label); + + area_stats[data.images[i].boxes[j].label].add(data.images[i].boxes[j].rect.area()); + aspect_ratio[data.images[i].boxes[j].label].add(data.images[i].boxes[j].rect.width()/ + (double)data.images[i].boxes[j].rect.height()); + + if (!data.images[i].boxes[j].ignore) + ++num_unignored_boxes; + } + + // count the number of images for each label + for (std::set::iterator i = temp.begin(); i != temp.end(); ++i) + image_hits[*i] += 1; + } + + cout << "Number of images: "<< data.images.size() << endl; + cout << "Number of different labels: "<< labels.size() << endl; + cout << "Number of non-ignored boxes: " << num_unignored_boxes << endl << endl; + + for (std::set::iterator i = labels.begin(); i != labels.end(); ++i) + { + if (i->size() == 0) + cout << "Unlabeled Boxes:" << endl; + else + cout << "Label: "<< *i << endl; + cout << " number of images: " << image_hits[*i] << endl; + cout << " number of occurrences: " << area_stats[*i].current_n() << endl; + cout << " min box area: " << area_stats[*i].min() << endl; + cout << " max box area: " << area_stats[*i].max() << endl; + cout << " mean box area: " << area_stats[*i].mean() << endl; + cout << " stddev box area: " << area_stats[*i].stddev() << endl; + cout << " mean width/height ratio: " << aspect_ratio[*i].mean() << endl; + cout << " stddev width/height ratio: " << aspect_ratio[*i].stddev() << endl; + cout << endl; + } +} + +// ---------------------------------------------------------------------------------------- + +void rename_labels ( + dlib::image_dataset_metadata::dataset& data, + const std::string& from, + const std::string& to +) +{ + for (unsigned long i = 0; i < data.images.size(); ++i) + { + for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) + { + if (data.images[i].boxes[j].label == from) + data.images[i].boxes[j].label = to; + } + } + +} + +// ---------------------------------------------------------------------------------------- + +void ignore_labels ( + dlib::image_dataset_metadata::dataset& data, + const std::string& label +) +{ + for (unsigned long i = 0; i < data.images.size(); ++i) + { + for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) + { + if (data.images[i].boxes[j].label == label) + data.images[i].boxes[j].ignore = true; + } + } +} + +// ---------------------------------------------------------------------------------------- + +void merge_metadata_files ( + const command_line_parser& parser +) +{ + image_dataset_metadata::dataset src, dest; + load_image_dataset_metadata(src, parser.option("add").argument(0)); + load_image_dataset_metadata(dest, parser.option("add").argument(1)); + + std::map merged_data; + for (unsigned long i = 0; i < dest.images.size(); ++i) + merged_data[dest.images[i].filename] = dest.images[i]; + // now add in the src data and overwrite anything if there are duplicate entries. + for (unsigned long i = 0; i < src.images.size(); ++i) + merged_data[src.images[i].filename] = src.images[i]; + + // copy merged data into dest + dest.images.clear(); + for (std::map::const_iterator i = merged_data.begin(); + i != merged_data.end(); ++i) + { + dest.images.push_back(i->second); + } + + save_image_dataset_metadata(dest, "merged.xml"); +} + +// ---------------------------------------------------------------------------------------- + +void rotate_dataset(const command_line_parser& parser) +{ + image_dataset_metadata::dataset metadata; + const string datasource = parser[0]; + load_image_dataset_metadata(metadata,datasource); + + double angle = get_option(parser, "rotate", 0); + + // Set the current directory to be the one that contains the + // metadata file. We do this because the file might contain + // file paths which are relative to this folder. + set_current_dir(get_parent_directory(file(datasource))); + + const string file_prefix = "rotated_"+ cast_to_string(angle) + "_"; + const string metadata_filename = get_parent_directory(file(datasource)).full_name() + + directory::get_separator() + file_prefix + file(datasource).name(); + + + array2d img, temp; + for (unsigned long i = 0; i < metadata.images.size(); ++i) + { + file f(metadata.images[i].filename); + string filename = get_parent_directory(f).full_name() + directory::get_separator() + file_prefix + to_png_name(f.name()); + + load_image(img, metadata.images[i].filename); + const point_transform_affine tran = rotate_image(img, temp, angle*pi/180); + if (parser.option("jpg")) + { + filename = to_jpg_name(filename); + save_jpeg(temp, filename,JPEG_QUALITY); + } + else + { + save_png(temp, filename); + } + + for (unsigned long j = 0; j < metadata.images[i].boxes.size(); ++j) + { + const rectangle rect = metadata.images[i].boxes[j].rect; + rectangle newrect; + newrect += tran(rect.tl_corner()); + newrect += tran(rect.tr_corner()); + newrect += tran(rect.bl_corner()); + newrect += tran(rect.br_corner()); + // now make newrect have the same area as the starting rect. + double ratio = std::sqrt(rect.area()/(double)newrect.area()); + newrect = centered_rect(newrect, newrect.width()*ratio, newrect.height()*ratio); + metadata.images[i].boxes[j].rect = newrect; + + // rotate all the object parts + std::map::iterator k; + for (k = metadata.images[i].boxes[j].parts.begin(); k != metadata.images[i].boxes[j].parts.end(); ++k) + { + k->second = tran(k->second); + } + } + + metadata.images[i].filename = filename; + } + + save_image_dataset_metadata(metadata, metadata_filename); +} + +// ---------------------------------------------------------------------------------------- + +int resample_dataset(const command_line_parser& parser) +{ + if (parser.number_of_arguments() != 1) + { + cerr << "The --resample option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + const size_t obj_size = get_option(parser,"cropped-object-size",100*100); + const double margin_scale = get_option(parser,"crop-size",2.5); // cropped image will be this times wider than the object. + const unsigned long min_object_size = get_option(parser,"min-object-size",1); + const bool one_object_per_image = parser.option("one-object-per-image"); + + dlib::image_dataset_metadata::dataset data, resampled_data; + std::ostringstream sout; + sout << "\nThe --resample parameters which generated this dataset were:" << endl; + sout << " cropped-object-size: "<< obj_size << endl; + sout << " crop-size: "<< margin_scale << endl; + sout << " min-object-size: "<< min_object_size << endl; + if (one_object_per_image) + sout << " one_object_per_image: true" << endl; + resampled_data.comment = data.comment + sout.str(); + resampled_data.name = data.name + " RESAMPLED"; + + load_image_dataset_metadata(data, parser[0]); + locally_change_current_dir chdir(get_parent_directory(file(parser[0]))); + dlib::rand rnd; + + const size_t image_size = std::round(std::sqrt(obj_size*margin_scale*margin_scale)); + const chip_dims cdims(image_size, image_size); + + console_progress_indicator pbar(data.images.size()); + for (unsigned long i = 0; i < data.images.size(); ++i) + { + // don't even bother loading images that don't have objects. + if (data.images[i].boxes.size() == 0) + continue; + + pbar.print_status(i); + array2d img, chip; + load_image(img, data.images[i].filename); + + + // figure out what chips we want to take from this image + for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) + { + const rectangle rect = data.images[i].boxes[j].rect; + if (data.images[i].boxes[j].ignore || rect.area() < min_object_size) + continue; + + const auto max_dim = std::max(rect.width(), rect.height()); + + const double rand_scale_perturb = 1 - 0.3*(rnd.get_random_double()-0.5); + const rectangle crop_rect = centered_rect(rect, max_dim*margin_scale*rand_scale_perturb, max_dim*margin_scale*rand_scale_perturb); + + const rectangle_transform tform = get_mapping_to_chip(chip_details(crop_rect, cdims)); + extract_image_chip(img, chip_details(crop_rect, cdims), chip); + + image_dataset_metadata::image dimg; + // Now transform the boxes to the crop and also mark them as ignored if they + // have already been cropped out or are outside the crop. + for (size_t k = 0; k < data.images[i].boxes.size(); ++k) + { + image_dataset_metadata::box box = data.images[i].boxes[k]; + // ignore boxes outside the cropped image + if (crop_rect.intersect(box.rect).area() == 0) + continue; + + // mark boxes we include in the crop as ignored. Also mark boxes that + // aren't totally within the crop as ignored. + if (crop_rect.contains(grow_rect(box.rect,10)) && (!one_object_per_image || k==j)) + data.images[i].boxes[k].ignore = true; + else + box.ignore = true; + + if (box.rect.area() < min_object_size) + box.ignore = true; + + box.rect = tform(box.rect); + for (auto&& p : box.parts) + p.second = tform.get_tform()(p.second); + dimg.boxes.push_back(box); + } + // Put a 64bit hash of the image data into the name to make sure there are no + // file name conflicts. + std::ostringstream sout; + sout << hex << murmur_hash3_128bit(&chip[0][0], chip.size()*sizeof(chip[0][0])).second; + dimg.filename = data.images[i].filename + "_RESAMPLED_"+sout.str()+".png"; + + if (parser.option("jpg")) + { + dimg.filename = to_jpg_name(dimg.filename); + save_jpeg(chip,dimg.filename, JPEG_QUALITY); + } + else + { + save_png(chip,dimg.filename); + } + resampled_data.images.push_back(dimg); + } + } + + save_image_dataset_metadata(resampled_data, parser[0] + ".RESAMPLED.xml"); + + return EXIT_SUCCESS; +} + +// ---------------------------------------------------------------------------------------- + +int tile_dataset(const command_line_parser& parser) +{ + if (parser.number_of_arguments() != 1) + { + cerr << "The --tile option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + string out_image = parser.option("tile").argument(); + string ext = right_substr(out_image,"."); + if (ext != "png" && ext != "jpg") + { + cerr << "The output image file must have either .png or .jpg extension." << endl; + return EXIT_FAILURE; + } + + const unsigned long chip_size = get_option(parser, "size", 8000); + + dlib::image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, parser[0]); + locally_change_current_dir chdir(get_parent_directory(file(parser[0]))); + dlib::array > images; + console_progress_indicator pbar(data.images.size()); + for (unsigned long i = 0; i < data.images.size(); ++i) + { + // don't even bother loading images that don't have objects. + if (data.images[i].boxes.size() == 0) + continue; + + pbar.print_status(i); + array2d img; + load_image(img, data.images[i].filename); + + // figure out what chips we want to take from this image + std::vector dets; + for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) + { + if (data.images[i].boxes[j].ignore) + continue; + + rectangle rect = data.images[i].boxes[j].rect; + dets.push_back(chip_details(rect, chip_size)); + } + // Now grab all those chips at once. + dlib::array > chips; + extract_image_chips(img, dets, chips); + // and put the chips into the output. + for (unsigned long j = 0; j < chips.size(); ++j) + images.push_back(chips[j]); + } + + chdir.revert(); + + if (ext == "png") + save_png(tile_images(images), out_image); + else + save_jpeg(tile_images(images), out_image); + + return EXIT_SUCCESS; +} + + +// ---------------------------------------------------------------------------------------- + +int main(int argc, char** argv) +{ + try + { + + command_line_parser parser; + + parser.add_option("h","Displays this information."); + parser.add_option("v","Display version."); + + parser.set_group_name("Creating XML files"); + parser.add_option("c","Create an XML file named listing a set of images.",1); + parser.add_option("r","Search directories recursively for images."); + parser.add_option("convert","Convert foreign image Annotations from format to the imglab format. " + "Supported formats: pascal-xml, pascal-v1, idl.",1); + + parser.set_group_name("Viewing XML files"); + parser.add_option("tile","Chip out all the objects and save them as one big image called .",1); + parser.add_option("size","When using --tile or --cluster, make each extracted object contain " + "about pixels (default 8000).",1); + parser.add_option("l","List all the labels in the given XML file."); + parser.add_option("stats","List detailed statistics on the object labels in the given XML file."); + parser.add_option("files","List all the files in the given XML file."); + + parser.set_group_name("Editing/Transforming XML datasets"); + parser.add_option("rename", "Rename all labels of to .",2); + parser.add_option("parts","The display will allow image parts to be labeled. The set of allowable parts " + "is defined by which should be a space separated list of parts.",1); + parser.add_option("rmempty","Remove all images that don't contain non-ignored annotations and save the results to a new XML file."); + parser.add_option("rmdupes","Remove duplicate images from the dataset. This is done by comparing " + "the md5 hash of each image file and removing duplicate images. " ); + parser.add_option("rmdiff","Set the ignored flag to true for boxes marked as difficult."); + parser.add_option("rmtrunc","Set the ignored flag to true for boxes that are partially outside the image."); + parser.add_option("sort-num-objects","Sort the images listed an XML file so images with many objects are listed first."); + parser.add_option("sort","Alphabetically sort the images in an XML file."); + parser.add_option("shuffle","Randomly shuffle the order of the images listed in an XML file."); + parser.add_option("seed", "When using --shuffle, set the random seed to the string .",1); + parser.add_option("split", "Split the contents of an XML file into two separate files. One containing the " + "images with objects labeled and another file with all the other images. ",1); + parser.add_option("add", "Add the image metadata from into . If any of the image " + "tags are in both files then the ones in are deleted and replaced with the " + "image tags from . The results are saved into merged.xml and neither or " + " files are modified.",2); + parser.add_option("flip", "Read an XML image dataset from the XML file and output a left-right flipped " + "version of the dataset and an accompanying flipped XML file named flipped_. " + "We also adjust object part labels after flipping so that the new flipped dataset " + "has the same average part layout as the source dataset." ,1); + parser.add_option("flip-basic", "This option is just like --flip, except we don't adjust any object part labels after flipping. " + "The parts are instead simply mirrored to the flipped dataset.", 1); + parser.add_option("rotate", "Read an XML image dataset and output a copy that is rotated counter clockwise by degrees. " + "The output is saved to an XML file prefixed with rotated_.",1); + parser.add_option("cluster", "Cluster all the objects in an XML file into different clusters and save " + "the results as cluster_###.xml and cluster_###.jpg files.",1); + parser.add_option("ignore", "Mark boxes labeled as as ignored. The resulting XML file is output as a separate file and the original is not modified.",1); + parser.add_option("rmlabel","Remove all boxes labeled and save the results to a new XML file.",1); + parser.add_option("rm-other-labels","Remove all boxes not labeled and save the results to a new XML file.",1); + parser.add_option("rmignore","Remove all boxes marked ignore and save the results to a new XML file."); + parser.add_option("rm-if-overlaps","Remove all boxes labeled if they overlap any box not labeled and save the results to a new XML file.",1); + parser.add_option("jpg", "When saving images to disk, write them as jpg files instead of png."); + + parser.set_group_name("Cropping sub images"); + parser.add_option("resample", "Crop out images that are centered on each object in the dataset. " + "The output is a new XML dataset."); + parser.add_option("cropped-object-size", "When doing --resample, make the cropped objects contain about pixels (default 10000).",1); + parser.add_option("min-object-size", "When doing --resample, skip objects that have fewer than pixels in them (default 1).",1); + parser.add_option("crop-size", "When doing --resample, the entire cropped image will be times wider than the object (default 2.5).",1); + parser.add_option("one-object-per-image", "When doing --resample, only include one non-ignored object per image (i.e. the central object)."); + + + + parser.parse(argc, argv); + + const char* singles[] = {"h","c","r","l","files","convert","parts","rmdiff", "rmtrunc", "rmdupes", "seed", "shuffle", "split", "add", + "flip-basic", "flip", "rotate", "tile", "size", "cluster", "resample", "min-object-size", "rmempty", + "crop-size", "cropped-object-size", "rmlabel", "rm-other-labels", "rm-if-overlaps", "sort-num-objects", + "one-object-per-image", "jpg", "rmignore", "sort"}; + parser.check_one_time_options(singles); + const char* c_sub_ops[] = {"r", "convert"}; + parser.check_sub_options("c", c_sub_ops); + parser.check_sub_option("shuffle", "seed"); + const char* resample_sub_ops[] = {"min-object-size", "crop-size", "cropped-object-size", "one-object-per-image"}; + parser.check_sub_options("resample", resample_sub_ops); + const char* size_parent_ops[] = {"tile", "cluster"}; + parser.check_sub_options(size_parent_ops, "size"); + parser.check_incompatible_options("c", "l"); + parser.check_incompatible_options("c", "files"); + parser.check_incompatible_options("c", "rmdiff"); + parser.check_incompatible_options("c", "rmempty"); + parser.check_incompatible_options("c", "rmlabel"); + parser.check_incompatible_options("c", "rm-other-labels"); + parser.check_incompatible_options("c", "rmignore"); + parser.check_incompatible_options("c", "rm-if-overlaps"); + parser.check_incompatible_options("c", "rmdupes"); + parser.check_incompatible_options("c", "rmtrunc"); + parser.check_incompatible_options("c", "add"); + parser.check_incompatible_options("c", "flip"); + parser.check_incompatible_options("c", "flip-basic"); + parser.check_incompatible_options("flip", "flip-basic"); + parser.check_incompatible_options("c", "rotate"); + parser.check_incompatible_options("c", "rename"); + parser.check_incompatible_options("c", "ignore"); + parser.check_incompatible_options("c", "parts"); + parser.check_incompatible_options("c", "tile"); + parser.check_incompatible_options("c", "cluster"); + parser.check_incompatible_options("c", "resample"); + parser.check_incompatible_options("l", "rename"); + parser.check_incompatible_options("l", "ignore"); + parser.check_incompatible_options("l", "add"); + parser.check_incompatible_options("l", "parts"); + parser.check_incompatible_options("l", "flip"); + parser.check_incompatible_options("l", "flip-basic"); + parser.check_incompatible_options("l", "rotate"); + parser.check_incompatible_options("files", "rename"); + parser.check_incompatible_options("files", "ignore"); + parser.check_incompatible_options("files", "add"); + parser.check_incompatible_options("files", "parts"); + parser.check_incompatible_options("files", "flip"); + parser.check_incompatible_options("files", "flip-basic"); + parser.check_incompatible_options("files", "rotate"); + parser.check_incompatible_options("add", "flip"); + parser.check_incompatible_options("add", "flip-basic"); + parser.check_incompatible_options("add", "rotate"); + parser.check_incompatible_options("add", "tile"); + parser.check_incompatible_options("flip", "tile"); + parser.check_incompatible_options("flip-basic", "tile"); + parser.check_incompatible_options("rotate", "tile"); + parser.check_incompatible_options("cluster", "tile"); + parser.check_incompatible_options("resample", "tile"); + parser.check_incompatible_options("flip", "cluster"); + parser.check_incompatible_options("flip-basic", "cluster"); + parser.check_incompatible_options("rotate", "cluster"); + parser.check_incompatible_options("add", "cluster"); + parser.check_incompatible_options("flip", "resample"); + parser.check_incompatible_options("flip-basic", "resample"); + parser.check_incompatible_options("rotate", "resample"); + parser.check_incompatible_options("add", "resample"); + parser.check_incompatible_options("shuffle", "tile"); + parser.check_incompatible_options("sort-num-objects", "tile"); + parser.check_incompatible_options("sort", "tile"); + parser.check_incompatible_options("convert", "l"); + parser.check_incompatible_options("convert", "files"); + parser.check_incompatible_options("convert", "rename"); + parser.check_incompatible_options("convert", "ignore"); + parser.check_incompatible_options("convert", "parts"); + parser.check_incompatible_options("convert", "cluster"); + parser.check_incompatible_options("convert", "resample"); + parser.check_incompatible_options("rmdiff", "rename"); + parser.check_incompatible_options("rmdiff", "ignore"); + parser.check_incompatible_options("rmempty", "ignore"); + parser.check_incompatible_options("rmempty", "rename"); + parser.check_incompatible_options("rmlabel", "ignore"); + parser.check_incompatible_options("rmlabel", "rename"); + parser.check_incompatible_options("rm-other-labels", "ignore"); + parser.check_incompatible_options("rm-other-labels", "rename"); + parser.check_incompatible_options("rmignore", "ignore"); + parser.check_incompatible_options("rmignore", "rename"); + parser.check_incompatible_options("rm-if-overlaps", "ignore"); + parser.check_incompatible_options("rm-if-overlaps", "rename"); + parser.check_incompatible_options("rmdupes", "rename"); + parser.check_incompatible_options("rmdupes", "ignore"); + parser.check_incompatible_options("rmtrunc", "rename"); + parser.check_incompatible_options("rmtrunc", "ignore"); + const char* convert_args[] = {"pascal-xml","pascal-v1","idl"}; + parser.check_option_arg_range("convert", convert_args); + parser.check_option_arg_range("cluster", 2, 999); + parser.check_option_arg_range("rotate", -360, 360); + parser.check_option_arg_range("size", 10*10, 1000*1000); + parser.check_option_arg_range("min-object-size", 1, 10000*10000); + parser.check_option_arg_range("cropped-object-size", 4, 10000*10000); + parser.check_option_arg_range("crop-size", 1.0, 100.0); + + if (parser.option("h")) + { + cout << "Usage: imglab [options] \n"; + parser.print_options(cout); + cout << endl << endl; + return EXIT_SUCCESS; + } + + if (parser.option("add")) + { + merge_metadata_files(parser); + return EXIT_SUCCESS; + } + + if (parser.option("flip") || parser.option("flip-basic")) + { + flip_dataset(parser); + return EXIT_SUCCESS; + } + + if (parser.option("rotate")) + { + rotate_dataset(parser); + return EXIT_SUCCESS; + } + + if (parser.option("v")) + { + cout << "imglab v" << VERSION + << "\nCompiled: " << __TIME__ << " " << __DATE__ + << "\nWritten by Davis King\n"; + cout << "Check for updates at http://dlib.net\n\n"; + return EXIT_SUCCESS; + } + + if (parser.option("tile")) + { + return tile_dataset(parser); + } + + if (parser.option("cluster")) + { + return cluster_dataset(parser); + } + + if (parser.option("resample")) + { + return resample_dataset(parser); + } + + if (parser.option("c")) + { + if (parser.option("convert")) + { + if (parser.option("convert").argument() == "pascal-xml") + convert_pascal_xml(parser); + else if (parser.option("convert").argument() == "pascal-v1") + convert_pascal_v1(parser); + else if (parser.option("convert").argument() == "idl") + convert_idl(parser); + } + else + { + create_new_dataset(parser); + } + return EXIT_SUCCESS; + } + + if (parser.option("rmdiff")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The --rmdiff option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, parser[0]); + for (unsigned long i = 0; i < data.images.size(); ++i) + { + for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) + { + if (data.images[i].boxes[j].difficult) + data.images[i].boxes[j].ignore = true; + } + } + save_image_dataset_metadata(data, parser[0]); + return EXIT_SUCCESS; + } + + if (parser.option("rmempty")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The --rmempty option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data, data2; + load_image_dataset_metadata(data, parser[0]); + + data2 = data; + data2.images.clear(); + for (unsigned long i = 0; i < data.images.size(); ++i) + { + bool has_label = false; + for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) + { + if (!data.images[i].boxes[j].ignore) + has_label = true; + } + if (has_label) + data2.images.push_back(data.images[i]); + } + save_image_dataset_metadata(data2, parser[0] + ".rmempty.xml"); + return EXIT_SUCCESS; + } + + if (parser.option("rmlabel")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The --rmlabel option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, parser[0]); + + const auto label = parser.option("rmlabel").argument(); + + for (auto&& img : data.images) + { + std::vector boxes; + for (auto&& b : img.boxes) + { + if (b.label != label) + boxes.push_back(b); + } + img.boxes = boxes; + } + + save_image_dataset_metadata(data, parser[0] + ".rmlabel-"+label+".xml"); + return EXIT_SUCCESS; + } + + if (parser.option("rm-other-labels")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The --rm-other-labels option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, parser[0]); + + const auto labels = parser.option("rm-other-labels").argument(); + // replace comma by dash to form the file name + std::string strlabels = labels; + std::replace(strlabels.begin(), strlabels.end(), ',', '-'); + std::vector all_labels = split(labels, ","); + for (auto&& img : data.images) + { + std::vector boxes; + for (auto&& b : img.boxes) + { + if (std::find(all_labels.begin(), all_labels.end(), b.label) != all_labels.end()) + boxes.push_back(b); + } + img.boxes = boxes; + } + + save_image_dataset_metadata(data, parser[0] + ".rm-other-labels-"+ strlabels +".xml"); + return EXIT_SUCCESS; + } + + if (parser.option("rmignore")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The --rmignore option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, parser[0]); + + for (auto&& img : data.images) + { + std::vector boxes; + for (auto&& b : img.boxes) + { + if (!b.ignore) + boxes.push_back(b); + } + img.boxes = boxes; + } + + save_image_dataset_metadata(data, parser[0] + ".rmignore.xml"); + return EXIT_SUCCESS; + } + + if (parser.option("rm-if-overlaps")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The --rm-if-overlaps option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, parser[0]); + + const auto label = parser.option("rm-if-overlaps").argument(); + + test_box_overlap overlaps(0.5); + + for (auto&& img : data.images) + { + std::vector boxes; + for (auto&& b : img.boxes) + { + if (b.label != label) + { + boxes.push_back(b); + } + else + { + bool has_overlap = false; + for (auto&& b2 : img.boxes) + { + if (b2.label != label && overlaps(b2.rect, b.rect)) + { + has_overlap = true; + break; + } + } + if (!has_overlap) + boxes.push_back(b); + } + } + img.boxes = boxes; + } + + save_image_dataset_metadata(data, parser[0] + ".rm-if-overlaps-"+label+".xml"); + return EXIT_SUCCESS; + } + + if (parser.option("rmdupes")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The --rmdupes option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data, data_out; + std::set hashes; + load_image_dataset_metadata(data, parser[0]); + data_out = data; + data_out.images.clear(); + + for (unsigned long i = 0; i < data.images.size(); ++i) + { + ifstream fin(data.images[i].filename.c_str(), ios::binary); + string hash = md5(fin); + if (hashes.count(hash) == 0) + { + hashes.insert(hash); + data_out.images.push_back(data.images[i]); + } + } + save_image_dataset_metadata(data_out, parser[0]); + return EXIT_SUCCESS; + } + + if (parser.option("rmtrunc")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The --rmtrunc option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, parser[0]); + { + locally_change_current_dir chdir(get_parent_directory(file(parser[0]))); + for (unsigned long i = 0; i < data.images.size(); ++i) + { + array2d img; + load_image(img, data.images[i].filename); + const rectangle area = get_rect(img); + for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) + { + if (!area.contains(data.images[i].boxes[j].rect)) + data.images[i].boxes[j].ignore = true; + } + } + } + save_image_dataset_metadata(data, parser[0]); + return EXIT_SUCCESS; + } + + if (parser.option("l")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The -l option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, parser[0]); + print_all_labels(data); + return EXIT_SUCCESS; + } + + if (parser.option("files")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The --files option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, parser[0]); + for (size_t i = 0; i < data.images.size(); ++i) + cout << data.images[i].filename << "\n"; + return EXIT_SUCCESS; + } + + if (parser.option("split")) + { + return split_dataset(parser); + } + + if (parser.option("shuffle")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The --shuffle option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, parser[0]); + const string default_seed = cast_to_string(time(0)); + const string seed = get_option(parser, "seed", default_seed); + dlib::rand rnd(seed); + randomize_samples(data.images, rnd); + save_image_dataset_metadata(data, parser[0]); + return EXIT_SUCCESS; + } + + if (parser.option("sort-num-objects")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The --sort-num-objects option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, parser[0]); + std::sort(data.images.rbegin(), data.images.rend(), + [](const image_dataset_metadata::image& a, const image_dataset_metadata::image& b) { return a.boxes.size() < b.boxes.size(); }); + save_image_dataset_metadata(data, parser[0]); + return EXIT_SUCCESS; + } + + if (parser.option("sort")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The --sort option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, parser[0]); + std::sort(data.images.begin(), data.images.end(), + [](const image_dataset_metadata::image& a, const image_dataset_metadata::image& b) { return a.filename < b.filename; }); + save_image_dataset_metadata(data, parser[0]); + return EXIT_SUCCESS; + } + + if (parser.option("stats")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The --stats option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, parser[0]); + print_all_label_stats(data); + return EXIT_SUCCESS; + } + + if (parser.option("rename")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The --rename option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, parser[0]); + for (unsigned long i = 0; i < parser.option("rename").count(); ++i) + { + rename_labels(data, parser.option("rename").argument(0,i), parser.option("rename").argument(1,i)); + } + save_image_dataset_metadata(data, parser[0]); + return EXIT_SUCCESS; + } + + if (parser.option("ignore")) + { + if (parser.number_of_arguments() != 1) + { + cerr << "The --ignore option requires you to give one XML file on the command line." << endl; + return EXIT_FAILURE; + } + + dlib::image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, parser[0]); + for (unsigned long i = 0; i < parser.option("ignore").count(); ++i) + { + ignore_labels(data, parser.option("ignore").argument()); + } + save_image_dataset_metadata(data, parser[0]+".ignored.xml"); + return EXIT_SUCCESS; + } + + if (parser.number_of_arguments() == 1) + { + metadata_editor editor(parser[0]); + if (parser.option("parts")) + { + std::vector parts = split(parser.option("parts").argument()); + for (unsigned long i = 0; i < parts.size(); ++i) + { + editor.add_labelable_part_name(parts[i]); + } + } + editor.wait_until_closed(); + return EXIT_SUCCESS; + } + + cout << "Invalid command, give -h to see options." << endl; + return EXIT_FAILURE; + } + catch (exception& e) + { + cerr << e.what() << endl; + return EXIT_FAILURE; + } +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/tools/imglab/src/metadata_editor.cpp b/ml/dlib/tools/imglab/src/metadata_editor.cpp new file mode 100644 index 000000000..76177e893 --- /dev/null +++ b/ml/dlib/tools/imglab/src/metadata_editor.cpp @@ -0,0 +1,671 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. + +#include "metadata_editor.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace dlib; + +extern const char* VERSION; + +// ---------------------------------------------------------------------------------------- + +metadata_editor:: +metadata_editor( + const std::string& filename_ +) : + mbar(*this), + lb_images(*this), + image_pos(0), + display(*this), + overlay_label_name(*this), + overlay_label(*this), + keyboard_jump_pos(0), + last_keyboard_jump_pos_update(0) +{ + file metadata_file(filename_); + filename = metadata_file.full_name(); + // Make our current directory be the one that contains the metadata file. We + // do this because that file might contain relative paths to the image files + // we are supposed to be loading. + set_current_dir(get_parent_directory(metadata_file).full_name()); + + load_image_dataset_metadata(metadata, filename); + + dlib::array::expand_1a files; + files.resize(metadata.images.size()); + for (unsigned long i = 0; i < metadata.images.size(); ++i) + { + files[i] = metadata.images[i].filename; + } + lb_images.load(files); + lb_images.enable_multiple_select(); + + lb_images.set_click_handler(*this, &metadata_editor::on_lb_images_clicked); + + overlay_label_name.set_text("Next Label: "); + overlay_label.set_width(200); + + display.set_image_clicked_handler(*this, &metadata_editor::on_image_clicked); + display.set_overlay_rects_changed_handler(*this, &metadata_editor::on_overlay_rects_changed); + display.set_overlay_rect_selected_handler(*this, &metadata_editor::on_overlay_rect_selected); + overlay_label.set_text_modified_handler(*this, &metadata_editor::on_overlay_label_changed); + + mbar.set_number_of_menus(2); + mbar.set_menu_name(0,"File",'F'); + mbar.set_menu_name(1,"Help",'H'); + + + mbar.menu(0).add_menu_item(menu_item_text("Save",*this,&metadata_editor::file_save,'S')); + mbar.menu(0).add_menu_item(menu_item_text("Save As",*this,&metadata_editor::file_save_as,'A')); + mbar.menu(0).add_menu_item(menu_item_separator()); + mbar.menu(0).add_menu_item(menu_item_text("Remove Selected Images",*this,&metadata_editor::remove_selected_images,'R')); + mbar.menu(0).add_menu_item(menu_item_separator()); + mbar.menu(0).add_menu_item(menu_item_text("Exit",static_cast(*this),&drawable_window::close_window,'x')); + + mbar.menu(1).add_menu_item(menu_item_text("About",*this,&metadata_editor::display_about,'A')); + + // set the size of this window. + on_window_resized(); + load_image_and_set_size(0); + on_window_resized(); + if (image_pos < lb_images.size() ) + lb_images.select(image_pos); + + // make sure the window is centered on the screen. + unsigned long width, height; + get_size(width, height); + unsigned long screen_width, screen_height; + get_display_size(screen_width, screen_height); + set_pos((screen_width-width)/2, (screen_height-height)/2); + + show(); +} + +// ---------------------------------------------------------------------------------------- + +metadata_editor:: +~metadata_editor( +) +{ + close_window(); +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +add_labelable_part_name ( + const std::string& name +) +{ + display.add_labelable_part_name(name); +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +file_save() +{ + save_metadata_to_file(filename); +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +save_metadata_to_file ( + const std::string& file +) +{ + try + { + save_image_dataset_metadata(metadata, file); + } + catch (dlib::error& e) + { + message_box("Error saving file", e.what()); + } +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +file_save_as() +{ + save_file_box(*this, &metadata_editor::save_metadata_to_file); +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +remove_selected_images() +{ + dlib::queue::kernel_1a list; + lb_images.get_selected(list); + list.reset(); + unsigned long min_idx = lb_images.size(); + while (list.move_next()) + { + lb_images.unselect(list.element()); + min_idx = std::min(min_idx, list.element()); + } + + + // remove all the selected items from metadata.images + dlib::static_set::kernel_1a to_remove; + to_remove.load(list); + std::vector images; + for (unsigned long i = 0; i < metadata.images.size(); ++i) + { + if (to_remove.is_member(i) == false) + { + images.push_back(metadata.images[i]); + } + } + images.swap(metadata.images); + + + // reload metadata into lb_images + dlib::array::expand_1a files; + files.resize(metadata.images.size()); + for (unsigned long i = 0; i < metadata.images.size(); ++i) + { + files[i] = metadata.images[i].filename; + } + lb_images.load(files); + + + if (min_idx != 0) + min_idx--; + select_image(min_idx); +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +on_window_resized( +) +{ + drawable_window::on_window_resized(); + + unsigned long width, height; + get_size(width, height); + + lb_images.set_pos(0,mbar.bottom()+1); + lb_images.set_size(180, height - mbar.height()); + + overlay_label_name.set_pos(lb_images.right()+10, mbar.bottom() + (overlay_label.height()-overlay_label_name.height())/2+1); + overlay_label.set_pos(overlay_label_name.right(), mbar.bottom()+1); + display.set_pos(lb_images.right(), overlay_label.bottom()+3); + + display.set_size(width - display.left(), height - display.top()); +} + +// ---------------------------------------------------------------------------------------- + +void propagate_boxes( + dlib::image_dataset_metadata::dataset& data, + unsigned long prev, + unsigned long next +) +{ + if (prev == next || next >= data.images.size()) + return; + + array2d img1, img2; + dlib::load_image(img1, data.images[prev].filename); + dlib::load_image(img2, data.images[next].filename); + for (unsigned long i = 0; i < data.images[prev].boxes.size(); ++i) + { + correlation_tracker tracker; + tracker.start_track(img1, data.images[prev].boxes[i].rect); + tracker.update(img2); + dlib::image_dataset_metadata::box box = data.images[prev].boxes[i]; + box.rect = tracker.get_position(); + data.images[next].boxes.push_back(box); + } +} + +// ---------------------------------------------------------------------------------------- + +void propagate_labels( + const std::string& label, + dlib::image_dataset_metadata::dataset& data, + unsigned long prev, + unsigned long next +) +{ + if (prev == next || next >= data.images.size()) + return; + + + for (unsigned long i = 0; i < data.images[prev].boxes.size(); ++i) + { + if (data.images[prev].boxes[i].label != label) + continue; + + // figure out which box in the next image matches the current one the best + const rectangle cur = data.images[prev].boxes[i].rect; + double best_overlap = 0; + unsigned long best_idx = 0; + for (unsigned long j = 0; j < data.images[next].boxes.size(); ++j) + { + const rectangle next_box = data.images[next].boxes[j].rect; + const double overlap = cur.intersect(next_box).area()/(double)(cur+next_box).area(); + if (overlap > best_overlap) + { + best_overlap = overlap; + best_idx = j; + } + } + + // If we found a matching rectangle in the next image and the best match doesn't + // already have a label. + if (best_overlap > 0.5 && data.images[next].boxes[best_idx].label == "") + { + data.images[next].boxes[best_idx].label = label; + } + } + +} + +// ---------------------------------------------------------------------------------------- + +bool has_label_or_all_boxes_labeled ( + const std::string& label, + const dlib::image_dataset_metadata::image& img +) +{ + if (label.size() == 0) + return true; + + bool all_boxes_labeled = true; + for (unsigned long i = 0; i < img.boxes.size(); ++i) + { + if (img.boxes[i].label == label) + return true; + if (img.boxes[i].label.size() == 0) + all_boxes_labeled = false; + } + + return all_boxes_labeled; +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +on_keydown ( + unsigned long key, + bool is_printable, + unsigned long state +) +{ + drawable_window::on_keydown(key, is_printable, state); + + if (is_printable) + { + if (key == '\t') + { + overlay_label.give_input_focus(); + overlay_label.select_all_text(); + } + + // If the user types a number then jump to that image. + if ('0' <= key && key <= '9' && metadata.images.size() != 0 && !overlay_label.has_input_focus()) + { + time_t curtime = time(0); + // If it's been a while since the user typed numbers then forget the last jump + // position and start accumulating numbers over again. + if (curtime-last_keyboard_jump_pos_update >= 2) + keyboard_jump_pos = 0; + last_keyboard_jump_pos_update = curtime; + + keyboard_jump_pos *= 10; + keyboard_jump_pos += key-'0'; + if (keyboard_jump_pos >= metadata.images.size()) + keyboard_jump_pos = metadata.images.size()-1; + + image_pos = keyboard_jump_pos; + select_image(image_pos); + } + else + { + last_keyboard_jump_pos_update = 0; + } + + if (key == 'd' && (state&base_window::KBD_MOD_ALT)) + { + remove_selected_images(); + } + + if (key == 'e' && !overlay_label.has_input_focus()) + { + display_equialized_image = !display_equialized_image; + select_image(image_pos); + } + + // Make 'w' and 's' act like KEY_UP and KEY_DOWN + if ((key == 'w' || key == 'W') && !overlay_label.has_input_focus()) + { + key = base_window::KEY_UP; + } + else if ((key == 's' || key == 'S') && !overlay_label.has_input_focus()) + { + key = base_window::KEY_DOWN; + } + else + { + return; + } + } + + if (key == base_window::KEY_UP) + { + if ((state&KBD_MOD_CONTROL) && (state&KBD_MOD_SHIFT)) + { + // Don't do anything if there are no boxes in the current image. + if (metadata.images[image_pos].boxes.size() == 0) + return; + // Also don't do anything if there *are* boxes in the next image. + if (image_pos > 1 && metadata.images[image_pos-1].boxes.size() != 0) + return; + + propagate_boxes(metadata, image_pos, image_pos-1); + } + else if (state&base_window::KBD_MOD_CONTROL) + { + // If the label we are supposed to propagate doesn't exist in the current image + // then don't advance. + if (!has_label_or_all_boxes_labeled(display.get_default_overlay_rect_label(),metadata.images[image_pos])) + return; + + // if the next image is going to be empty then fast forward to the next one + while (image_pos > 1 && metadata.images[image_pos-1].boxes.size() == 0) + --image_pos; + + propagate_labels(display.get_default_overlay_rect_label(), metadata, image_pos, image_pos-1); + } + select_image(image_pos-1); + } + else if (key == base_window::KEY_DOWN) + { + if ((state&KBD_MOD_CONTROL) && (state&KBD_MOD_SHIFT)) + { + // Don't do anything if there are no boxes in the current image. + if (metadata.images[image_pos].boxes.size() == 0) + return; + // Also don't do anything if there *are* boxes in the next image. + if (image_pos+1 < metadata.images.size() && metadata.images[image_pos+1].boxes.size() != 0) + return; + + propagate_boxes(metadata, image_pos, image_pos+1); + } + else if (state&base_window::KBD_MOD_CONTROL) + { + // If the label we are supposed to propagate doesn't exist in the current image + // then don't advance. + if (!has_label_or_all_boxes_labeled(display.get_default_overlay_rect_label(),metadata.images[image_pos])) + return; + + // if the next image is going to be empty then fast forward to the next one + while (image_pos+1 < metadata.images.size() && metadata.images[image_pos+1].boxes.size() == 0) + ++image_pos; + + propagate_labels(display.get_default_overlay_rect_label(), metadata, image_pos, image_pos+1); + } + select_image(image_pos+1); + } +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +select_image( + unsigned long idx +) +{ + if (idx < lb_images.size()) + { + // unselect all currently selected images + dlib::queue::kernel_1a list; + lb_images.get_selected(list); + list.reset(); + while (list.move_next()) + { + lb_images.unselect(list.element()); + } + + + lb_images.select(idx); + load_image(idx); + } + else if (lb_images.size() == 0) + { + display.clear_overlay(); + array2d empty_img; + display.set_image(empty_img); + } +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +on_lb_images_clicked( + unsigned long idx +) +{ + load_image(idx); +} + +// ---------------------------------------------------------------------------------------- + +std::vector get_overlays ( + const dlib::image_dataset_metadata::image& data, + color_mapper& string_to_color +) +{ + std::vector temp(data.boxes.size()); + for (unsigned long i = 0; i < temp.size(); ++i) + { + temp[i].rect = data.boxes[i].rect; + temp[i].label = data.boxes[i].label; + temp[i].parts = data.boxes[i].parts; + temp[i].crossed_out = data.boxes[i].ignore; + temp[i].color = string_to_color(data.boxes[i].label); + } + return temp; +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +load_image( + unsigned long idx +) +{ + if (idx >= metadata.images.size()) + return; + + image_pos = idx; + + array2d img; + display.clear_overlay(); + try + { + dlib::load_image(img, metadata.images[idx].filename); + set_title(metadata.name + " #"+cast_to_string(idx)+": " +metadata.images[idx].filename); + } + catch (exception& e) + { + message_box("Error loading image", e.what()); + } + + if (display_equialized_image) + equalize_histogram(img); + display.set_image(img); + display.add_overlay(get_overlays(metadata.images[idx], string_to_color)); +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +load_image_and_set_size( + unsigned long idx +) +{ + if (idx >= metadata.images.size()) + return; + + image_pos = idx; + + array2d img; + display.clear_overlay(); + try + { + dlib::load_image(img, metadata.images[idx].filename); + set_title(metadata.name + " #"+cast_to_string(idx)+": " +metadata.images[idx].filename); + } + catch (exception& e) + { + message_box("Error loading image", e.what()); + } + + + unsigned long screen_width, screen_height; + get_display_size(screen_width, screen_height); + + + unsigned long needed_width = display.left() + img.nc() + 4; + unsigned long needed_height = display.top() + img.nr() + 4; + if (needed_width < 300) needed_width = 300; + if (needed_height < 300) needed_height = 300; + + if (needed_width > 100 + screen_width) + needed_width = screen_width - 100; + if (needed_height > 100 + screen_height) + needed_height = screen_height - 100; + + set_size(needed_width, needed_height); + + + if (display_equialized_image) + equalize_histogram(img); + display.set_image(img); + display.add_overlay(get_overlays(metadata.images[idx], string_to_color)); +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +on_overlay_rects_changed( +) +{ + using namespace dlib::image_dataset_metadata; + if (image_pos < metadata.images.size()) + { + const std::vector& rects = display.get_overlay_rects(); + + std::vector& boxes = metadata.images[image_pos].boxes; + + boxes.clear(); + for (unsigned long i = 0; i < rects.size(); ++i) + { + box temp; + temp.label = rects[i].label; + temp.rect = rects[i].rect; + temp.parts = rects[i].parts; + temp.ignore = rects[i].crossed_out; + boxes.push_back(temp); + } + } +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +on_image_clicked( + const point& /*p*/, bool /*is_double_click*/, unsigned long /*btn*/ +) +{ + display.set_default_overlay_rect_color(string_to_color(trim(overlay_label.text()))); +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +on_overlay_label_changed( +) +{ + display.set_default_overlay_rect_label(trim(overlay_label.text())); +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +on_overlay_rect_selected( + const image_display::overlay_rect& orect +) +{ + overlay_label.set_text(orect.label); + display.set_default_overlay_rect_label(orect.label); + display.set_default_overlay_rect_color(string_to_color(orect.label)); +} + +// ---------------------------------------------------------------------------------------- + +void metadata_editor:: +display_about( +) +{ + std::ostringstream sout; + sout << wrap_string("Image Labeler v" + string(VERSION) + "." ,0,0) << endl << endl; + sout << wrap_string("This program is a tool for labeling images with rectangles. " ,0,0) << endl << endl; + + sout << wrap_string("You can add a new rectangle by holding the shift key, left clicking " + "the mouse, and dragging it. New rectangles are given the label from the \"Next Label\" " + "field at the top of the application. You can quickly edit the contents of the Next Label field " + "by hitting the tab key. Double clicking " + "a rectangle selects it and the delete key removes it. You can also mark " + "a rectangle as ignored by hitting the i or END keys when it is selected. Ignored " + "rectangles are visually displayed with an X through them. You can remove an image " + "entirely by selecting it in the list on the left and pressing alt+d." + ,0,0) << endl << endl; + + sout << wrap_string("It is also possible to label object parts by selecting a rectangle and " + "then right clicking. A popup menu will appear and you can select a part label. " + "Note that you must define the allowable part labels by giving --parts on the " + "command line. An example would be '--parts \"leye reye nose mouth\"'." + ,0,0) << endl << endl; + + sout << wrap_string("Press the down or s key to select the next image in the list and the up or w " + "key to select the previous one.",0,0) << endl << endl; + + sout << wrap_string("Additionally, you can hold ctrl and then scroll the mouse wheel to zoom. A normal left click " + "and drag allows you to navigate around the image. Holding ctrl and " + "left clicking a rectangle will give it the label from the Next Label field. " + "Holding shift + right click and then dragging allows you to move things around. " + "Holding ctrl and pressing the up or down keyboard keys will propagate " + "rectangle labels from one image to the next and also skip empty images. " + "Similarly, holding ctrl+shift will propagate entire boxes via a visual tracking " + "algorithm from one image to the next. " + "Finally, typing a number on the keyboard will jump you to a specific image.",0,0) << endl << endl; + + sout << wrap_string("You can also toggle image histogram equalization by pressing the e key." + ,0,0) << endl; + + + message_box("About Image Labeler",sout.str()); +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/tools/imglab/src/metadata_editor.h b/ml/dlib/tools/imglab/src/metadata_editor.h new file mode 100644 index 000000000..71aa14ace --- /dev/null +++ b/ml/dlib/tools/imglab/src/metadata_editor.h @@ -0,0 +1,116 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_METADATA_EdITOR_H__ +#define DLIB_METADATA_EdITOR_H__ + +#include +#include +#include +#include + +// ---------------------------------------------------------------------------------------- + +class color_mapper +{ +public: + + dlib::rgb_alpha_pixel operator() ( + const std::string& str + ) + { + auto i = colors.find(str); + if (i != colors.end()) + { + return i->second; + } + else + { + using namespace dlib; + hsi_pixel pix; + pix.h = reverse(colors.size()); + pix.s = 255; + pix.i = 150; + rgb_alpha_pixel result; + assign_pixel(result, pix); + colors[str] = result; + return result; + } + } + +private: + + // We use a bit reverse here because it causes us to evenly spread the colors as we + // allocated them. First the colors are maximally different, then become interleaved + // and progressively more similar as they are allocated. + unsigned char reverse(unsigned char b) + { + // reverse the order of the bits in b. + b = ((b * 0x0802LU & 0x22110LU) | (b * 0x8020LU & 0x88440LU)) * 0x10101LU >> 16; + return b; + } + + std::map colors; +}; + +// ---------------------------------------------------------------------------------------- + +class metadata_editor : public dlib::drawable_window +{ +public: + metadata_editor( + const std::string& filename_ + ); + + ~metadata_editor(); + + void add_labelable_part_name ( + const std::string& name + ); + +private: + + void file_save(); + void file_save_as(); + void remove_selected_images(); + + virtual void on_window_resized(); + virtual void on_keydown ( + unsigned long key, + bool is_printable, + unsigned long state + ); + + void on_lb_images_clicked(unsigned long idx); + void select_image(unsigned long idx); + void save_metadata_to_file (const std::string& file); + void load_image(unsigned long idx); + void load_image_and_set_size(unsigned long idx); + void on_image_clicked(const dlib::point& p, bool is_double_click, unsigned long btn); + void on_overlay_rects_changed(); + void on_overlay_label_changed(); + void on_overlay_rect_selected(const dlib::image_display::overlay_rect& orect); + + void display_about(); + + std::string filename; + dlib::image_dataset_metadata::dataset metadata; + + dlib::menu_bar mbar; + dlib::list_box lb_images; + unsigned long image_pos; + + dlib::image_display display; + dlib::label overlay_label_name; + dlib::text_field overlay_label; + + unsigned long keyboard_jump_pos; + time_t last_keyboard_jump_pos_update; + bool display_equialized_image = false; + color_mapper string_to_color; +}; + +// ---------------------------------------------------------------------------------------- + + +#endif // DLIB_METADATA_EdITOR_H__ + -- cgit v1.2.3