From b5f8ee61a7f7e9bd291dd26b0585d03eb686c941 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 5 May 2024 13:19:16 +0200 Subject: Adding upstream version 1.46.3. Signed-off-by: Daniel Baumann --- ml/dlib/tools/imglab/src/cluster.cpp | 260 ----------------------------------- 1 file changed, 260 deletions(-) delete mode 100644 ml/dlib/tools/imglab/src/cluster.cpp (limited to 'ml/dlib/tools/imglab/src/cluster.cpp') diff --git a/ml/dlib/tools/imglab/src/cluster.cpp b/ml/dlib/tools/imglab/src/cluster.cpp deleted file mode 100644 index 23b289a7f..000000000 --- a/ml/dlib/tools/imglab/src/cluster.cpp +++ /dev/null @@ -1,260 +0,0 @@ -// Copyright (C) 2015 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. - -#include "cluster.h" -#include -#include -#include -#include -#include -#include -#include -#include - -// ---------------------------------------------------------------------------------------- - -using namespace std; -using namespace dlib; - -// ---------------------------------------------------------------------------- - -struct assignment -{ - unsigned long c; - double dist; - unsigned long idx; - - bool operator<(const assignment& item) const - { return dist < item.dist; } -}; - -std::vector angular_cluster ( - std::vector > feats, - const unsigned long num_clusters -) -{ - DLIB_CASSERT(feats.size() != 0, "The dataset can't be empty"); - for (unsigned long i = 0; i < feats.size(); ++i) - { - DLIB_CASSERT(feats[i].size() == feats[0].size(), "All feature vectors must have the same length."); - } - - // find the centroid of feats - matrix m; - for (unsigned long i = 0; i < feats.size(); ++i) - m += feats[i]; - m /= feats.size(); - - // Now center feats and then project onto the unit sphere. The reason for projecting - // onto the unit sphere is so pick_initial_centers() works in a sensible way. - for (unsigned long i = 0; i < feats.size(); ++i) - { - feats[i] -= m; - double len = length(feats[i]); - if (len != 0) - feats[i] /= len; - } - - // now do angular clustering of the points - std::vector > centers; - pick_initial_centers(num_clusters, centers, feats, linear_kernel >(), 0.05); - find_clusters_using_angular_kmeans(feats, centers); - - // and then report the resulting assignments - std::vector assignments; - for (unsigned long i = 0; i < feats.size(); ++i) - { - assignment temp; - temp.c = nearest_center(centers, feats[i]); - temp.dist = length(feats[i] - centers[temp.c]); - temp.idx = i; - assignments.push_back(temp); - } - return assignments; -} - -// ---------------------------------------------------------------------------------------- - -bool compare_first ( - const std::pair& a, - const std::pair& b -) -{ - return a.first < b.first; -} - -// ---------------------------------------------------------------------------------------- - -double mean_aspect_ratio ( - const image_dataset_metadata::dataset& data -) -{ - double sum = 0; - double cnt = 0; - for (unsigned long i = 0; i < data.images.size(); ++i) - { - for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) - { - rectangle rect = data.images[i].boxes[j].rect; - if (rect.area() == 0 || data.images[i].boxes[j].ignore) - continue; - sum += rect.width()/(double)rect.height(); - ++cnt; - } - } - - if (cnt != 0) - return sum/cnt; - else - return 0; -} - -// ---------------------------------------------------------------------------------------- - -bool has_non_ignored_boxes (const image_dataset_metadata::image& img) -{ - for (auto&& b : img.boxes) - { - if (!b.ignore) - return true; - } - return false; -} - -// ---------------------------------------------------------------------------------------- - -int cluster_dataset( - const dlib::command_line_parser& parser -) -{ - // make sure the user entered an argument to this program - if (parser.number_of_arguments() != 1) - { - cerr << "The --cluster option requires you to give one XML file on the command line." << endl; - return EXIT_FAILURE; - } - - const unsigned long num_clusters = get_option(parser, "cluster", 2); - const unsigned long chip_size = get_option(parser, "size", 8000); - - image_dataset_metadata::dataset data; - - image_dataset_metadata::load_image_dataset_metadata(data, parser[0]); - set_current_dir(get_parent_directory(file(parser[0]))); - - const double aspect_ratio = mean_aspect_ratio(data); - - dlib::array > images; - std::vector > feats; - console_progress_indicator pbar(data.images.size()); - // extract all the object chips and HOG features. - cout << "Loading image data..." << endl; - for (unsigned long i = 0; i < data.images.size(); ++i) - { - pbar.print_status(i); - if (!has_non_ignored_boxes(data.images[i])) - continue; - - array2d img, chip; - load_image(img, data.images[i].filename); - - for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) - { - if (data.images[i].boxes[j].ignore || data.images[i].boxes[j].rect.area() < 10) - continue; - drectangle rect = data.images[i].boxes[j].rect; - rect = set_aspect_ratio(rect, aspect_ratio); - extract_image_chip(img, chip_details(rect, chip_size), chip); - feats.push_back(extract_fhog_features(chip)); - images.push_back(chip); - } - } - - if (feats.size() == 0) - { - cerr << "No non-ignored object boxes found in the XML dataset. You can't cluster an empty dataset." << endl; - return EXIT_FAILURE; - } - - cout << "\nClustering objects..." << endl; - std::vector assignments = angular_cluster(feats, num_clusters); - - - // Now output each cluster to disk as an XML file. - for (unsigned long c = 0; c < num_clusters; ++c) - { - // We are going to accumulate all the image metadata for cluster c. We put it - // into idata so we can sort the images such that images with central chips - // come before less central chips. The idea being to get the good chips to - // show up first in the listing, making it easy to manually remove bad ones if - // that is desired. - std::vector > idata(data.images.size()); - unsigned long idx = 0; - for (unsigned long i = 0; i < data.images.size(); ++i) - { - idata[i].first = std::numeric_limits::infinity(); - idata[i].second.filename = data.images[i].filename; - if (!has_non_ignored_boxes(data.images[i])) - continue; - - for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) - { - idata[i].second.boxes.push_back(data.images[i].boxes[j]); - - if (data.images[i].boxes[j].ignore || data.images[i].boxes[j].rect.area() < 10) - continue; - - // If this box goes into cluster c then update the score for the whole - // image based on this boxes' score. Otherwise, mark the box as - // ignored. - if (assignments[idx].c == c) - idata[i].first = std::min(idata[i].first, assignments[idx].dist); - else - idata[i].second.boxes.back().ignore = true; - - ++idx; - } - } - - // now save idata to an xml file. - std::sort(idata.begin(), idata.end(), compare_first); - image_dataset_metadata::dataset cdata; - cdata.comment = data.comment + "\n\n This file contains objects which were clustered into group " + - cast_to_string(c+1) + " of " + cast_to_string(num_clusters) + " groups with a chip size of " + - cast_to_string(chip_size) + " by imglab."; - cdata.name = data.name; - for (unsigned long i = 0; i < idata.size(); ++i) - { - // if this image has non-ignored boxes in it then include it in the output. - if (idata[i].first != std::numeric_limits::infinity()) - cdata.images.push_back(idata[i].second); - } - - string outfile = "cluster_"+pad_int_with_zeros(c+1, 3) + ".xml"; - cout << "Saving " << outfile << endl; - save_image_dataset_metadata(cdata, outfile); - } - - // Now output each cluster to disk as a big tiled jpeg file. Sort everything so, just - // like in the xml file above, the best objects come first in the tiling. - std::sort(assignments.begin(), assignments.end()); - for (unsigned long c = 0; c < num_clusters; ++c) - { - dlib::array > temp; - for (unsigned long i = 0; i < assignments.size(); ++i) - { - if (assignments[i].c == c) - temp.push_back(images[assignments[i].idx]); - } - - string outfile = "cluster_"+pad_int_with_zeros(c+1, 3) + ".jpg"; - cout << "Saving " << outfile << endl; - save_jpeg(tile_images(temp), outfile); - } - - - return EXIT_SUCCESS; -} - -// ---------------------------------------------------------------------------------------- - -- cgit v1.2.3