1 files changed, 510 insertions, 0 deletions
diff --git a/ml/dlib/dlib/data_io/load_image_dataset.h b/ml/dlib/dlib/data_io/load_image_dataset.h
new file mode 100644
index 000000000..5664d96b2
--- /dev/null
+++ b/ml/dlib/dlib/data_io/load_image_dataset.h
@@ -0,0 +1,510 @@
+// Copyright (C) 2012  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_LOAD_IMAGE_DaTASET_Hh_
+#define DLIB_LOAD_IMAGE_DaTASET_Hh_
+
+#include "load_image_dataset_abstract.h"
+#include "../misc_api.h"
+#include "../dir_nav.h"
+#include "../image_io.h"
+#include "../array.h"
+#include <vector>
+#include "../geometry.h"
+#include "image_dataset_metadata.h"
+#include <string>
+#include <set>
+#include "../image_processing/full_object_detection.h"
+#include <utility>
+#include <limits>
+#include "../image_transforms/image_pyramid.h"
+
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    // Names a dataset metadata file and records how its images and boxes should be
+    // filtered when loaded.  Each option method leaves *this untouched and returns
+    // a modified copy, so options can be chained.
+    class image_dataset_file
+    {
+    public:
+        // Starts with no label filter, empty images kept, parts not required and
+        // an infinite box area threshold.
+        image_dataset_file(const std::string& filename) :
+            _filename(filename),
+            _skip_empty_images(false),
+            _have_parts(false),
+            _box_area_thresh(std::numeric_limits<double>::infinity())
+        {
+        }
+
+        // Returns a copy that additionally selects boxes carrying the given label.
+        image_dataset_file boxes_match_label(const std::string& label) const
+        {
+            image_dataset_file result(*this);
+            result._labels.insert(label);
+            return result;
+        }
+
+        // Returns a copy with the skip-empty-images flag turned on.
+        image_dataset_file skip_empty_images() const
+        {
+            image_dataset_file result(*this);
+            result._skip_empty_images = true;
+            return result;
+        }
+
+        // Returns a copy that rejects boxes which have no parts.
+        image_dataset_file boxes_have_parts() const
+        {
+            image_dataset_file result(*this);
+            result._have_parts = true;
+            return result;
+        }
+
+        // Returns a copy whose box area threshold is new_box_area_thresh.
+        image_dataset_file shrink_big_images(double new_box_area_thresh = 150*150) const
+        {
+            image_dataset_file result(*this);
+            result._box_area_thresh = new_box_area_thresh;
+            return result;
+        }
+
+        // True if box satisfies the part requirement and the label filter.  An
+        // empty label set accepts every label.
+        bool should_load_box(const image_dataset_metadata::box& box) const
+        {
+            if (_have_parts && box.parts.size() == 0)
+                return false;
+            return _labels.size() == 0 || _labels.count(box.label) != 0;
+        }
+
+        // Read-only access to the configured options.
+        const std::string& get_filename() const { return _filename; }
+        bool should_skip_empty_images() const { return _skip_empty_images; }
+        bool should_boxes_have_parts() const { return _have_parts; }
+        double box_area_thresh() const { return _box_area_thresh; }
+        const std::set<std::string>& get_selected_box_labels() const { return _labels; }
+
+    private:
+        std::string _filename;
+        std::set<std::string> _labels;   // empty means "accept any label"
+        bool _skip_empty_images;
+        bool _have_parts;
+        double _box_area_thresh;
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    // Loads the images listed in the metadata file named by source together with
+    // their box rectangles.  images[k] and object_locations[k] describe the k-th
+    // loaded image; the returned vector holds, per loaded image, the rectangles of
+    // its boxes flagged ignore.  Boxes rejected by source.should_load_box() are
+    // dropped entirely.
+    template <typename array_type>
+    std::vector<std::vector<rectangle> > load_image_dataset (
+        array_type& images,
+        std::vector<std::vector<rectangle> >& object_locations,
+        const image_dataset_file& source
+    )
+    {
+        typedef typename array_type::value_type image_type;
+
+        images.clear();
+        object_locations.clear();
+        std::vector<std::vector<rectangle> > all_ignored;
+
+        using namespace dlib::image_dataset_metadata;
+        dataset data;
+        load_image_dataset_metadata(data, source.get_filename());
+
+        // Image paths in the metadata may be relative to the folder holding the
+        // metadata file, so make that folder the current directory while loading.
+        locally_change_current_dir dir_guard(get_parent_directory(file(source.get_filename())));
+
+        image_type img;
+        std::vector<rectangle> kept, skipped;
+        for (unsigned long i = 0; i < data.images.size(); ++i)
+        {
+            // area of the smallest non-ignored box in this image
+            double smallest_area = std::numeric_limits<double>::infinity();
+            kept.clear();
+            skipped.clear();
+            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
+            {
+                const auto& b = data.images[i].boxes[j];
+                if (!source.should_load_box(b))
+                    continue;
+                if (b.ignore)
+                {
+                    skipped.push_back(b.rect);
+                }
+                else
+                {
+                    kept.push_back(b.rect);
+                    smallest_area = std::min<double>(smallest_area, kept.back().area());
+                }
+            }
+
+            if (source.should_skip_empty_images() && kept.size() == 0)
+                continue;
+            load_image(img, data.images[i].filename);
+            if (kept.size() != 0)
+            {
+                // Shrink with pyramid_down<2> for as long as the smallest box would
+                // still be above the box area threshold afterwards.
+                while (smallest_area/2/2 > source.box_area_thresh())
+                {
+                    pyramid_down<2> pyr;
+                    pyr(img);
+                    smallest_area *= (1.0/2.0)*(1.0/2.0);
+                    for (auto&& r : kept)
+                        r = pyr.rect_down(r);
+                    for (auto&& r : skipped)
+                        r = pyr.rect_down(r);
+                }
+                // Then continue with pyramid_down<3> under the same rule.
+                while (smallest_area*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh())
+                {
+                    pyramid_down<3> pyr;
+                    pyr(img);
+                    smallest_area *= (2.0/3.0)*(2.0/3.0);
+                    for (auto&& r : kept)
+                        r = pyr.rect_down(r);
+                    for (auto&& r : skipped)
+                        r = pyr.rect_down(r);
+                }
+            }
+            images.push_back(img);
+            object_locations.push_back(kept);
+            all_ignored.push_back(skipped);
+        }
+
+        return all_ignored;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    namespace impl
+    {
+        // Returns how many elements of rects have their ignore flag unset.  The
+        // mmod_rect loader uses this to decide whether an image counts as empty
+        // when skip_empty_images is requested.
+        inline size_t num_non_ignored_boxes (const std::vector<mmod_rect>& rects)
+        {
+            size_t kept = 0;
+            for (const auto& r : rects)
+                kept += r.ignore ? 0 : 1;
+            return kept;
+        }
+    }
+
+    // Loads the images listed in the metadata file named by source together with
+    // their boxes as labeled mmod_rect objects.  Ignored boxes are kept in
+    // object_locations as ignored_mmod_rect entries.  Images are shrunk while their
+    // smallest non-ignored box stays above source.box_area_thresh().
+    template <typename array_type>
+    void load_image_dataset (
+        array_type& images,
+        std::vector<std::vector<mmod_rect> >& object_locations,
+        const image_dataset_file& source
+    )
+    {
+        images.clear();
+        object_locations.clear();
+
+        using namespace dlib::image_dataset_metadata;
+        dataset data;
+        load_image_dataset_metadata(data, source.get_filename());
+
+        // The metadata may contain image paths relative to its own folder, so make
+        // that folder the current directory while loading.
+        locally_change_current_dir chdir(get_parent_directory(file(source.get_filename())));
+
+        typedef typename array_type::value_type image_type;
+        const double no_box = std::numeric_limits<double>::infinity();
+        image_type img;
+        std::vector<mmod_rect> rects;
+        for (unsigned long i = 0; i < data.images.size(); ++i)
+        {
+            double min_rect_size = no_box;
+            rects.clear();
+            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
+            {
+                const auto& b = data.images[i].boxes[j];
+                if (!source.should_load_box(b))
+                    continue;
+                if (b.ignore)
+                {
+                    rects.push_back(ignored_mmod_rect(b.rect));
+                }
+                else
+                {
+                    rects.push_back(mmod_rect(b.rect));
+                    min_rect_size = std::min<double>(min_rect_size, rects.back().rect.area());
+                }
+                rects.back().label = b.label;
+            }
+
+            if (!source.should_skip_empty_images() || impl::num_non_ignored_boxes(rects) != 0)
+            {
+                load_image(img, data.images[i].filename);
+                // Only non-ignored boxes set min_rect_size.  If all boxes are ignored it is
+                // still infinite and, for a finite threshold, these loops would never end.
+                if (min_rect_size != no_box)
+                {
+                    while(min_rect_size/2/2 > source.box_area_thresh())
+                    {
+                        pyramid_down<2> pyr;
+                        pyr(img);
+                        min_rect_size *= (1.0/2.0)*(1.0/2.0);
+                        for (auto&& r : rects)
+                            r.rect = pyr.rect_down(r.rect);
+                    }
+                    while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh())
+                    {
+                        pyramid_down<3> pyr;
+                        pyr(img);
+                        min_rect_size *= (2.0/3.0)*(2.0/3.0);
+                        for (auto&& r : rects)
+                            r.rect = pyr.rect_down(r.rect);
+                    }
+                }
+                images.push_back(std::move(img));
+                object_locations.push_back(std::move(rects));
+            }
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+// ******* THIS FUNCTION IS DEPRECATED, you should use another version of load_image_dataset() *******
+    // Convenience overload: builds an image_dataset_file from filename, restricts it
+    // to label when label is non-empty, applies skip_empty_images if requested, and
+    // forwards to the image_dataset_file based loader.
+    template <typename image_type, typename MM>
+    std::vector<std::vector<rectangle> > load_image_dataset (
+        array<image_type,MM>& images,
+        std::vector<std::vector<rectangle> >& object_locations,
+        const std::string& filename,
+        const std::string& label,
+        bool skip_empty_images = false
+    )
+    {
+        image_dataset_file source(filename);
+        if (!label.empty())
+            source = source.boxes_match_label(label);
+        if (skip_empty_images)
+            source = source.skip_empty_images();
+        return load_image_dataset(images, object_locations, source);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Loads rectangle annotations from filename with default image_dataset_file options.
+    template <typename array_type>
+    std::vector<std::vector<rectangle> > load_image_dataset (
+        array_type& images,
+        std::vector<std::vector<rectangle> >& object_locations,
+        const std::string& filename
+    )
+    {
+        const image_dataset_file source(filename);
+        return load_image_dataset(images, object_locations, source);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Loads mmod_rect annotations from filename with default image_dataset_file options.
+    template <typename array_type>
+    void load_image_dataset (
+        array_type& images,
+        std::vector<std::vector<mmod_rect>>& object_locations,
+        const std::string& filename
+    )
+    {
+        const image_dataset_file source(filename);
+        load_image_dataset(images, object_locations, source);
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    // Loads the images listed in the metadata file named by source together with
+    // their boxes as full_object_detection objects.  parts_list receives the names
+    // of all parts used by the loaded boxes, in sorted order; part k of every
+    // detection corresponds to parts_list[k] and is OBJECT_PART_NOT_PRESENT when the
+    // box does not have that part.  Returns, for each loaded image, the rectangles
+    // of its ignored boxes.
+    template <typename array_type>
+    std::vector<std::vector<rectangle> > load_image_dataset (
+        array_type& images,
+        std::vector<std::vector<full_object_detection> >& object_locations,
+        const image_dataset_file& source,
+        std::vector<std::string>& parts_list
+    )
+    {
+        typedef typename array_type::value_type image_type;
+        parts_list.clear();
+        images.clear();
+        object_locations.clear();
+
+        using namespace dlib::image_dataset_metadata;
+        dataset data;
+        load_image_dataset_metadata(data, source.get_filename());
+
+        // Set the current directory to be the one that contains the
+        // metadata file. We do this because the file might contain
+        // file paths which are relative to this folder.
+        locally_change_current_dir chdir(get_parent_directory(file(source.get_filename())));
+
+        // Pass 1: collect the name of every part used by a box that will be loaded.
+        std::set<std::string> all_parts;
+        for (unsigned long i = 0; i < data.images.size(); ++i)
+        {
+            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
+            {
+                const auto& b = data.images[i].boxes[j];
+                if (!source.should_load_box(b))
+                    continue;
+
+                const std::map<std::string,point>& parts = b.parts;
+                for (const auto& named_part : parts)
+                    all_parts.insert(named_part.first);
+            }
+        }
+
+        // make a mapping between part names and the integers [0, all_parts.size())
+        std::map<std::string,int> parts_idx;
+        for (const auto& name : all_parts)
+        {
+            parts_idx[name] = static_cast<int>(parts_list.size());
+            parts_list.push_back(name);
+        }
+
+        // Pass 2: load the images and convert their boxes.
+        std::vector<std::vector<rectangle> > ignored_rects;
+        std::vector<rectangle> ignored;
+        image_type img;
+        std::vector<full_object_detection> object_dets;
+        for (unsigned long i = 0; i < data.images.size(); ++i)
+        {
+            // area of the smallest non-ignored box in this image
+            double min_rect_size = std::numeric_limits<double>::infinity();
+            object_dets.clear();
+            ignored.clear();
+            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
+            {
+                const auto& b = data.images[i].boxes[j];
+                if (!source.should_load_box(b))
+                    continue;
+                if (b.ignore)
+                {
+                    ignored.push_back(b.rect);
+                    continue;
+                }
+
+                // Every slot starts as "not present"; fill in the parts this box has.
+                std::vector<point> partlist(parts_idx.size(), OBJECT_PART_NOT_PRESENT);
+                const std::map<std::string,point>& parts = b.parts;
+                for (const auto& named_part : parts)
+                    partlist[parts_idx[named_part.first]] = named_part.second;
+
+                object_dets.push_back(full_object_detection(b.rect, partlist));
+                min_rect_size = std::min<double>(min_rect_size, object_dets.back().get_rect().area());
+            }
+
+            if (source.should_skip_empty_images() && object_dets.size() == 0)
+                continue;
+
+            load_image(img, data.images[i].filename);
+            if (object_dets.size() != 0)
+            {
+                // if shrinking the image would still result in the smallest box being
+                // bigger than the box area threshold then shrink the image.  Parts equal
+                // to OBJECT_PART_NOT_PRESENT are skipped so that the sentinel stays
+                // comparable to OBJECT_PART_NOT_PRESENT after the image is shrunk.
+                while(min_rect_size/2/2 > source.box_area_thresh())
+                {
+                    pyramid_down<2> pyr;
+                    pyr(img);
+                    min_rect_size *= (1.0/2.0)*(1.0/2.0);
+                    for (auto&& r : object_dets)
+                    {
+                        r.get_rect() = pyr.rect_down(r.get_rect());
+                        for (unsigned long k = 0; k < r.num_parts(); ++k)
+                        {
+                            if (r.part(k) != OBJECT_PART_NOT_PRESENT)
+                                r.part(k) = pyr.point_down(r.part(k));
+                        }
+                    }
+                    for (auto&& r : ignored)
+                        r = pyr.rect_down(r);
+                }
+                while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh())
+                {
+                    pyramid_down<3> pyr;
+                    pyr(img);
+                    min_rect_size *= (2.0/3.0)*(2.0/3.0);
+                    for (auto&& r : object_dets)
+                    {
+                        r.get_rect() = pyr.rect_down(r.get_rect());
+                        for (unsigned long k = 0; k < r.num_parts(); ++k)
+                        {
+                            if (r.part(k) != OBJECT_PART_NOT_PRESENT)
+                                r.part(k) = pyr.point_down(r.part(k));
+                        }
+                    }
+                    for (auto&& r : ignored)
+                        r = pyr.rect_down(r);
+                }
+            }
+            images.push_back(img);
+            object_locations.push_back(object_dets);
+            ignored_rects.push_back(ignored);
+        }
+
+
+        return ignored_rects;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Same as the overload taking parts_list, for callers that do not need the part
+    // names: the list is collected into a local vector and dropped.
+    template <typename array_type>
+    std::vector<std::vector<rectangle> > load_image_dataset (
+        array_type& images,
+        std::vector<std::vector<full_object_detection> >& object_locations,
+        const image_dataset_file& source
+    )
+    {
+        std::vector<std::string> unused_part_names;
+        return load_image_dataset(images, object_locations, source, unused_part_names);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Loads full_object_detection annotations from filename; part names are discarded.
+    template <typename array_type>
+    std::vector<std::vector<rectangle> > load_image_dataset (
+        array_type& images,
+        std::vector<std::vector<full_object_detection> >& object_locations,
+        const std::string& filename
+    )
+    {
+        std::vector<std::string> unused_part_names;
+        const image_dataset_file source(filename);
+        return load_image_dataset(images, object_locations, source, unused_part_names);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_LOAD_IMAGE_DaTASET_Hh_
+