path: root/ml/dlib/tools/python/src/image_dataset_metadata.cpp
Diffstat (limited to 'ml/dlib/tools/python/src/image_dataset_metadata.cpp')
-rw-r--r--   ml/dlib/tools/python/src/image_dataset_metadata.cpp   279
1 file changed, 279 insertions(+), 0 deletions(-)
diff --git a/ml/dlib/tools/python/src/image_dataset_metadata.cpp b/ml/dlib/tools/python/src/image_dataset_metadata.cpp
new file mode 100644
index 000000000..8f23ddd3f
--- /dev/null
+++ b/ml/dlib/tools/python/src/image_dataset_metadata.cpp
@@ -0,0 +1,279 @@
+// Copyright (C) 2018 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+
+#include "opaque_types.h"
+#include <dlib/python.h>
+#include <dlib/data_io.h>
+#include <dlib/image_processing.h>
+#include <pybind11/stl_bind.h>
+#include <pybind11/stl.h>
+#include <iostream>
+
+namespace pybind11
+{
+
+ // a version of bind_map that doesn't force its own __repr__ on you.
+template <typename Map, typename holder_type = std::unique_ptr<Map>, typename... Args>
+class_<Map, holder_type> bind_map_no_default_repr(handle scope, const std::string &name, Args&&... args) {
+ using KeyType = typename Map::key_type;
+ using MappedType = typename Map::mapped_type;
+ using Class_ = class_<Map, holder_type>;
+
+ // If either type is a non-module-local bound type then make the map binding non-local as well;
+ // otherwise (e.g. both types are either module-local or converting) the map will be
+ // module-local.
+ auto tinfo = detail::get_type_info(typeid(MappedType));
+ bool local = !tinfo || tinfo->module_local;
+ if (local) {
+ tinfo = detail::get_type_info(typeid(KeyType));
+ local = !tinfo || tinfo->module_local;
+ }
+
+ Class_ cl(scope, name.c_str(), pybind11::module_local(local), std::forward<Args>(args)...);
+
+ cl.def(init<>());
+
+
+ cl.def("__bool__",
+ [](const Map &m) -> bool { return !m.empty(); },
+ "Check whether the map is nonempty"
+ );
+
+ cl.def("__iter__",
+ [](Map &m) { return make_key_iterator(m.begin(), m.end()); },
+ keep_alive<0, 1>() /* Essential: keep map alive while iterator exists */
+ );
+
+ cl.def("items",
+ [](Map &m) { return make_iterator(m.begin(), m.end()); },
+ keep_alive<0, 1>() /* Essential: keep map alive while iterator exists */
+ );
+
+ cl.def("__getitem__",
+ [](Map &m, const KeyType &k) -> MappedType & {
+ auto it = m.find(k);
+ if (it == m.end())
+ throw key_error();
+ return it->second;
+ },
+ return_value_policy::reference_internal // ref + keepalive
+ );
+
+ // Assignment provided only if the type is copyable
+ detail::map_assignment<Map, Class_>(cl);
+
+ cl.def("__delitem__",
+ [](Map &m, const KeyType &k) {
+ auto it = m.find(k);
+ if (it == m.end())
+ throw key_error();
+ m.erase(it);
+ }
+ );
+
+ cl.def("__len__", &Map::size);
+
+ return cl;
+}
+
+}
+
+using namespace dlib;
+using namespace std;
+using namespace dlib::image_dataset_metadata;
+
+namespace py = pybind11;
+
+
+dataset py_load_image_dataset_metadata(
+ const std::string& filename
+)
+{
+ dataset temp;
+ load_image_dataset_metadata(temp, filename);
+ return temp;
+}
+
+std::shared_ptr<std::map<std::string,point>> map_from_object(py::dict obj)
+{
+ auto ret = std::make_shared<std::map<std::string,point>>();
+ for (auto& v : obj)
+ {
+ (*ret)[v.first.cast<std::string>()] = v.second.cast<point>();
+ }
+ return ret;
+}
+
+// ----------------------------------------------------------------------------------------
+
+image_dataset_metadata::dataset py_make_bounding_box_regression_training_data (
+ const image_dataset_metadata::dataset& truth,
+ const py::object& detections
+)
+{
+ try
+ {
+ // if detections is a std::vector then call like this.
+ return make_bounding_box_regression_training_data(truth, detections.cast<const std::vector<std::vector<rectangle>>&>());
+ }
+ catch (py::cast_error&)
+ {
+ // otherwise, detections should be a list of std::vectors.
+ py::list dets(detections);
+ std::vector<std::vector<rectangle>> temp;
+ for (auto& d : dets)
+ temp.emplace_back(d.cast<const std::vector<rectangle>&>());
+ return make_bounding_box_regression_training_data(truth, temp);
+ }
+}
+
+// ----------------------------------------------------------------------------------------
+
+void bind_image_dataset_metadata(py::module &m_)
+{
+ auto m = m_.def_submodule("image_dataset_metadata", "Routines and objects for working with dlib's image dataset metadata XML files.");
+
+ auto datasetstr = [](const dataset& item) { return "dlib.image_dataset_metadata.dataset: images:" + to_string(item.images.size()) + ", " + item.name; };
+ auto datasetrepr = [datasetstr](const dataset& item) { return "<"+datasetstr(item)+">"; };
+ py::class_<dataset>(m, "dataset",
+ "This object represents a labeled set of images. In particular, it contains the filename for each image as well as annotated boxes.")
+ .def("__str__", datasetstr)
+ .def("__repr__", datasetrepr)
+ .def_readwrite("images", &dataset::images)
+ .def_readwrite("comment", &dataset::comment)
+ .def_readwrite("name", &dataset::name);
+
+ auto imagestr = [](const image& item) { return "dlib.image_dataset_metadata.image: boxes:"+to_string(item.boxes.size())+ ", " + item.filename; };
+ auto imagerepr = [imagestr](const image& item) { return "<"+imagestr(item)+">"; };
+ py::class_<image>(m, "image", "This object represents an annotated image.")
+ .def_readwrite("filename", &image::filename)
+ .def("__str__", imagestr)
+ .def("__repr__", imagerepr)
+ .def_readwrite("boxes", &image::boxes);
+
+
+ auto partsstr = [](const std::map<std::string,point>& item) {
+ std::ostringstream sout;
+ sout << "{";
+ for (auto& v : item)
+ sout << "'" << v.first << "': " << v.second << ", ";
+ sout << "}";
+ return sout.str();
+ };
+ auto partsrepr = [](const std::map<std::string,point>& item) {
+ std::ostringstream sout;
+ sout << "dlib.image_dataset_metadata.parts({\n";
+ for (auto& v : item)
+ sout << "'" << v.first << "': dlib.point" << v.second << ",\n";
+ sout << "})";
+ return sout.str();
+ };
+
+ py::bind_map_no_default_repr<std::map<std::string,point>, std::shared_ptr<std::map<std::string,point>> >(m, "parts",
+ "This object is a dictionary mapping string names to object part locations.")
+ .def(py::init(&map_from_object))
+ .def("__str__", partsstr)
+ .def("__repr__", partsrepr);
+
+
+ auto rectstr = [](const rectangle& r) {
+ std::ostringstream sout;
+ sout << "dlib.rectangle(" << r.left() << "," << r.top() << "," << r.right() << "," << r.bottom() << ")";
+ return sout.str();
+ };
+ auto boxstr = [rectstr](const box& item) { return "dlib.image_dataset_metadata.box at " + rectstr(item.rect); };
+ auto boxrepr = [boxstr](const box& item) { return "<"+boxstr(item)+">"; };
+ py::class_<box> pybox(m, "box",
+ "This object represents an annotated rectangular area of an image. \n"
+ "It is typically used to mark the location of an object such as a \n"
+ "person, car, etc.\n"
+ "\n"
+ "The main variable of interest is rect. It gives the location of \n"
+ "the box. All the other variables are optional." ); pybox
+ .def("__str__", boxstr)
+ .def("__repr__", boxrepr)
+ .def_readwrite("rect", &box::rect)
+ .def_readonly("parts", &box::parts)
+ .def_readwrite("label", &box::label)
+ .def_readwrite("difficult", &box::difficult)
+ .def_readwrite("truncated", &box::truncated)
+ .def_readwrite("occluded", &box::occluded)
+ .def_readwrite("ignore", &box::ignore)
+ .def_readwrite("pose", &box::pose)
+ .def_readwrite("detection_score", &box::detection_score)
+ .def_readwrite("angle", &box::angle)
+ .def_readwrite("gender", &box::gender)
+ .def_readwrite("age", &box::age);
+
+ py::enum_<gender_t>(pybox,"gender_type")
+ .value("MALE", gender_t::MALE)
+ .value("FEMALE", gender_t::FEMALE)
+ .value("UNKNOWN", gender_t::UNKNOWN)
+ .export_values();
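+
+ // A hedged Python usage sketch (not part of the bindings themselves): it assumes a
+ // dataset object named "data" has already been loaded. The attribute names below come
+ // from the box, parts, and gender_type bindings defined above.
+ //
+ //   b = data.images[0].boxes[0]
+ //   print(b.rect, b.label)
+ //   b.parts["nose"] = dlib.point(10, 20)   # parts acts like a dict of dlib.point
+ //   if b.gender == dlib.image_dataset_metadata.box.gender_type.MALE:
+ //       print("male annotation")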
+
+
+ m.def("save_image_dataset_metadata", &save_image_dataset_metadata, py::arg("data"), py::arg("filename"),
+ "Writes the contents of the meta object to a file with the given filename. The file will be in an XML format."
+ );
+
+ m.def("load_image_dataset_metadata", &py_load_image_dataset_metadata, py::arg("filename"),
+ "Attempts to interpret filename as a file containing XML formatted data as produced "
+ "by the save_image_dataset_metadata() function. The data is loaded and returned as a dlib.image_dataset_metadata.dataset object."
+ );
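+
+ // A hedged usage sketch of the two functions above; "training.xml" is a placeholder
+ // for any file produced by dlib's imglab tool or by save_image_dataset_metadata():
+ //
+ //   data = dlib.image_dataset_metadata.load_image_dataset_metadata("training.xml")
+ //   print(len(data.images), data.name)
+ //   dlib.image_dataset_metadata.save_image_dataset_metadata(data, "copy.xml")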
+
+ m_.def("make_bounding_box_regression_training_data", &py_make_bounding_box_regression_training_data,
+ py::arg("truth"), py::arg("detections"),
+"requires \n\
+ - len(truth.images) == len(detections) \n\
+ - detections == A dlib.rectangless object or a list of dlib.rectangles. \n\
+ensures \n\
+ - Suppose you have an object detector that can roughly locate objects in an \n\
+ image. This means your detector draws boxes around objects, but these are \n\
+ *rough* boxes in the sense that they aren't positioned super accurately. For \n\
+ instance, HOG based detectors usually have a stride of 8 pixels. So the \n\
+ positional accuracy is going to be, at best, +/-8 pixels. \n\
+ \n\
+ If you want to get better positional accuracy one easy thing to do is train a \n\
+ shape_predictor to give you the corners of the object. The \n\
+ make_bounding_box_regression_training_data() routine helps you do this by \n\
+ creating an appropriate training dataset. It does this by taking the dataset \n\
+ you used to train your detector (the truth object), and combining that with \n\
+ the output of your detector on each image in the training dataset (the \n\
+ detections object). In particular, it will create a new annotated dataset \n\
+ where each object box is one of the rectangles from detections and that \n\
+ object has 4 part annotations, the corners of the truth rectangle \n\
+ corresponding to that detection rectangle. You can then take the returned \n\
+ dataset and train a shape_predictor on it. The resulting shape_predictor can \n\
+ then be used to do bounding box regression. \n\
+ - We assume that detections[i] contains object detections corresponding to \n\
+ the image truth.images[i]."
+ /*!
+ requires
+ - len(truth.images) == len(detections)
+ - detections == A dlib.rectangless object or a list of dlib.rectangles.
+ ensures
+ - Suppose you have an object detector that can roughly locate objects in an
+ image. This means your detector draws boxes around objects, but these are
+ *rough* boxes in the sense that they aren't positioned super accurately. For
+ instance, HOG based detectors usually have a stride of 8 pixels. So the
+ positional accuracy is going to be, at best, +/-8 pixels.
+
+ If you want to get better positional accuracy one easy thing to do is train a
+ shape_predictor to give you the corners of the object. The
+ make_bounding_box_regression_training_data() routine helps you do this by
+ creating an appropriate training dataset. It does this by taking the dataset
+ you used to train your detector (the truth object), and combining that with
+ the output of your detector on each image in the training dataset (the
+ detections object). In particular, it will create a new annotated dataset
+ where each object box is one of the rectangles from detections and that
+ object has 4 part annotations, the corners of the truth rectangle
+ corresponding to that detection rectangle. You can then take the returned
+ dataset and train a shape_predictor on it. The resulting shape_predictor can
+ then be used to do bounding box regression.
+ - We assume that detections[i] contains object detections corresponding to
+ the image truth.images[i].
+ !*/
+ );
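+
+ // A hedged end-to-end sketch of the workflow described in the docstring above. The
+ // detector ("my_detector") is hypothetical; anything that returns one list of
+ // dlib.rectangles per training image works:
+ //
+ //   truth = dlib.image_dataset_metadata.load_image_dataset_metadata("training.xml")
+ //   detections = [my_detector(dlib.load_rgb_image(img.filename)) for img in truth.images]
+ //   bbr_data = dlib.make_bounding_box_regression_training_data(truth, detections)
+ //   dlib.image_dataset_metadata.save_image_dataset_metadata(bbr_data, "bbr_training.xml")
+ //   # a dlib.shape_predictor trained on bbr_training.xml can then refine the detector's boxes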
+}
+
+