// Copyright (C) 2018  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.

#include "opaque_types.h"
#include <dlib/python.h>
#include <dlib/matrix.h>
#include <dlib/image_processing.h>
#include <dlib/data_io.h>
#include <pybind11/stl_bind.h>
#include <pybind11/stl.h>

namespace pybind11
{
    // A version of bind_map that doesn't force its own __repr__ on you.
    template <typename Map, typename holder_type = std::unique_ptr<Map>, typename... Args>
    class_<Map, holder_type> bind_map_no_default_repr(handle scope, const std::string &name, Args&&... args) {
        using KeyType = typename Map::key_type;
        using MappedType = typename Map::mapped_type;
        using Class_ = class_<Map, holder_type>;

        // If either type is a non-module-local bound type then make the map binding non-local as well;
        // otherwise (e.g. both types are either module-local or converting) the map will be
        // module-local.
        auto tinfo = detail::get_type_info(typeid(MappedType));
        bool local = !tinfo || tinfo->module_local;
        if (local) {
            tinfo = detail::get_type_info(typeid(KeyType));
            local = !tinfo || tinfo->module_local;
        }

        Class_ cl(scope, name.c_str(), pybind11::module_local(local), std::forward<Args>(args)...);

        cl.def(init<>());

        cl.def("__bool__",
            [](const Map &m) -> bool { return !m.empty(); },
            "Check whether the map is nonempty"
        );

        cl.def("__iter__",
            [](Map &m) { return make_key_iterator(m.begin(), m.end()); },
            keep_alive<0, 1>() /* Essential: keep map alive while iterator exists */
        );

        cl.def("items",
            [](Map &m) { return make_iterator(m.begin(), m.end()); },
            keep_alive<0, 1>() /* Essential: keep map alive while iterator exists */
        );

        cl.def("__getitem__",
            [](Map &m, const KeyType &k) -> MappedType & {
                auto it = m.find(k);
                if (it == m.end())
                    throw key_error();
                return it->second;
            },
            return_value_policy::reference_internal // ref + keepalive
        );

        // Assignment provided only if the type is copyable.
        detail::map_assignment<Map, Class_>(cl);

        cl.def("__delitem__",
            [](Map &m, const KeyType &k) {
                auto it = m.find(k);
                if (it == m.end())
                    throw key_error();
                return m.erase(it);
            }
        );

        cl.def("__len__", &Map::size);

        return cl;
    }
}
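// ----------------------------------------------------------------------------------------

/*
    A minimal usage sketch for the helper above (the map type and the module handle m
    are hypothetical, for illustration only).  It behaves like pybind11::bind_map()
    except that __str__/__repr__ are left for the caller to define:

        auto cl = py::bind_map_no_default_repr<std::map<std::string,double>>(m, "string_double_map");
        cl.def("__repr__", [](const std::map<std::string,double>& self) {
            return "string_double_map of size " + std::to_string(self.size());
        });
*/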
using namespace dlib;
using namespace std;
using namespace dlib::image_dataset_metadata;

namespace py = pybind11;

dataset py_load_image_dataset_metadata(
    const std::string& filename
)
{
    dataset temp;
    load_image_dataset_metadata(temp, filename);
    return temp;
}

std::shared_ptr<std::map<std::string,point>> map_from_object(py::dict obj)
{
    auto ret = std::make_shared<std::map<std::string,point>>();
    for (auto& v : obj)
    {
        (*ret)[v.first.cast<std::string>()] = v.second.cast<point>();
    }
    return ret;
}

// ----------------------------------------------------------------------------------------

image_dataset_metadata::dataset py_make_bounding_box_regression_training_data (
    const image_dataset_metadata::dataset& truth,
    const py::object& detections
)
{
    try
    {
        // If detections is a std::vector<std::vector<rectangle>> then call like this.
        return make_bounding_box_regression_training_data(truth, detections.cast<const std::vector<std::vector<rectangle>>&>());
    }
    catch (py::cast_error&)
    {
        // Otherwise, detections should be a list of std::vector<rectangle> objects.
        py::list dets(detections);
        std::vector<std::vector<rectangle>> temp;
        for (auto& d : dets)
            temp.emplace_back(d.cast<const std::vector<rectangle>&>());
        return make_bounding_box_regression_training_data(truth, temp);
    }
}
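/*
    A hedged C++-side sketch of what the wrapper above ultimately dispatches to
    (variable names are illustrative only): the underlying dlib routine takes the
    truth dataset plus one vector of detection rectangles per truth image:

        std::vector<std::vector<rectangle>> dets(truth.images.size());
        // ... fill dets[i] with the detector's output boxes for truth.images[i] ...
        auto new_data = make_bounding_box_regression_training_data(truth, dets);
*/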
// ----------------------------------------------------------------------------------------

void bind_image_dataset_metadata(py::module &m_)
{
    auto m = m_.def_submodule("image_dataset_metadata",
        "Routines and objects for working with dlib's image dataset metadata XML files.");

    auto datasetstr = [](const dataset& item) {
        return "dlib.image_dataset_metadata.dataset: images:" + to_string(item.images.size()) + ", " + item.name;
    };
    auto datasetrepr = [datasetstr](const dataset& item) { return "<"+datasetstr(item)+">"; };
    py::class_<dataset>(m, "dataset",
        "This object represents a labeled set of images.  In particular, it contains the filename for each image as well as annotated boxes.")
        .def("__str__", datasetstr)
        .def("__repr__", datasetrepr)
        .def_readwrite("images", &dataset::images)
        .def_readwrite("comment", &dataset::comment)
        .def_readwrite("name", &dataset::name);

    auto imagestr = [](const image& item) {
        return "dlib.image_dataset_metadata.image: boxes:"+to_string(item.boxes.size())+ ", " + item.filename;
    };
    auto imagerepr = [imagestr](const image& item) { return "<"+imagestr(item)+">"; };
    py::class_<image>(m, "image", "This object represents an annotated image.")
        .def_readwrite("filename", &image::filename)
        .def("__str__", imagestr)
        .def("__repr__", imagerepr)
        .def_readwrite("boxes", &image::boxes);

    auto partsstr = [](const std::map<std::string,point>& item) {
        std::ostringstream sout;
        sout << "{";
        for (auto& v : item)
            sout << "'" << v.first << "': " << v.second << ", ";
        sout << "}";
        return sout.str();
    };
    auto partsrepr = [](const std::map<std::string,point>& item) {
        std::ostringstream sout;
        sout << "dlib.image_dataset_metadata.parts({\n";
        for (auto& v : item)
            sout << "'" << v.first << "': dlib.point" << v.second << ",\n";
        sout << "})";
        return sout.str();
    };

    py::bind_map_no_default_repr<std::map<std::string,point>, std::shared_ptr<std::map<std::string,point>>>(m, "parts",
        "This object is a dictionary mapping string names to object part locations.")
        .def(py::init(&map_from_object))
        .def("__str__", partsstr)
        .def("__repr__", partsrepr);

    auto rectstr = [](const rectangle& r) {
        std::ostringstream sout;
        sout << "dlib.rectangle(" << r.left() << "," << r.top() << "," << r.right() << "," << r.bottom() << ")";
        return sout.str();
    };
    auto boxstr = [rectstr](const box& item) { return "dlib.image_dataset_metadata.box at " + rectstr(item.rect); };
    auto boxrepr = [boxstr](const box& item) { return "<"+boxstr(item)+">"; };
    py::class_<box> pybox(m, "box",
        "This object represents an annotated rectangular area of an image. \n"
        "It is typically used to mark the location of an object such as a \n"
        "person, car, etc.\n"
        "\n"
        "The main variable of interest is rect.  It gives the location of \n"
        "the box.  All the other variables are optional."
    );
    pybox
        .def("__str__", boxstr)
        .def("__repr__", boxrepr)
        .def_readwrite("rect", &box::rect)
        .def_readonly("parts", &box::parts)
        .def_readwrite("label", &box::label)
        .def_readwrite("difficult", &box::difficult)
        .def_readwrite("truncated", &box::truncated)
        .def_readwrite("occluded", &box::occluded)
        .def_readwrite("ignore", &box::ignore)
        .def_readwrite("pose", &box::pose)
        .def_readwrite("detection_score", &box::detection_score)
        .def_readwrite("angle", &box::angle)
        .def_readwrite("gender", &box::gender)
        .def_readwrite("age", &box::age);

    py::enum_<gender_t>(pybox, "gender_type")
        .value("MALE", gender_t::MALE)
        .value("FEMALE", gender_t::FEMALE)
        .value("UNKNOWN", gender_t::UNKNOWN)
        .export_values();

    m.def("save_image_dataset_metadata", &save_image_dataset_metadata, py::arg("data"), py::arg("filename"),
        "Writes the contents of the meta object to a file with the given filename.  The file will be in an XML format."
    );

    m.def("load_image_dataset_metadata", &py_load_image_dataset_metadata, py::arg("filename"),
        "Attempts to interpret filename as a file containing XML formatted data as produced "
        "by the save_image_dataset_metadata() function.  The data is loaded and returned as a dlib.image_dataset_metadata.dataset object."
    );
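    /*
        A round-trip sketch for the two functions above (filenames hypothetical).
        They wrap dlib::load_image_dataset_metadata() and
        dlib::save_image_dataset_metadata() from dlib/data_io:

            image_dataset_metadata::dataset data;
            load_image_dataset_metadata(data, "training.xml");
            data.name = "my dataset";
            save_image_dataset_metadata(data, "training_out.xml");
    */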
    m_.def("make_bounding_box_regression_training_data", &py_make_bounding_box_regression_training_data,
        py::arg("truth"), py::arg("detections"),
"requires \n\
    - len(truth.images) == len(detections) \n\
    - detections == A dlib.rectangless object or a list of dlib.rectangles. \n\
ensures \n\
    - Suppose you have an object detector that can roughly locate objects in an \n\
      image.  This means your detector draws boxes around objects, but these are \n\
      *rough* boxes in the sense that they aren't positioned super accurately.  For \n\
      instance, HOG based detectors usually have a stride of 8 pixels.  So the \n\
      positional accuracy is going to be, at best, +/-8 pixels. \n\
      \n\
      If you want to get better positional accuracy one easy thing to do is train a \n\
      shape_predictor to give you the corners of the object.  The \n\
      make_bounding_box_regression_training_data() routine helps you do this by \n\
      creating an appropriate training dataset.  It does this by taking the dataset \n\
      you used to train your detector (the truth object), and combining that with \n\
      the output of your detector on each image in the training dataset (the \n\
      detections object).  In particular, it will create a new annotated dataset \n\
      where each object box is one of the rectangles from detections and that \n\
      object has 4 part annotations, the corners of the truth rectangle \n\
      corresponding to that detection rectangle.  You can then take the returned \n\
      dataset and train a shape_predictor on it.  The resulting shape_predictor can \n\
      then be used to do bounding box regression. \n\
    - We assume that detections[i] contains object detections corresponding to \n\
      the image truth.images[i]."
        /*!
            requires
                - len(truth.images) == len(detections)
                - detections == A dlib.rectangless object or a list of dlib.rectangles.
            ensures
                - Suppose you have an object detector that can roughly locate objects in an
                  image.  This means your detector draws boxes around objects, but these are
                  *rough* boxes in the sense that they aren't positioned super accurately.  For
                  instance, HOG based detectors usually have a stride of 8 pixels.  So the
                  positional accuracy is going to be, at best, +/-8 pixels.

                  If you want to get better positional accuracy one easy thing to do is train a
                  shape_predictor to give you the corners of the object.  The
                  make_bounding_box_regression_training_data() routine helps you do this by
                  creating an appropriate training dataset.  It does this by taking the dataset
                  you used to train your detector (the truth object), and combining that with
                  the output of your detector on each image in the training dataset (the
                  detections object).  In particular, it will create a new annotated dataset
                  where each object box is one of the rectangles from detections and that
                  object has 4 part annotations, the corners of the truth rectangle
                  corresponding to that detection rectangle.  You can then take the returned
                  dataset and train a shape_predictor on it.  The resulting shape_predictor can
                  then be used to do bounding box regression.
                - We assume that detections[i] contains object detections corresponding to
                  the image truth.images[i].
        !*/
        );
}
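// ----------------------------------------------------------------------------------------

/*
    An end-to-end sketch (filenames hypothetical) of the C++ workflow the binding
    above supports, following the contract in its docstring: combine a truth dataset
    with per-image detections, then use the result to train a shape_predictor that
    does bounding box regression:

        image_dataset_metadata::dataset truth;
        load_image_dataset_metadata(truth, "truth.xml");

        std::vector<std::vector<rectangle>> detections(truth.images.size());
        // ... fill detections[i] by running your detector on truth.images[i] ...

        auto bbr_data = make_bounding_box_regression_training_data(truth, detections);
        save_image_dataset_metadata(bbr_data, "bbr_training.xml");
        // bbr_training.xml can now be used as shape_predictor training data.
*/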