diff options
Diffstat (limited to 'ml/dlib/tools/python/src')
31 files changed, 6639 insertions, 0 deletions
diff --git a/ml/dlib/tools/python/src/basic.cpp b/ml/dlib/tools/python/src/basic.cpp new file mode 100644 index 00000000..d87a53cc --- /dev/null +++ b/ml/dlib/tools/python/src/basic.cpp @@ -0,0 +1,272 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#include <dlib/python.h> +#include <dlib/matrix.h> +#include <sstream> +#include <string> +#include "opaque_types.h" + +#include <dlib/string.h> +#include <pybind11/stl_bind.h> + +using namespace std; +using namespace dlib; +namespace py = pybind11; + + +std::shared_ptr<std::vector<double> > array_from_object(py::object obj) +{ + try { + long nr = obj.cast<long>(); + return std::make_shared<std::vector<double>>(nr); + } catch (py::cast_error &e) { + py::list li = obj.cast<py::list>(); + const long nr = len(li); + auto temp = std::make_shared<std::vector<double>>(nr); + for ( long r = 0; r < nr; ++r) + { + (*temp)[r] = li[r].cast<double>(); + } + return temp; + } +} + +string array__str__ (const std::vector<double>& v) +{ + std::ostringstream sout; + for (unsigned long i = 0; i < v.size(); ++i) + { + sout << v[i]; + if (i+1 < v.size()) + sout << "\n"; + } + return sout.str(); +} + +string array__repr__ (const std::vector<double>& v) +{ + std::ostringstream sout; + sout << "dlib.array(["; + for (unsigned long i = 0; i < v.size(); ++i) + { + sout << v[i]; + if (i+1 < v.size()) + sout << ", "; + } + sout << "])"; + return sout.str(); +} + +string range__str__ (const std::pair<unsigned long,unsigned long>& p) +{ + std::ostringstream sout; + sout << p.first << ", " << p.second; + return sout.str(); +} + +string range__repr__ (const std::pair<unsigned long,unsigned long>& p) +{ + std::ostringstream sout; + sout << "dlib.range(" << p.first << ", " << p.second << ")"; + return sout.str(); +} + +struct range_iter +{ + std::pair<unsigned long,unsigned long> range; + unsigned long cur; + + unsigned long next() + { + if (cur < range.second) + { + 
return cur++; + } + else + { + PyErr_SetString(PyExc_StopIteration, "No more data."); + throw py::error_already_set(); + } + } +}; + +range_iter make_range_iterator (const std::pair<unsigned long,unsigned long>& p) +{ + range_iter temp; + temp.range = p; + temp.cur = p.first; + return temp; +} + +string pair__str__ (const std::pair<unsigned long,double>& p) +{ + std::ostringstream sout; + sout << p.first << ": " << p.second; + return sout.str(); +} + +string pair__repr__ (const std::pair<unsigned long,double>& p) +{ + std::ostringstream sout; + sout << "dlib.pair(" << p.first << ", " << p.second << ")"; + return sout.str(); +} + +string sparse_vector__str__ (const std::vector<std::pair<unsigned long,double> >& v) +{ + std::ostringstream sout; + for (unsigned long i = 0; i < v.size(); ++i) + { + sout << v[i].first << ": " << v[i].second; + if (i+1 < v.size()) + sout << "\n"; + } + return sout.str(); +} + +string sparse_vector__repr__ (const std::vector<std::pair<unsigned long,double> >& v) +{ + std::ostringstream sout; + sout << "< dlib.sparse_vector containing: \n" << sparse_vector__str__(v) << " >"; + return sout.str(); +} + +unsigned long range_len(const std::pair<unsigned long, unsigned long>& r) +{ + if (r.second > r.first) + return r.second-r.first; + else + return 0; +} + +template <typename T> +void resize(T& v, unsigned long n) { v.resize(n); } + +void bind_basic_types(py::module& m) +{ + { + typedef double item_type; + typedef std::vector<item_type> type; + typedef std::shared_ptr<type> type_ptr; + py::bind_vector<type, type_ptr >(m, "array", "This object represents a 1D array of floating point numbers. 
" + "Moreover, it binds directly to the C++ type std::vector<double>.") + .def(py::init(&array_from_object)) + .def("__str__", array__str__) + .def("__repr__", array__repr__) + .def("clear", &type::clear) + .def("resize", resize<type>) + .def("extend", extend_vector_with_python_list<item_type>) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + + { + typedef matrix<double,0,1> item_type; + typedef std::vector<item_type > type; + py::bind_vector<type>(m, "vectors", "This object is an array of vector objects.") + .def("clear", &type::clear) + .def("resize", resize<type>) + .def("extend", extend_vector_with_python_list<item_type>) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + + { + typedef std::vector<matrix<double,0,1> > item_type; + typedef std::vector<item_type > type; + py::bind_vector<type>(m, "vectorss", "This object is an array of arrays of vector objects.") + .def("clear", &type::clear) + .def("resize", resize<type>) + .def("extend", extend_vector_with_python_list<item_type>) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + + typedef pair<unsigned long,unsigned long> range_type; + py::class_<range_type>(m, "range", "This object is used to represent a range of elements in an array.") + .def(py::init<unsigned long,unsigned long>()) + .def_readwrite("begin",&range_type::first, "The index of the first element in the range. This is represented using an unsigned integer.") + .def_readwrite("end",&range_type::second, "One past the index of the last element in the range. 
This is represented using an unsigned integer.") + .def("__str__", range__str__) + .def("__repr__", range__repr__) + .def("__iter__", &make_range_iterator) + .def("__len__", &range_len) + .def(py::pickle(&getstate<range_type>, &setstate<range_type>)); + + py::class_<range_iter>(m, "_range_iter") + .def("next", &range_iter::next) + .def("__next__", &range_iter::next); + + { + typedef std::pair<unsigned long, unsigned long> item_type; + typedef std::vector<item_type > type; + py::bind_vector<type>(m, "ranges", "This object is an array of range objects.") + .def("clear", &type::clear) + .def("resize", resize<type>) + .def("extend", extend_vector_with_python_list<item_type>) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + + { + typedef std::vector<std::pair<unsigned long, unsigned long> > item_type; + typedef std::vector<item_type > type; + py::bind_vector<type>(m, "rangess", "This object is an array of arrays of range objects.") + .def("clear", &type::clear) + .def("resize", resize<type>) + .def("extend", extend_vector_with_python_list<item_type>) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + + + typedef pair<unsigned long,double> pair_type; + py::class_<pair_type>(m, "pair", "This object is used to represent the elements of a sparse_vector.") + .def(py::init<unsigned long,double>()) + .def_readwrite("first",&pair_type::first, "This field represents the index/dimension number.") + .def_readwrite("second",&pair_type::second, "This field contains the value in a vector at dimension specified by the first field.") + .def("__str__", pair__str__) + .def("__repr__", pair__repr__) + .def(py::pickle(&getstate<pair_type>, &setstate<pair_type>)); + + { + typedef std::vector<pair_type> type; + py::bind_vector<type>(m, "sparse_vector", +"This object represents the mathematical idea of a sparse column vector. It is \n\ +simply an array of dlib.pair objects, each representing an index/value pair in \n\ +the vector. 
Any elements of the vector which are missing are implicitly set to \n\ +zero. \n\ + \n\ +Unless otherwise noted, any routines taking a sparse_vector assume the sparse \n\ +vector is sorted and has unique elements. That is, the index values of the \n\ +pairs in a sparse_vector should be listed in increasing order and there should \n\ +not be duplicates. However, some functions work with \"unsorted\" sparse \n\ +vectors. These are dlib.sparse_vector objects that have either duplicate \n\ +entries or non-sorted index values. Note further that you can convert an \n\ +\"unsorted\" sparse_vector into a properly sorted sparse vector by calling \n\ +dlib.make_sparse_vector() on it. " + ) + .def("__str__", sparse_vector__str__) + .def("__repr__", sparse_vector__repr__) + .def("clear", &type::clear) + .def("resize", resize<type>) + .def("extend", extend_vector_with_python_list<pair_type>) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + + { + typedef std::vector<pair_type> item_type; + typedef std::vector<item_type > type; + py::bind_vector<type>(m, "sparse_vectors", "This object is an array of sparse_vector objects.") + .def("clear", &type::clear) + .def("resize", resize<type>) + .def("extend", extend_vector_with_python_list<item_type>) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + + { + typedef std::vector<std::vector<pair_type> > item_type; + typedef std::vector<item_type > type; + py::bind_vector<type>(m, "sparse_vectorss", "This object is an array of arrays of sparse_vector objects.") + .def("clear", &type::clear) + .def("resize", resize<type>) + .def("extend", extend_vector_with_python_list<item_type>) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } +} + diff --git a/ml/dlib/tools/python/src/cca.cpp b/ml/dlib/tools/python/src/cca.cpp new file mode 100644 index 00000000..dcf47652 --- /dev/null +++ b/ml/dlib/tools/python/src/cca.cpp @@ -0,0 +1,137 @@ +// Copyright (C) 2013 Davis E. 
King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. + +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/statistics.h> + +using namespace dlib; +namespace py = pybind11; + +typedef std::vector<std::pair<unsigned long,double> > sparse_vect; + +struct cca_outputs +{ + matrix<double,0,1> correlations; + matrix<double> Ltrans; + matrix<double> Rtrans; +}; + +cca_outputs _cca1 ( + const std::vector<sparse_vect>& L, + const std::vector<sparse_vect>& R, + unsigned long num_correlations, + unsigned long extra_rank, + unsigned long q, + double regularization +) +{ + pyassert(num_correlations > 0 && L.size() > 0 && R.size() > 0 && L.size() == R.size() && regularization >= 0, + "Invalid inputs"); + + cca_outputs temp; + temp.correlations = cca(L,R,temp.Ltrans,temp.Rtrans,num_correlations,extra_rank,q,regularization); + return temp; +} + +// ---------------------------------------------------------------------------------------- + +unsigned long sparse_vector_max_index_plus_one ( + const sparse_vect& v +) +{ + return max_index_plus_one(v); +} + +matrix<double,0,1> apply_cca_transform ( + const matrix<double>& m, + const sparse_vect& v +) +{ + pyassert((long)max_index_plus_one(v) <= m.nr(), "Invalid Inputs"); + return sparse_matrix_vector_multiply(trans(m), v); +} + +void bind_cca(py::module& m) +{ + py::class_<cca_outputs>(m, "cca_outputs") + .def_readwrite("correlations", &cca_outputs::correlations) + .def_readwrite("Ltrans", &cca_outputs::Ltrans) + .def_readwrite("Rtrans", &cca_outputs::Rtrans); + + m.def("max_index_plus_one", sparse_vector_max_index_plus_one, py::arg("v"), +"ensures \n\ + - returns the dimensionality of the given sparse vector. That is, returns a \n\ + number one larger than the maximum index value in the vector. If the vector \n\ + is empty then returns 0. 
" + ); + + + m.def("apply_cca_transform", apply_cca_transform, py::arg("m"), py::arg("v"), +"requires \n\ + - max_index_plus_one(v) <= m.nr() \n\ +ensures \n\ + - returns trans(m)*v \n\ + (i.e. multiply m by the vector v and return the result) " + ); + + + m.def("cca", _cca1, py::arg("L"), py::arg("R"), py::arg("num_correlations"), py::arg("extra_rank")=5, py::arg("q")=2, py::arg("regularization")=0, +"requires \n\ + - num_correlations > 0 \n\ + - len(L) > 0 \n\ + - len(R) > 0 \n\ + - len(L) == len(R) \n\ + - regularization >= 0 \n\ + - L and R must be properly sorted sparse vectors. This means they must list their \n\ + elements in ascending index order and not contain duplicate index values. You can use \n\ + make_sparse_vector() to ensure this is true. \n\ +ensures \n\ + - This function performs a canonical correlation analysis between the vectors \n\ + in L and R. That is, it finds two transformation matrices, Ltrans and \n\ + Rtrans, such that row vectors in the transformed matrices L*Ltrans and \n\ + R*Rtrans are as correlated as possible (note that in this notation we \n\ + interpret L as a matrix with the input vectors in its rows). Note also that \n\ + this function tries to find transformations which produce num_correlations \n\ + dimensional output vectors. \n\ + - Note that you can easily apply the transformation to a vector using \n\ + apply_cca_transform(). So for example, like this: \n\ + - apply_cca_transform(Ltrans, some_sparse_vector) \n\ + - returns a structure containing the Ltrans and Rtrans transformation matrices \n\ + as well as the estimated correlations between elements of the transformed \n\ + vectors. \n\ + - This function assumes the data vectors in L and R have already been centered \n\ + (i.e. we assume the vectors have zero means). However, in many cases it is \n\ + fine to use uncentered data with cca(). But if it is important for your \n\ + problem then you should center your data before passing it to cca(). 
\n\ + - This function works with reduced rank approximations of the L and R matrices. \n\ + This makes it fast when working with large matrices. In particular, we use \n\ + the dlib::svd_fast() routine to find reduced rank representations of the input \n\ + matrices by calling it as follows: svd_fast(L, U,D,V, num_correlations+extra_rank, q) \n\ + and similarly for R. This means that you can use the extra_rank and q \n\ + arguments to cca() to influence the accuracy of the reduced rank \n\ + approximation. However, the default values should work fine for most \n\ + problems. \n\ + - The dimensions of the output vectors produced by L*#Ltrans or R*#Rtrans are \n\ + ordered such that the dimensions with the highest correlations come first. \n\ + That is, after applying the transforms produced by cca() to a set of vectors \n\ + you will find that dimension 0 has the highest correlation, then dimension 1 \n\ + has the next highest, and so on. This also means that the list of estimated \n\ + correlations returned from cca() will always be listed in decreasing order. \n\ + - This function performs the ridge regression version of Canonical Correlation \n\ + Analysis when regularization is set to a value > 0. In particular, larger \n\ + values indicate the solution should be more heavily regularized. This can be \n\ + useful when the dimensionality of the data is larger than the number of \n\ + samples. \n\ + - A good discussion of CCA can be found in the paper \"Canonical Correlation \n\ + Analysis\" by David Weenink. In particular, this function is implemented \n\ + using equations 29 and 30 from his paper. We also use the idea of doing CCA \n\ + on a reduced rank approximation of L and R as suggested by Paramveer S. \n\ + Dhillon in his paper \"Two Step CCA: A new spectral method for estimating \n\ + vector models of words\". 
" + + ); +} + + + diff --git a/ml/dlib/tools/python/src/cnn_face_detector.cpp b/ml/dlib/tools/python/src/cnn_face_detector.cpp new file mode 100644 index 00000000..f18d99d9 --- /dev/null +++ b/ml/dlib/tools/python/src/cnn_face_detector.cpp @@ -0,0 +1,183 @@ +// Copyright (C) 2017 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. + +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/matrix.h> +#include <dlib/dnn.h> +#include <dlib/image_transforms.h> +#include "indexing.h" +#include <pybind11/stl_bind.h> + +using namespace dlib; +using namespace std; + +namespace py = pybind11; + + +class cnn_face_detection_model_v1 +{ + +public: + + cnn_face_detection_model_v1(const std::string& model_filename) + { + deserialize(model_filename) >> net; + } + + std::vector<mmod_rect> detect ( + py::object pyimage, + const int upsample_num_times + ) + { + pyramid_down<2> pyr; + std::vector<mmod_rect> rects; + + // Copy the data into dlib based objects + matrix<rgb_pixel> image; + if (is_gray_python_image(pyimage)) + assign_image(image, numpy_gray_image(pyimage)); + else if (is_rgb_python_image(pyimage)) + assign_image(image, numpy_rgb_image(pyimage)); + else + throw dlib::error("Unsupported image type, must be 8bit gray or RGB image."); + + // Upsampling the image will allow us to detect smaller faces but will cause the + // program to use more RAM and run longer. + unsigned int levels = upsample_num_times; + while (levels > 0) + { + levels--; + pyramid_up(image, pyr); + } + + auto dets = net(image); + + // Scale the detection locations back to the original image size + // if the image was upscaled. 
+ for (auto&& d : dets) { + d.rect = pyr.rect_down(d.rect, upsample_num_times); + rects.push_back(d); + } + + return rects; + } + + std::vector<std::vector<mmod_rect> > detect_mult ( + py::list imgs, + const int upsample_num_times, + const int batch_size = 128 + ) + { + pyramid_down<2> pyr; + std::vector<matrix<rgb_pixel> > dimgs; + dimgs.reserve(len(imgs)); + + for(int i = 0; i < len(imgs); i++) + { + // Copy the data into dlib based objects + matrix<rgb_pixel> image; + py::object tmp = imgs[i].cast<py::object>(); + if (is_gray_python_image(tmp)) + assign_image(image, numpy_gray_image(tmp)); + else if (is_rgb_python_image(tmp)) + assign_image(image, numpy_rgb_image(tmp)); + else + throw dlib::error("Unsupported image type, must be 8bit gray or RGB image."); + + for(int i = 0; i < upsample_num_times; i++) + { + pyramid_up(image); + } + dimgs.push_back(image); + } + + for(int i = 1; i < dimgs.size(); i++) + { + if + ( + dimgs[i - 1].nc() != dimgs[i].nc() || + dimgs[i - 1].nr() != dimgs[i].nr() + ) + throw dlib::error("Images in list must all have the same dimensions."); + + } + + auto dets = net(dimgs, batch_size); + std::vector<std::vector<mmod_rect> > all_rects; + + for(auto&& im_dets : dets) + { + std::vector<mmod_rect> rects; + rects.reserve(im_dets.size()); + for (auto&& d : im_dets) { + d.rect = pyr.rect_down(d.rect, upsample_num_times); + rects.push_back(d); + } + all_rects.push_back(rects); + } + + return all_rects; + } + +private: + + template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>; + template <long num_filters, typename SUBNET> using con5 = con<num_filters,5,5,1,1,SUBNET>; + + template <typename SUBNET> using downsampler = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16,SUBNET>>>>>>>>>; + template <typename SUBNET> using rcon5 = relu<affine<con5<45,SUBNET>>>; + + using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>; + + net_type net; 
+}; + +// ---------------------------------------------------------------------------------------- + +void bind_cnn_face_detection(py::module& m) +{ + { + py::class_<cnn_face_detection_model_v1>(m, "cnn_face_detection_model_v1", "This object detects human faces in an image. The constructor loads the face detection model from a file. You can download a pre-trained model from http://dlib.net/files/mmod_human_face_detector.dat.bz2.") + .def(py::init<std::string>()) + .def( + "__call__", + &cnn_face_detection_model_v1::detect_mult, + py::arg("imgs"), py::arg("upsample_num_times")=0, py::arg("batch_size")=128, + "takes a list of images as input returning a 2d list of mmod rectangles" + ) + .def( + "__call__", + &cnn_face_detection_model_v1::detect, + py::arg("img"), py::arg("upsample_num_times")=0, + "Find faces in an image using a deep learning model.\n\ + - Upsamples the image upsample_num_times before running the face \n\ + detector." + ); + } + + m.def("set_dnn_prefer_smallest_algorithms", &set_dnn_prefer_smallest_algorithms, "Tells cuDNN to use slower algorithms that use less RAM."); + + auto cuda = m.def_submodule("cuda", "Routines for setting CUDA specific properties."); + cuda.def("set_device", &dlib::cuda::set_device, py::arg("device_id"), + "Set the active CUDA device. 
It is required that 0 <= device_id < get_num_devices()."); + cuda.def("get_device", &dlib::cuda::get_device, "Get the active CUDA device."); + cuda.def("get_num_devices", &dlib::cuda::get_num_devices, "Find out how many CUDA devices are available."); + + { + typedef mmod_rect type; + py::class_<type>(m, "mmod_rectangle", "Wrapper around a rectangle object and a detection confidence score.") + .def_readwrite("rect", &type::rect) + .def_readwrite("confidence", &type::detection_confidence); + } + { + typedef std::vector<mmod_rect> type; + py::bind_vector<type>(m, "mmod_rectangles", "An array of mmod rectangle objects.") + .def("extend", extend_vector_with_python_list<mmod_rect>); + } + { + typedef std::vector<std::vector<mmod_rect> > type; + py::bind_vector<type>(m, "mmod_rectangless", "A 2D array of mmod rectangle objects.") + .def("extend", extend_vector_with_python_list<std::vector<mmod_rect>>); + } +} diff --git a/ml/dlib/tools/python/src/conversion.h b/ml/dlib/tools/python/src/conversion.h new file mode 100644 index 00000000..9ab2360a --- /dev/null +++ b/ml/dlib/tools/python/src/conversion.h @@ -0,0 +1,52 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_PYTHON_CONVERSION_H__ +#define DLIB_PYTHON_CONVERSION_H__ + +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/pixel.h> + +using namespace dlib; +using namespace std; + +namespace py = pybind11; + +template <typename dest_image_type> +void pyimage_to_dlib_image(py::object img, dest_image_type& image) +{ + if (is_gray_python_image(img)) + assign_image(image, numpy_gray_image(img)); + else if (is_rgb_python_image(img)) + assign_image(image, numpy_rgb_image(img)); + else + throw dlib::error("Unsupported image type, must be 8bit gray or RGB image."); +} + +template <typename image_array, typename param_type> +void images_and_nested_params_to_dlib( + const py::object& pyimages, + const py::object& pyparams, + image_array& images, + std::vector<std::vector<param_type> >& params +) +{ + // Now copy the data into dlib based objects. + py::iterator image_it = pyimages.begin(); + py::iterator params_it = pyparams.begin(); + + for (unsigned long image_idx = 0; + image_it != pyimages.end() + && params_it != pyparams.end(); + ++image_it, ++params_it, ++image_idx) + { + for (py::iterator param_it = params_it->begin(); + param_it != params_it->end(); + ++param_it) + params[image_idx].push_back(param_it->cast<param_type>()); + + pyimage_to_dlib_image(image_it->cast<py::object>(), images[image_idx]); + } +} + +#endif // DLIB_PYTHON_CONVERSION_H__ diff --git a/ml/dlib/tools/python/src/correlation_tracker.cpp b/ml/dlib/tools/python/src/correlation_tracker.cpp new file mode 100644 index 00000000..1b17ba54 --- /dev/null +++ b/ml/dlib/tools/python/src/correlation_tracker.cpp @@ -0,0 +1,167 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+ +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/geometry.h> +#include <dlib/image_processing.h> + +using namespace dlib; +using namespace std; + +namespace py = pybind11; + +// ---------------------------------------------------------------------------------------- + +void start_track ( + correlation_tracker& tracker, + py::object img, + const drectangle& bounding_box +) +{ + if (is_gray_python_image(img)) + { + tracker.start_track(numpy_gray_image(img), bounding_box); + } + else if (is_rgb_python_image(img)) + { + tracker.start_track(numpy_rgb_image(img), bounding_box); + } + else + { + throw dlib::error("Unsupported image type, must be 8bit gray or RGB image."); + } +} + +void start_track_rec ( + correlation_tracker& tracker, + py::object img, + const rectangle& bounding_box +) +{ + drectangle dbounding_box(bounding_box); + start_track(tracker, img, dbounding_box); +} + +double update ( + correlation_tracker& tracker, + py::object img +) +{ + if (is_gray_python_image(img)) + { + return tracker.update(numpy_gray_image(img)); + } + else if (is_rgb_python_image(img)) + { + return tracker.update(numpy_rgb_image(img)); + } + else + { + throw dlib::error("Unsupported image type, must be 8bit gray or RGB image."); + } +} + +double update_guess ( + correlation_tracker& tracker, + py::object img, + const drectangle& bounding_box +) +{ + if (is_gray_python_image(img)) + { + return tracker.update(numpy_gray_image(img), bounding_box); + } + else if (is_rgb_python_image(img)) + { + return tracker.update(numpy_rgb_image(img), bounding_box); + } + else + { + throw dlib::error("Unsupported image type, must be 8bit gray or RGB image."); + } +} + +double update_guess_rec ( + correlation_tracker& tracker, + py::object img, + const rectangle& bounding_box +) +{ + drectangle dbounding_box(bounding_box); + return update_guess(tracker, img, dbounding_box); +} + +drectangle get_position (const correlation_tracker& tracker) { return tracker.get_position(); } + +// 
---------------------------------------------------------------------------------------- + +void bind_correlation_tracker(py::module &m) +{ + { + typedef correlation_tracker type; + py::class_<type>(m, "correlation_tracker", "This is a tool for tracking moving objects in a video stream. You give it \n\ + the bounding box of an object in the first frame and it attempts to track the \n\ + object in the box from frame to frame. \n\ + This tool is an implementation of the method described in the following paper: \n\ + Danelljan, Martin, et al. 'Accurate scale estimation for robust visual \n\ + tracking.' Proceedings of the British Machine Vision Conference BMVC. 2014.") + .def(py::init()) + .def("start_track", &::start_track, py::arg("image"), py::arg("bounding_box"), "\ + requires \n\ + - image is a numpy ndarray containing either an 8bit grayscale or RGB image. \n\ + - bounding_box.is_empty() == false \n\ + ensures \n\ + - This object will start tracking the thing inside the bounding box in the \n\ + given image. That is, if you call update() with subsequent video frames \n\ + then it will try to keep track of the position of the object inside bounding_box. \n\ + - #get_position() == bounding_box") + .def("start_track", &::start_track_rec, py::arg("image"), py::arg("bounding_box"), "\ + requires \n\ + - image is a numpy ndarray containing either an 8bit grayscale or RGB image. \n\ + - bounding_box.is_empty() == false \n\ + ensures \n\ + - This object will start tracking the thing inside the bounding box in the \n\ + given image. That is, if you call update() with subsequent video frames \n\ + then it will try to keep track of the position of the object inside bounding_box. \n\ + - #get_position() == bounding_box") + .def("update", &::update, py::arg("image"), "\ + requires \n\ + - image is a numpy ndarray containing either an 8bit grayscale or RGB image. \n\ + - get_position().is_empty() == false \n\ + (i.e. 
you must have started tracking by calling start_track()) \n\ + ensures \n\ + - performs: return update(img, get_position())") + .def("update", &::update_guess, py::arg("image"), py::arg("guess"), "\ + requires \n\ + - image is a numpy ndarray containing either an 8bit grayscale or RGB image. \n\ + - get_position().is_empty() == false \n\ + (i.e. you must have started tracking by calling start_track()) \n\ + ensures \n\ + - When searching for the object in img, we search in the area around the \n\ + provided guess. \n\ + - #get_position() == the new predicted location of the object in img. This \n\ + location will be a copy of guess that has been translated and scaled \n\ + appropriately based on the content of img so that it, hopefully, bounds \n\ + the object in img. \n\ + - Returns the peak to side-lobe ratio. This is a number that measures how \n\ + confident the tracker is that the object is inside #get_position(). \n\ + Larger values indicate higher confidence.") + .def("update", &::update_guess_rec, py::arg("image"), py::arg("guess"), "\ + requires \n\ + - image is a numpy ndarray containing either an 8bit grayscale or RGB image. \n\ + - get_position().is_empty() == false \n\ + (i.e. you must have started tracking by calling start_track()) \n\ + ensures \n\ + - When searching for the object in img, we search in the area around the \n\ + provided guess. \n\ + - #get_position() == the new predicted location of the object in img. This \n\ + location will be a copy of guess that has been translated and scaled \n\ + appropriately based on the content of img so that it, hopefully, bounds \n\ + the object in img. \n\ + - Returns the peak to side-lobe ratio. This is a number that measures how \n\ + confident the tracker is that the object is inside #get_position(). 
\n\ + Larger values indicate higher confidence.") + .def("get_position", &::get_position, "returns the predicted position of the object under track."); + } +} diff --git a/ml/dlib/tools/python/src/decision_functions.cpp b/ml/dlib/tools/python/src/decision_functions.cpp new file mode 100644 index 00000000..a93fe49b --- /dev/null +++ b/ml/dlib/tools/python/src/decision_functions.cpp @@ -0,0 +1,263 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. + +#include "opaque_types.h" +#include <dlib/python.h> +#include "testing_results.h" +#include <dlib/svm.h> + +using namespace dlib; +using namespace std; + +namespace py = pybind11; + +typedef matrix<double,0,1> sample_type; +typedef std::vector<std::pair<unsigned long,double> > sparse_vect; + +template <typename decision_function> +double predict ( + const decision_function& df, + const typename decision_function::kernel_type::sample_type& samp +) +{ + typedef typename decision_function::kernel_type::sample_type T; + if (df.basis_vectors.size() == 0) + { + return 0; + } + else if (is_matrix<T>::value && df.basis_vectors(0).size() != samp.size()) + { + std::ostringstream sout; + sout << "Input vector should have " << df.basis_vectors(0).size() + << " dimensions, not " << samp.size() << "."; + PyErr_SetString( PyExc_ValueError, sout.str().c_str() ); + throw py::error_already_set(); + } + return df(samp); +} + +template <typename kernel_type> +void add_df ( + py::module& m, + const std::string name +) +{ + typedef decision_function<kernel_type> df_type; + py::class_<df_type>(m, name.c_str()) + .def("__call__", &predict<df_type>) + .def(py::pickle(&getstate<df_type>, &setstate<df_type>)); +} + +template <typename df_type> +typename df_type::sample_type get_weights( + const df_type& df +) +{ + if (df.basis_vectors.size() == 0) + { + PyErr_SetString( PyExc_ValueError, "Decision function is empty." 
); + throw py::error_already_set(); + } + df_type temp = simplify_linear_decision_function(df); + return temp.basis_vectors(0); +} + +template <typename df_type> +typename df_type::scalar_type get_bias( + const df_type& df +) +{ + if (df.basis_vectors.size() == 0) + { + PyErr_SetString( PyExc_ValueError, "Decision function is empty." ); + throw py::error_already_set(); + } + return df.b; +} + +template <typename df_type> +void set_bias( + df_type& df, + double b +) +{ + if (df.basis_vectors.size() == 0) + { + PyErr_SetString( PyExc_ValueError, "Decision function is empty." ); + throw py::error_already_set(); + } + df.b = b; +} + +template <typename kernel_type> +void add_linear_df ( + py::module &m, + const std::string name +) +{ + typedef decision_function<kernel_type> df_type; + py::class_<df_type>(m, name.c_str()) + .def("__call__", predict<df_type>) + .def_property_readonly("weights", &get_weights<df_type>) + .def_property("bias", get_bias<df_type>, set_bias<df_type>) + .def(py::pickle(&getstate<df_type>, &setstate<df_type>)); +} + +// ---------------------------------------------------------------------------------------- + +std::string binary_test__str__(const binary_test& item) +{ + std::ostringstream sout; + sout << "class1_accuracy: "<< item.class1_accuracy << " class2_accuracy: "<< item.class2_accuracy; + return sout.str(); +} +std::string binary_test__repr__(const binary_test& item) { return "< " + binary_test__str__(item) + " >";} + +std::string regression_test__str__(const regression_test& item) +{ + std::ostringstream sout; + sout << "mean_squared_error: "<< item.mean_squared_error << " R_squared: "<< item.R_squared; + sout << " mean_average_error: "<< item.mean_average_error << " mean_error_stddev: "<< item.mean_error_stddev; + return sout.str(); +} +std::string regression_test__repr__(const regression_test& item) { return "< " + regression_test__str__(item) + " >";} + +std::string ranking_test__str__(const ranking_test& item) +{ + std::ostringstream 
sout; + sout << "ranking_accuracy: "<< item.ranking_accuracy << " mean_ap: "<< item.mean_ap; + return sout.str(); +} +std::string ranking_test__repr__(const ranking_test& item) { return "< " + ranking_test__str__(item) + " >";} + +// ---------------------------------------------------------------------------------------- + +template <typename K> +binary_test _test_binary_decision_function ( + const decision_function<K>& dec_funct, + const std::vector<typename K::sample_type>& x_test, + const std::vector<double>& y_test +) { return binary_test(test_binary_decision_function(dec_funct, x_test, y_test)); } + +template <typename K> +regression_test _test_regression_function ( + const decision_function<K>& reg_funct, + const std::vector<typename K::sample_type>& x_test, + const std::vector<double>& y_test +) { return regression_test(test_regression_function(reg_funct, x_test, y_test)); } + +template < typename K > +ranking_test _test_ranking_function1 ( + const decision_function<K>& funct, + const std::vector<ranking_pair<typename K::sample_type> >& samples +) { return ranking_test(test_ranking_function(funct, samples)); } + +template < typename K > +ranking_test _test_ranking_function2 ( + const decision_function<K>& funct, + const ranking_pair<typename K::sample_type>& sample +) { return ranking_test(test_ranking_function(funct, sample)); } + + +void bind_decision_functions(py::module &m) +{ + add_linear_df<linear_kernel<sample_type> >(m, "_decision_function_linear"); + add_linear_df<sparse_linear_kernel<sparse_vect> >(m, "_decision_function_sparse_linear"); + + add_df<histogram_intersection_kernel<sample_type> >(m, "_decision_function_histogram_intersection"); + add_df<sparse_histogram_intersection_kernel<sparse_vect> >(m, "_decision_function_sparse_histogram_intersection"); + + add_df<polynomial_kernel<sample_type> >(m, "_decision_function_polynomial"); + add_df<sparse_polynomial_kernel<sparse_vect> >(m, "_decision_function_sparse_polynomial"); + + 
add_df<radial_basis_kernel<sample_type> >(m, "_decision_function_radial_basis"); + add_df<sparse_radial_basis_kernel<sparse_vect> >(m, "_decision_function_sparse_radial_basis"); + + add_df<sigmoid_kernel<sample_type> >(m, "_decision_function_sigmoid"); + add_df<sparse_sigmoid_kernel<sparse_vect> >(m, "_decision_function_sparse_sigmoid"); + + + m.def("test_binary_decision_function", _test_binary_decision_function<linear_kernel<sample_type> >, + py::arg("function"), py::arg("samples"), py::arg("labels")); + m.def("test_binary_decision_function", _test_binary_decision_function<sparse_linear_kernel<sparse_vect> >, + py::arg("function"), py::arg("samples"), py::arg("labels")); + m.def("test_binary_decision_function", _test_binary_decision_function<radial_basis_kernel<sample_type> >, + py::arg("function"), py::arg("samples"), py::arg("labels")); + m.def("test_binary_decision_function", _test_binary_decision_function<sparse_radial_basis_kernel<sparse_vect> >, + py::arg("function"), py::arg("samples"), py::arg("labels")); + m.def("test_binary_decision_function", _test_binary_decision_function<polynomial_kernel<sample_type> >, + py::arg("function"), py::arg("samples"), py::arg("labels")); + m.def("test_binary_decision_function", _test_binary_decision_function<sparse_polynomial_kernel<sparse_vect> >, + py::arg("function"), py::arg("samples"), py::arg("labels")); + m.def("test_binary_decision_function", _test_binary_decision_function<histogram_intersection_kernel<sample_type> >, + py::arg("function"), py::arg("samples"), py::arg("labels")); + m.def("test_binary_decision_function", _test_binary_decision_function<sparse_histogram_intersection_kernel<sparse_vect> >, + py::arg("function"), py::arg("samples"), py::arg("labels")); + m.def("test_binary_decision_function", _test_binary_decision_function<sigmoid_kernel<sample_type> >, + py::arg("function"), py::arg("samples"), py::arg("labels")); + m.def("test_binary_decision_function", 
_test_binary_decision_function<sparse_sigmoid_kernel<sparse_vect> >, + py::arg("function"), py::arg("samples"), py::arg("labels")); + + m.def("test_regression_function", _test_regression_function<linear_kernel<sample_type> >, + py::arg("function"), py::arg("samples"), py::arg("targets")); + m.def("test_regression_function", _test_regression_function<sparse_linear_kernel<sparse_vect> >, + py::arg("function"), py::arg("samples"), py::arg("targets")); + m.def("test_regression_function", _test_regression_function<radial_basis_kernel<sample_type> >, + py::arg("function"), py::arg("samples"), py::arg("targets")); + m.def("test_regression_function", _test_regression_function<sparse_radial_basis_kernel<sparse_vect> >, + py::arg("function"), py::arg("samples"), py::arg("targets")); + m.def("test_regression_function", _test_regression_function<histogram_intersection_kernel<sample_type> >, + py::arg("function"), py::arg("samples"), py::arg("targets")); + m.def("test_regression_function", _test_regression_function<sparse_histogram_intersection_kernel<sparse_vect> >, + py::arg("function"), py::arg("samples"), py::arg("targets")); + m.def("test_regression_function", _test_regression_function<sigmoid_kernel<sample_type> >, + py::arg("function"), py::arg("samples"), py::arg("targets")); + m.def("test_regression_function", _test_regression_function<sparse_sigmoid_kernel<sparse_vect> >, + py::arg("function"), py::arg("samples"), py::arg("targets")); + m.def("test_regression_function", _test_regression_function<polynomial_kernel<sample_type> >, + py::arg("function"), py::arg("samples"), py::arg("targets")); + m.def("test_regression_function", _test_regression_function<sparse_polynomial_kernel<sparse_vect> >, + py::arg("function"), py::arg("samples"), py::arg("targets")); + + m.def("test_ranking_function", _test_ranking_function1<linear_kernel<sample_type> >, + py::arg("function"), py::arg("samples")); + m.def("test_ranking_function", 
_test_ranking_function1<sparse_linear_kernel<sparse_vect> >, + py::arg("function"), py::arg("samples")); + m.def("test_ranking_function", _test_ranking_function2<linear_kernel<sample_type> >, + py::arg("function"), py::arg("sample")); + m.def("test_ranking_function", _test_ranking_function2<sparse_linear_kernel<sparse_vect> >, + py::arg("function"), py::arg("sample")); + + + py::class_<binary_test>(m, "_binary_test") + .def("__str__", binary_test__str__) + .def("__repr__", binary_test__repr__) + .def_readwrite("class1_accuracy", &binary_test::class1_accuracy, + "A value between 0 and 1, measures accuracy on the +1 class.") + .def_readwrite("class2_accuracy", &binary_test::class2_accuracy, + "A value between 0 and 1, measures accuracy on the -1 class."); + + py::class_<ranking_test>(m, "_ranking_test") + .def("__str__", ranking_test__str__) + .def("__repr__", ranking_test__repr__) + .def_readwrite("ranking_accuracy", &ranking_test::ranking_accuracy, + "A value between 0 and 1, measures the fraction of times a relevant sample was ordered before a non-relevant sample.") + .def_readwrite("mean_ap", &ranking_test::mean_ap, + "A value between 0 and 1, measures the mean average precision of the ranking."); + + py::class_<regression_test>(m, "_regression_test") + .def("__str__", regression_test__str__) + .def("__repr__", regression_test__repr__) + .def_readwrite("mean_average_error", &regression_test::mean_average_error, + "The mean average error of a regression function on a dataset.") + .def_readwrite("mean_error_stddev", &regression_test::mean_error_stddev, + "The standard deviation of the absolute value of the error of a regression function on a dataset.") + .def_readwrite("mean_squared_error", &regression_test::mean_squared_error, + "The mean squared error of a regression function on a dataset.") + .def_readwrite("R_squared", &regression_test::R_squared, + "A value between 0 and 1, measures the squared correlation between the output of a \n" + "regression function and the target 
values."); +} + + + diff --git a/ml/dlib/tools/python/src/dlib.cpp b/ml/dlib/tools/python/src/dlib.cpp new file mode 100644 index 00000000..ac6fea0d --- /dev/null +++ b/ml/dlib/tools/python/src/dlib.cpp @@ -0,0 +1,110 @@ +// Copyright (C) 2015 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. + +#include "opaque_types.h" +#include <pybind11/pybind11.h> +#include <dlib/simd.h> +#include <string> + +namespace py = pybind11; + +void bind_matrix(py::module& m); +void bind_vector(py::module& m); +void bind_svm_c_trainer(py::module& m); +void bind_decision_functions(py::module& m); +void bind_basic_types(py::module& m); +void bind_other(py::module& m); +void bind_svm_rank_trainer(py::module& m); +void bind_cca(py::module& m); +void bind_sequence_segmenter(py::module& m); +void bind_svm_struct(py::module& m); +void bind_image_classes(py::module& m); +void bind_rectangles(py::module& m); +void bind_object_detection(py::module& m); +void bind_shape_predictors(py::module& m); +void bind_correlation_tracker(py::module& m); +void bind_face_recognition(py::module& m); +void bind_cnn_face_detection(py::module& m); +void bind_global_optimization(py::module& m); +void bind_numpy_returns(py::module& m); +void bind_image_dataset_metadata(py::module& m); + +#ifndef DLIB_NO_GUI_SUPPORT +void bind_gui(py::module& m); +#endif + +PYBIND11_MODULE(dlib, m) +{ + warn_about_unavailable_but_used_cpu_instructions(); + + +#define DLIB_QUOTE_STRING(x) DLIB_QUOTE_STRING2(x) +#define DLIB_QUOTE_STRING2(x) #x + m.attr("__version__") = DLIB_QUOTE_STRING(DLIB_VERSION); + m.attr("__time_compiled__") = std::string(__DATE__) + " " + std::string(__TIME__); + +#ifdef DLIB_USE_CUDA + m.attr("DLIB_USE_CUDA") = true; +#else + m.attr("DLIB_USE_CUDA") = false; +#endif +#ifdef DLIB_USE_BLAS + m.attr("DLIB_USE_BLAS") = true; +#else + m.attr("DLIB_USE_BLAS") = false; +#endif +#ifdef DLIB_USE_LAPACK + m.attr("DLIB_USE_LAPACK") = true; +#else + 
m.attr("DLIB_USE_LAPACK") = false; +#endif +#ifdef DLIB_HAVE_AVX + m.attr("USE_AVX_INSTRUCTIONS") = true; +#else + m.attr("USE_AVX_INSTRUCTIONS") = false; +#endif +#ifdef DLIB_HAVE_NEON + m.attr("USE_NEON_INSTRUCTIONS") = true; +#else + m.attr("USE_NEON_INSTRUCTIONS") = false; +#endif + + + + // Note that the order here matters. We need to do the basic types first. If we don't + // then what happens is the documentation created by sphinx will use horrible big + // template names to refer to C++ objects rather than the python names python users + // will expect. For instance, if bind_basic_types() isn't called early then when + // routines take a std::vector<double>, rather than saying dlib.array in the python + // docs it will say "std::vector<double, std::allocator<double> >" which is awful and + // confusing to python users. + // + // So when adding new things always add them to the end of the list. + bind_matrix(m); + bind_vector(m); + bind_basic_types(m); + bind_other(m); + + bind_svm_rank_trainer(m); + bind_decision_functions(m); + bind_cca(m); + bind_sequence_segmenter(m); + bind_svm_struct(m); + bind_image_classes(m); + bind_rectangles(m); + bind_object_detection(m); + bind_shape_predictors(m); + bind_correlation_tracker(m); + bind_face_recognition(m); + bind_cnn_face_detection(m); + bind_global_optimization(m); + bind_numpy_returns(m); + bind_svm_c_trainer(m); +#ifndef DLIB_NO_GUI_SUPPORT + bind_gui(m); +#endif + + bind_image_dataset_metadata(m); + + +} diff --git a/ml/dlib/tools/python/src/face_recognition.cpp b/ml/dlib/tools/python/src/face_recognition.cpp new file mode 100644 index 00000000..8d5dee67 --- /dev/null +++ b/ml/dlib/tools/python/src/face_recognition.cpp @@ -0,0 +1,245 @@ +// Copyright (C) 2017 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+ +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/matrix.h> +#include <dlib/geometry/vector.h> +#include <dlib/dnn.h> +#include <dlib/image_transforms.h> +#include "indexing.h" +#include <dlib/image_io.h> +#include <dlib/clustering.h> +#include <pybind11/stl_bind.h> + + +using namespace dlib; +using namespace std; + +namespace py = pybind11; + + +typedef matrix<double,0,1> cv; + +class face_recognition_model_v1 +{ + +public: + + face_recognition_model_v1(const std::string& model_filename) + { + deserialize(model_filename) >> net; + } + + matrix<double,0,1> compute_face_descriptor ( + py::object img, + const full_object_detection& face, + const int num_jitters + ) + { + std::vector<full_object_detection> faces(1, face); + return compute_face_descriptors(img, faces, num_jitters)[0]; + } + + std::vector<matrix<double,0,1>> compute_face_descriptors ( + py::object img, + const std::vector<full_object_detection>& faces, + const int num_jitters + ) + { + if (!is_rgb_python_image(img)) + throw dlib::error("Unsupported image type, must be RGB image."); + + for (auto& f : faces) + { + if (f.num_parts() != 68 && f.num_parts() != 5) + throw dlib::error("The full_object_detection must use the iBUG 300W 68 point face landmark style or dlib's 5 point style."); + } + + + std::vector<chip_details> dets; + for (auto& f : faces) + dets.push_back(get_face_chip_details(f, 150, 0.25)); + dlib::array<matrix<rgb_pixel>> face_chips; + extract_image_chips(numpy_rgb_image(img), dets, face_chips); + + std::vector<matrix<double,0,1>> face_descriptors; + face_descriptors.reserve(face_chips.size()); + + if (num_jitters <= 1) + { + // extract descriptors and convert from float vectors to double vectors + for (auto& d : net(face_chips,16)) + face_descriptors.push_back(matrix_cast<double>(d)); + } + else + { + for (auto& fimg : face_chips) + face_descriptors.push_back(matrix_cast<double>(mean(mat(net(jitter_image(fimg,num_jitters),16))))); + } + + return face_descriptors; + } + 
+private: + + dlib::rand rnd; + + std::vector<matrix<rgb_pixel>> jitter_image( + const matrix<rgb_pixel>& img, + const int num_jitters + ) + { + std::vector<matrix<rgb_pixel>> crops; + for (int i = 0; i < num_jitters; ++i) + crops.push_back(dlib::jitter_image(img,rnd)); + return crops; + } + + + template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> + using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>; + + template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> + using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>; + + template <int N, template <typename> class BN, int stride, typename SUBNET> + using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>; + + template <int N, typename SUBNET> using ares = relu<residual<block,N,affine,SUBNET>>; + template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>; + + template <typename SUBNET> using alevel0 = ares_down<256,SUBNET>; + template <typename SUBNET> using alevel1 = ares<256,ares<256,ares_down<256,SUBNET>>>; + template <typename SUBNET> using alevel2 = ares<128,ares<128,ares_down<128,SUBNET>>>; + template <typename SUBNET> using alevel3 = ares<64,ares<64,ares<64,ares_down<64,SUBNET>>>>; + template <typename SUBNET> using alevel4 = ares<32,ares<32,ares<32,SUBNET>>>; + + using anet_type = loss_metric<fc_no_bias<128,avg_pool_everything< + alevel0< + alevel1< + alevel2< + alevel3< + alevel4< + max_pool<3,3,2,2,relu<affine<con<32,7,7,2,2, + input_rgb_image_sized<150> + >>>>>>>>>>>>; + anet_type net; +}; + +// ---------------------------------------------------------------------------------------- + +py::list chinese_whispers_clustering(py::list descriptors, float threshold) +{ + DLIB_CASSERT(threshold > 0); + py::list clusters; + + size_t num_descriptors = py::len(descriptors); + + // 
This next bit of code creates a graph of connected objects and then uses the Chinese + // whispers graph clustering algorithm to identify how many objects there are and which + // objects belong to which cluster. + std::vector<sample_pair> edges; + std::vector<unsigned long> labels; + for (size_t i = 0; i < num_descriptors; ++i) + { + for (size_t j = i; j < num_descriptors; ++j) + { + matrix<double,0,1>& first_descriptor = descriptors[i].cast<matrix<double,0,1>&>(); + matrix<double,0,1>& second_descriptor = descriptors[j].cast<matrix<double,0,1>&>(); + + if (length(first_descriptor-second_descriptor) < threshold) + edges.push_back(sample_pair(i,j)); + } + } + chinese_whispers(edges, labels); + for (size_t i = 0; i < labels.size(); ++i) + { + clusters.append(labels[i]); + } + return clusters; +} + +void save_face_chips ( + py::object img, + const std::vector<full_object_detection>& faces, + const std::string& chip_filename, + size_t size = 150, + float padding = 0.25 +) +{ + if (!is_rgb_python_image(img)) + throw dlib::error("Unsupported image type, must be RGB image."); + + int num_faces = faces.size(); + std::vector<chip_details> dets; + for (auto& f : faces) + dets.push_back(get_face_chip_details(f, size, padding)); + dlib::array<matrix<rgb_pixel>> face_chips; + extract_image_chips(numpy_rgb_image(img), dets, face_chips); + int i=0; + for (auto& chip : face_chips) + { + i++; + if(num_faces > 1) + { + const std::string& file_name = chip_filename + "_" + std::to_string(i) + ".jpg"; + save_jpeg(chip, file_name); + } + else + { + const std::string& file_name = chip_filename + ".jpg"; + save_jpeg(chip, file_name); + } + } +} + +void save_face_chip ( + py::object img, + const full_object_detection& face, + const std::string& chip_filename, + size_t size = 150, + float padding = 0.25 +) +{ + std::vector<full_object_detection> faces(1, face); + save_face_chips(img, faces, chip_filename, size, padding); + return; +} + +void bind_face_recognition(py::module &m) +{ + { + 
py::class_<face_recognition_model_v1>(m, "face_recognition_model_v1", "This object maps human faces into 128D vectors where pictures of the same person are mapped near to each other and pictures of different people are mapped far apart. The constructor loads the face recognition model from a file. The model file is available here: http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2") + .def(py::init<std::string>()) + .def("compute_face_descriptor", &face_recognition_model_v1::compute_face_descriptor, py::arg("img"),py::arg("face"),py::arg("num_jitters")=0, + "Takes an image and a full_object_detection that references a face in that image and converts it into a 128D face descriptor. " + "If num_jitters>1 then each face will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor." + ) + .def("compute_face_descriptor", &face_recognition_model_v1::compute_face_descriptors, py::arg("img"),py::arg("faces"),py::arg("num_jitters")=0, + "Takes an image and an array of full_object_detections that reference faces in that image and converts them into 128D face descriptors. " + "If num_jitters>1 then each face will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor." + ); + } + + m.def("save_face_chip", &save_face_chip, + "Takes an image and a full_object_detection that references a face in that image and saves the face with the specified file name prefix. The face will be rotated upright and scaled to 150x150 pixels or with the optional specified size and padding.", + py::arg("img"), py::arg("face"), py::arg("chip_filename"), py::arg("size")=150, py::arg("padding")=0.25 + ); + m.def("save_face_chips", &save_face_chips, + "Takes an image and a full_object_detections object that reference faces in that image and saves the faces with the specified file name prefix. 
The faces will be rotated upright and scaled to 150x150 pixels or with the optional specified size and padding.", + py::arg("img"), py::arg("faces"), py::arg("chip_filename"), py::arg("size")=150, py::arg("padding")=0.25 + ); + m.def("chinese_whispers_clustering", &chinese_whispers_clustering, py::arg("descriptors"), py::arg("threshold"), + "Takes a list of descriptors and returns a list that contains a label for each descriptor. Clustering is done using dlib::chinese_whispers." + ); + { + typedef std::vector<full_object_detection> type; + py::bind_vector<type>(m, "full_object_detections", "An array of full_object_detection objects.") + .def("clear", &type::clear) + .def("resize", resize<type>) + .def("extend", extend_vector_with_python_list<full_object_detection>) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } +} + diff --git a/ml/dlib/tools/python/src/global_optimization.cpp b/ml/dlib/tools/python/src/global_optimization.cpp new file mode 100644 index 00000000..f27185c5 --- /dev/null +++ b/ml/dlib/tools/python/src/global_optimization.cpp @@ -0,0 +1,442 @@ +// Copyright (C) 2017 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+ +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/global_optimization.h> +#include <dlib/matrix.h> +#include <pybind11/stl.h> + + +using namespace dlib; +using namespace std; +namespace py = pybind11; + +// ---------------------------------------------------------------------------------------- + +std::vector<bool> list_to_bool_vector( + const py::list& l +) +{ + std::vector<bool> result(len(l)); + for (long i = 0; i < result.size(); ++i) + { + result[i] = l[i].cast<bool>(); + } + return result; +} + +matrix<double,0,1> list_to_mat( + const py::list& l +) +{ + matrix<double,0,1> result(len(l)); + for (long i = 0; i < result.size(); ++i) + result(i) = l[i].cast<double>(); + return result; +} + +py::list mat_to_list ( + const matrix<double,0,1>& m +) +{ + py::list l; + for (long i = 0; i < m.size(); ++i) + l.append(m(i)); + return l; +} + +size_t num_function_arguments(py::object f, size_t expected_num) +{ + const auto code_object = f.attr(hasattr(f,"func_code") ? "func_code" : "__code__"); + const auto num = code_object.attr("co_argcount").cast<std::size_t>(); + if (num < expected_num && (code_object.attr("co_flags").cast<int>() & CO_VARARGS)) + return expected_num; + return num; +} + +double call_func(py::object f, const matrix<double,0,1>& args) +{ + const auto num = num_function_arguments(f, args.size()); + DLIB_CASSERT(num == args.size(), + "The function being optimized takes a number of arguments that doesn't agree with the size of the bounds lists you provided to find_max_global()"); + DLIB_CASSERT(0 < num && num <= 15, "Functions being optimized must take between 1 and 15 scalar arguments."); + +#define CALL_WITH_N_ARGS(N) case N: return dlib::gopt_impl::_cwv(f,args,typename make_compile_time_integer_range<N>::type()).cast<double>(); + switch (num) + { + CALL_WITH_N_ARGS(1) + CALL_WITH_N_ARGS(2) + CALL_WITH_N_ARGS(3) + CALL_WITH_N_ARGS(4) + CALL_WITH_N_ARGS(5) + CALL_WITH_N_ARGS(6) + CALL_WITH_N_ARGS(7) + CALL_WITH_N_ARGS(8) + 
CALL_WITH_N_ARGS(9) + CALL_WITH_N_ARGS(10) + CALL_WITH_N_ARGS(11) + CALL_WITH_N_ARGS(12) + CALL_WITH_N_ARGS(13) + CALL_WITH_N_ARGS(14) + CALL_WITH_N_ARGS(15) + + default: + DLIB_CASSERT(false, "oops"); + break; + } +} + +// ---------------------------------------------------------------------------------------- + +py::tuple py_find_max_global ( + py::object f, + py::list bound1, + py::list bound2, + py::list is_integer_variable, + unsigned long num_function_calls, + double solver_epsilon = 0 +) +{ + DLIB_CASSERT(len(bound1) == len(bound2)); + DLIB_CASSERT(len(bound1) == len(is_integer_variable)); + + auto func = [&](const matrix<double,0,1>& x) + { + return call_func(f, x); + }; + + auto result = find_max_global(func, list_to_mat(bound1), list_to_mat(bound2), + list_to_bool_vector(is_integer_variable), max_function_calls(num_function_calls), + solver_epsilon); + + return py::make_tuple(mat_to_list(result.x),result.y); +} + +py::tuple py_find_max_global2 ( + py::object f, + py::list bound1, + py::list bound2, + unsigned long num_function_calls, + double solver_epsilon = 0 +) +{ + DLIB_CASSERT(len(bound1) == len(bound2)); + + auto func = [&](const matrix<double,0,1>& x) + { + return call_func(f, x); + }; + + auto result = find_max_global(func, list_to_mat(bound1), list_to_mat(bound2), max_function_calls(num_function_calls), solver_epsilon); + + return py::make_tuple(mat_to_list(result.x),result.y); +} + +// ---------------------------------------------------------------------------------------- + +py::tuple py_find_min_global ( + py::object f, + py::list bound1, + py::list bound2, + py::list is_integer_variable, + unsigned long num_function_calls, + double solver_epsilon = 0 +) +{ + DLIB_CASSERT(len(bound1) == len(bound2)); + DLIB_CASSERT(len(bound1) == len(is_integer_variable)); + + auto func = [&](const matrix<double,0,1>& x) + { + return call_func(f, x); + }; + + auto result = find_min_global(func, list_to_mat(bound1), list_to_mat(bound2), + 
list_to_bool_vector(is_integer_variable), max_function_calls(num_function_calls), + solver_epsilon); + + return py::make_tuple(mat_to_list(result.x),result.y); +} + +py::tuple py_find_min_global2 ( + py::object f, + py::list bound1, + py::list bound2, + unsigned long num_function_calls, + double solver_epsilon = 0 +) +{ + DLIB_CASSERT(len(bound1) == len(bound2)); + + auto func = [&](const matrix<double,0,1>& x) + { + return call_func(f, x); + }; + + auto result = find_min_global(func, list_to_mat(bound1), list_to_mat(bound2), max_function_calls(num_function_calls), solver_epsilon); + + return py::make_tuple(mat_to_list(result.x),result.y); +} + +// ---------------------------------------------------------------------------------------- + +function_spec py_function_spec1 ( + py::list a, + py::list b +) +{ + return function_spec(list_to_mat(a), list_to_mat(b)); +} + +function_spec py_function_spec2 ( + py::list a, + py::list b, + py::list c +) +{ + return function_spec(list_to_mat(a), list_to_mat(b), list_to_bool_vector(c)); +} + +std::shared_ptr<global_function_search> py_global_function_search1 ( + py::list functions +) +{ + std::vector<function_spec> tmp; + for (auto i : functions) + tmp.emplace_back(i.cast<function_spec>()); + + return std::make_shared<global_function_search>(tmp); +} + +std::shared_ptr<global_function_search> py_global_function_search2 ( + py::list functions, + py::list initial_function_evals, + double relative_noise_magnitude +) +{ + std::vector<function_spec> specs; + for (auto i : functions) + specs.emplace_back(i.cast<function_spec>()); + + std::vector<std::vector<function_evaluation>> func_evals; + for (auto i : initial_function_evals) + { + std::vector<function_evaluation> evals; + for (auto j : i) + { + evals.emplace_back(j.cast<function_evaluation>()); + } + func_evals.emplace_back(std::move(evals)); + } + + return std::make_shared<global_function_search>(specs, func_evals, relative_noise_magnitude); +} + +function_evaluation 
py_function_evaluation( + const py::list& x, + double y +) +{ + return function_evaluation(list_to_mat(x), y); +} + +// ---------------------------------------------------------------------------------------- + +void bind_global_optimization(py::module& m) +{ + /*! + requires + - len(bound1) == len(bound2) == len(is_integer_variable) + - for all valid i: bound1[i] != bound2[i] + - solver_epsilon >= 0 + - f() is a real valued multi-variate function. It must take scalar real + numbers as its arguments and the number of arguments must be len(bound1). + ensures + - This function performs global optimization on the given f() function. + The goal is to maximize the following objective function: + f(x) + subject to the constraints: + min(bound1[i],bound2[i]) <= x[i] <= max(bound1[i],bound2[i]) + if (is_integer_variable[i]) then x[i] is an integer. + - find_max_global() runs until it has called f() num_function_calls times. + Then it returns the best x it has found along with the corresponding output + of f(). That is, it returns (best_x_seen,f(best_x_seen)). Here best_x_seen + is a list containing the best arguments to f() this function has found. + - find_max_global() uses a global optimization method based on a combination of + non-parametric global function modeling and quadratic trust region modeling + to efficiently find a global maximizer. It usually does a good job with a + relatively small number of calls to f(). For more information on how it + works read the documentation for dlib's global_function_search object. + However, one notable element is the solver epsilon, which you can adjust. + + The search procedure will only attempt to find a global maximizer to at most + solver_epsilon accuracy. Once a local maximizer is found to that accuracy + the search will focus entirely on finding other maxima elsewhere rather than + on further improving the current local optima found so far. 
That is, once a + local maxima is identified to about solver_epsilon accuracy, the algorithm + will spend all its time exploring the function to find other local maxima to + investigate. An epsilon of 0 means it will keep solving until it reaches + full floating point precision. Larger values will cause it to switch to pure + global exploration sooner and therefore might be more effective if your + objective function has many local maxima and you don't care about a super + high precision solution. + - Any variables that satisfy the following conditions are optimized on a log-scale: + - The lower bound on the variable is > 0 + - The ratio of the upper bound to lower bound is > 1000 + - The variable is not an integer variable + We do this because it's common to optimize machine learning models that have + parameters with bounds in a range such as [1e-5 to 1e10] (e.g. the SVM C + parameter) and it's much more appropriate to optimize these kinds of + variables on a log scale. So we transform them by applying log() to + them and then undo the transform via exp() before invoking the function + being optimized. Therefore, this transformation is invisible to the user + supplied functions. In most cases, it improves the efficiency of the + optimizer. + !*/ + { + m.def("find_max_global", &py_find_max_global, +"requires \n\ + - len(bound1) == len(bound2) == len(is_integer_variable) \n\ + - for all valid i: bound1[i] != bound2[i] \n\ + - solver_epsilon >= 0 \n\ + - f() is a real valued multi-variate function. It must take scalar real \n\ + numbers as its arguments and the number of arguments must be len(bound1). \n\ +ensures \n\ + - This function performs global optimization on the given f() function. \n\ + The goal is to maximize the following objective function: \n\ + f(x) \n\ + subject to the constraints: \n\ + min(bound1[i],bound2[i]) <= x[i] <= max(bound1[i],bound2[i]) \n\ + if (is_integer_variable[i]) then x[i] is an integer. 
\n\ + - find_max_global() runs until it has called f() num_function_calls times. \n\ + Then it returns the best x it has found along with the corresponding output \n\ + of f(). That is, it returns (best_x_seen,f(best_x_seen)). Here best_x_seen \n\ + is a list containing the best arguments to f() this function has found. \n\ + - find_max_global() uses a global optimization method based on a combination of \n\ + non-parametric global function modeling and quadratic trust region modeling \n\ + to efficiently find a global maximizer. It usually does a good job with a \n\ + relatively small number of calls to f(). For more information on how it \n\ + works read the documentation for dlib's global_function_search object. \n\ + However, one notable element is the solver epsilon, which you can adjust. \n\ + \n\ + The search procedure will only attempt to find a global maximizer to at most \n\ + solver_epsilon accuracy. Once a local maximizer is found to that accuracy \n\ + the search will focus entirely on finding other maxima elsewhere rather than \n\ + on further improving the current local optima found so far. That is, once a \n\ + local maxima is identified to about solver_epsilon accuracy, the algorithm \n\ + will spend all its time exploring the function to find other local maxima to \n\ + investigate. An epsilon of 0 means it will keep solving until it reaches \n\ + full floating point precision. Larger values will cause it to switch to pure \n\ + global exploration sooner and therefore might be more effective if your \n\ + objective function has many local maxima and you don't care about a super \n\ + high precision solution. 
\n\ + - Any variables that satisfy the following conditions are optimized on a log-scale: \n\ + - The lower bound on the variable is > 0 \n\ + - The ratio of the upper bound to lower bound is > 1000 \n\ + - The variable is not an integer variable \n\ + We do this because it's common to optimize machine learning models that have \n\ + parameters with bounds in a range such as [1e-5 to 1e10] (e.g. the SVM C \n\ + parameter) and it's much more appropriate to optimize these kinds of \n\ + variables on a log scale. So we transform them by applying log() to \n\ + them and then undo the transform via exp() before invoking the function \n\ + being optimized. Therefore, this transformation is invisible to the user \n\ + supplied functions. In most cases, it improves the efficiency of the \n\ + optimizer." + , + py::arg("f"), py::arg("bound1"), py::arg("bound2"), py::arg("is_integer_variable"), py::arg("num_function_calls"), py::arg("solver_epsilon")=0 + ); + } + + { + m.def("find_max_global", &py_find_max_global2, + "This function simply calls the other version of find_max_global() with is_integer_variable set to False for all variables.", + py::arg("f"), py::arg("bound1"), py::arg("bound2"), py::arg("num_function_calls"), py::arg("solver_epsilon")=0 + ); + } + + + + { + m.def("find_min_global", &py_find_min_global, + "This function is just like find_max_global(), except it performs minimization rather than maximization." 
+ , + py::arg("f"), py::arg("bound1"), py::arg("bound2"), py::arg("is_integer_variable"), py::arg("num_function_calls"), py::arg("solver_epsilon")=0 + ); + } + + { + m.def("find_min_global", &py_find_min_global2, + "This function simply calls the other version of find_min_global() with is_integer_variable set to False for all variables.", + py::arg("f"), py::arg("bound1"), py::arg("bound2"), py::arg("num_function_calls"), py::arg("solver_epsilon")=0 + ); + } + + // ------------------------------------------------- + // ------------------------------------------------- + + + py::class_<function_evaluation> (m, "function_evaluation", R"RAW( +This object records the output of a real valued function in response to +some input. + +In particular, if you have a function F(x) then the function_evaluation is +simply a struct that records x and the scalar value F(x). )RAW") + .def(py::init<matrix<double,0,1>,double>(), py::arg("x"), py::arg("y")) + .def(py::init<>(&py_function_evaluation), py::arg("x"), py::arg("y")) + .def_readonly("x", &function_evaluation::x) + .def_readonly("y", &function_evaluation::y); + + + py::class_<function_spec> (m, "function_spec", "See: http://dlib.net/dlib/global_optimization/global_function_search_abstract.h.html") + .def(py::init<matrix<double,0,1>,matrix<double,0,1>>(), py::arg("bound1"), py::arg("bound2") ) + .def(py::init<matrix<double,0,1>,matrix<double,0,1>,std::vector<bool>>(), py::arg("bound1"), py::arg("bound2"), py::arg("is_integer") ) + .def(py::init<>(&py_function_spec1), py::arg("bound1"), py::arg("bound2")) + .def(py::init<>(&py_function_spec2), py::arg("bound1"), py::arg("bound2"), py::arg("is_integer")) + .def_readonly("lower", &function_spec::lower) + .def_readonly("upper", &function_spec::upper) + .def_readonly("is_integer_variable", &function_spec::is_integer_variable); + + + py::class_<function_evaluation_request> (m, "function_evaluation_request", "See: 
http://dlib.net/dlib/global_optimization/global_function_search_abstract.h.html") + .def_property_readonly("function_idx", &function_evaluation_request::function_idx) + .def_property_readonly("x", &function_evaluation_request::x) + .def_property_readonly("has_been_evaluated", &function_evaluation_request::has_been_evaluated) + .def("set", &function_evaluation_request::set); + + py::class_<global_function_search, std::shared_ptr<global_function_search>> (m, "global_function_search", "See: http://dlib.net/dlib/global_optimization/global_function_search_abstract.h.html") + .def(py::init<function_spec>(), py::arg("function")) + .def(py::init<>(&py_global_function_search1), py::arg("functions")) + .def(py::init<>(&py_global_function_search2), py::arg("functions"), py::arg("initial_function_evals"), py::arg("relative_noise_magnitude")) + .def("set_seed", &global_function_search::set_seed, py::arg("seed")) + .def("num_functions", &global_function_search::num_functions) + .def("get_function_evaluations", [](const global_function_search& self) { + std::vector<function_spec> specs; + std::vector<std::vector<function_evaluation>> function_evals; + self.get_function_evaluations(specs,function_evals); + py::list py_specs, py_func_evals; + for (auto& s : specs) + py_specs.append(s); + for (auto& i : function_evals) + { + py::list tmp; + for (auto& j : i) + tmp.append(j); + py_func_evals.append(tmp); + } + return py::make_tuple(py_specs,py_func_evals);}) + .def("get_best_function_eval", [](const global_function_search& self) { + matrix<double,0,1> x; double y; size_t idx; self.get_best_function_eval(x,y,idx); return py::make_tuple(x,y,idx);}) + .def("get_next_x", &global_function_search::get_next_x) + .def("get_pure_random_search_probability", &global_function_search::get_pure_random_search_probability) + .def("set_pure_random_search_probability", &global_function_search::set_pure_random_search_probability, py::arg("prob")) + .def("get_solver_epsilon", 
&global_function_search::get_solver_epsilon) + .def("set_solver_epsilon", &global_function_search::set_solver_epsilon, py::arg("eps")) + .def("get_relative_noise_magnitude", &global_function_search::get_relative_noise_magnitude) + .def("set_relative_noise_magnitude", &global_function_search::set_relative_noise_magnitude, py::arg("value")) + .def("get_monte_carlo_upper_bound_sample_num", &global_function_search::get_monte_carlo_upper_bound_sample_num) + .def("set_monte_carlo_upper_bound_sample_num", &global_function_search::set_monte_carlo_upper_bound_sample_num, py::arg("num")) + ; + +} + diff --git a/ml/dlib/tools/python/src/gui.cpp b/ml/dlib/tools/python/src/gui.cpp new file mode 100644 index 00000000..418cfaae --- /dev/null +++ b/ml/dlib/tools/python/src/gui.cpp @@ -0,0 +1,128 @@ +#ifndef DLIB_NO_GUI_SUPPORT + +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/geometry.h> +#include <dlib/image_processing/frontal_face_detector.h> +#include <dlib/image_processing/render_face_detections.h> +#include <dlib/gui_widgets.h> +#include "simple_object_detector_py.h" + +using namespace dlib; +using namespace std; + +namespace py = pybind11; + +// ---------------------------------------------------------------------------------------- + +void image_window_set_image_fhog_detector ( + image_window& win, + const simple_object_detector& det +) +{ + win.set_image(draw_fhog(det)); +} + +void image_window_set_image_simple_detector_py ( + image_window& win, + const simple_object_detector_py& det +) +{ + win.set_image(draw_fhog(det.detector)); +} + +// ---------------------------------------------------------------------------------------- + +void image_window_set_image ( + image_window& win, + py::object img +) +{ + if (is_gray_python_image(img)) + return win.set_image(numpy_gray_image(img)); + else if (is_rgb_python_image(img)) + return win.set_image(numpy_rgb_image(img)); + else + throw dlib::error("Unsupported image type, must be 8bit gray or RGB image."); +} + 
+void add_overlay_rect ( + image_window& win, + const rectangle& rect, + const rgb_pixel& color +) +{ + win.add_overlay(rect, color); +} + +void add_overlay_drect ( + image_window& win, + const drectangle& drect, + const rgb_pixel& color +) +{ + rectangle rect(drect.left(), drect.top(), drect.right(), drect.bottom()); + win.add_overlay(rect, color); +} + +void add_overlay_parts ( + image_window& win, + const full_object_detection& detection, + const rgb_pixel& color +) +{ + win.add_overlay(render_face_detections(detection, color)); +} + +std::shared_ptr<image_window> make_image_window_from_image(py::object img) +{ + auto win = std::make_shared<image_window>(); + image_window_set_image(*win, img); + return win; +} + +std::shared_ptr<image_window> make_image_window_from_image_and_title(py::object img, const string& title) +{ + auto win = std::make_shared<image_window>(); + image_window_set_image(*win, img); + win->set_title(title); + return win; +} + +// ---------------------------------------------------------------------------------------- + +void bind_gui(py::module& m) +{ + { + typedef image_window type; + typedef void (image_window::*set_title_funct)(const std::string&); + typedef void (image_window::*add_overlay_funct)(const std::vector<rectangle>& r, rgb_pixel p); + py::class_<type, std::shared_ptr<type>>(m, "image_window", + "This is a GUI window capable of showing images on the screen.") + .def(py::init()) + .def(py::init(&make_image_window_from_image), + "Create an image window that displays the given numpy image.") + .def(py::init(&make_image_window_from_image_and_title), + "Create an image window that displays the given numpy image and also has the given title.") + .def("set_image", image_window_set_image_simple_detector_py, py::arg("detector"), + "Make the image_window display the given HOG detector's filters.") + .def("set_image", image_window_set_image_fhog_detector, py::arg("detector"), + "Make the image_window display the given HOG detector's 
filters.") + .def("set_image", image_window_set_image, py::arg("image"), + "Make the image_window display the given image.") + .def("set_title", (set_title_funct)&type::set_title, py::arg("title"), + "Set the title of the window to the given value.") + .def("clear_overlay", &type::clear_overlay, "Remove all overlays from the image_window.") + .def("add_overlay", (add_overlay_funct)&type::add_overlay<rgb_pixel>, py::arg("rectangles"), py::arg("color")=rgb_pixel(255, 0, 0), + "Add a list of rectangles to the image_window. They will be displayed as red boxes by default, but the color can be passed.") + .def("add_overlay", add_overlay_rect, py::arg("rectangle"), py::arg("color")=rgb_pixel(255, 0, 0), + "Add a rectangle to the image_window. It will be displayed as a red box by default, but the color can be passed.") + .def("add_overlay", add_overlay_drect, py::arg("rectangle"), py::arg("color")=rgb_pixel(255, 0, 0), + "Add a rectangle to the image_window. It will be displayed as a red box by default, but the color can be passed.") + .def("add_overlay", add_overlay_parts, py::arg("detection"), py::arg("color")=rgb_pixel(0, 0, 255), + "Add full_object_detection parts to the image window. 
They will be displayed as blue lines by default, but the color can be passed.") + .def("wait_until_closed", &type::wait_until_closed, + "This function blocks until the window is closed."); + } +} +#endif diff --git a/ml/dlib/tools/python/src/image.cpp b/ml/dlib/tools/python/src/image.cpp new file mode 100644 index 00000000..bd43ce5a --- /dev/null +++ b/ml/dlib/tools/python/src/image.cpp @@ -0,0 +1,40 @@ +#include "opaque_types.h" +#include <dlib/python.h> +#include "dlib/pixel.h" +#include <dlib/image_transforms.h> + +using namespace dlib; +using namespace std; + +namespace py = pybind11; + +// ---------------------------------------------------------------------------------------- + +string print_rgb_pixel_str(const rgb_pixel& p) +{ + std::ostringstream sout; + sout << "red: "<< (int)p.red + << ", green: "<< (int)p.green + << ", blue: "<< (int)p.blue; + return sout.str(); +} + +string print_rgb_pixel_repr(const rgb_pixel& p) +{ + std::ostringstream sout; + sout << "rgb_pixel(" << (int)p.red << "," << (int)p.green << "," << (int)p.blue << ")"; + return sout.str(); +} + +// ---------------------------------------------------------------------------------------- + +void bind_image_classes(py::module& m) +{ + py::class_<rgb_pixel>(m, "rgb_pixel") + .def(py::init<unsigned char,unsigned char,unsigned char>(), py::arg("red"), py::arg("green"), py::arg("blue")) + .def("__str__", &print_rgb_pixel_str) + .def("__repr__", &print_rgb_pixel_repr) + .def_readwrite("red", &rgb_pixel::red) + .def_readwrite("green", &rgb_pixel::green) + .def_readwrite("blue", &rgb_pixel::blue); +} diff --git a/ml/dlib/tools/python/src/image_dataset_metadata.cpp b/ml/dlib/tools/python/src/image_dataset_metadata.cpp new file mode 100644 index 00000000..8f23ddd3 --- /dev/null +++ b/ml/dlib/tools/python/src/image_dataset_metadata.cpp @@ -0,0 +1,279 @@ +// Copyright (C) 2018 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+ +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/data_io.h> +#include <dlib/image_processing.h> +#include <pybind11/stl_bind.h> +#include <pybind11/stl.h> +#include <iostream> + +namespace pybind11 +{ + + // a version of bind_map that doesn't force it's own __repr__ on you. +template <typename Map, typename holder_type = std::unique_ptr<Map>, typename... Args> +class_<Map, holder_type> bind_map_no_default_repr(handle scope, const std::string &name, Args&&... args) { + using KeyType = typename Map::key_type; + using MappedType = typename Map::mapped_type; + using Class_ = class_<Map, holder_type>; + + // If either type is a non-module-local bound type then make the map binding non-local as well; + // otherwise (e.g. both types are either module-local or converting) the map will be + // module-local. + auto tinfo = detail::get_type_info(typeid(MappedType)); + bool local = !tinfo || tinfo->module_local; + if (local) { + tinfo = detail::get_type_info(typeid(KeyType)); + local = !tinfo || tinfo->module_local; + } + + Class_ cl(scope, name.c_str(), pybind11::module_local(local), std::forward<Args>(args)...); + + cl.def(init<>()); + + + cl.def("__bool__", + [](const Map &m) -> bool { return !m.empty(); }, + "Check whether the map is nonempty" + ); + + cl.def("__iter__", + [](Map &m) { return make_key_iterator(m.begin(), m.end()); }, + keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */ + ); + + cl.def("items", + [](Map &m) { return make_iterator(m.begin(), m.end()); }, + keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */ + ); + + cl.def("__getitem__", + [](Map &m, const KeyType &k) -> MappedType & { + auto it = m.find(k); + if (it == m.end()) + throw key_error(); + return it->second; + }, + return_value_policy::reference_internal // ref + keepalive + ); + + // Assignment provided only if the type is copyable + detail::map_assignment<Map, Class_>(cl); + + cl.def("__delitem__", + [](Map &m, const KeyType &k) 
{ + auto it = m.find(k); + if (it == m.end()) + throw key_error(); + return m.erase(it); + } + ); + + cl.def("__len__", &Map::size); + + return cl; +} + +} + +using namespace dlib; +using namespace std; +using namespace dlib::image_dataset_metadata; + +namespace py = pybind11; + + +dataset py_load_image_dataset_metadata( + const std::string& filename +) +{ + dataset temp; + load_image_dataset_metadata(temp, filename); + return temp; +} + +std::shared_ptr<std::map<std::string,point>> map_from_object(py::dict obj) +{ + auto ret = std::make_shared<std::map<std::string,point>>(); + for (auto& v : obj) + { + (*ret)[v.first.cast<std::string>()] = v.second.cast<point>(); + } + return ret; +} + +// ---------------------------------------------------------------------------------------- + +image_dataset_metadata::dataset py_make_bounding_box_regression_training_data ( + const image_dataset_metadata::dataset& truth, + const py::object& detections +) +{ + try + { + // if detections is a std::vector then call like this. + return make_bounding_box_regression_training_data(truth, detections.cast<const std::vector<std::vector<rectangle>>&>()); + } + catch (py::cast_error&) + { + // otherwise, detections should be a list of std::vectors. 
+ py::list dets(detections); + std::vector<std::vector<rectangle>> temp; + for (auto& d : dets) + temp.emplace_back(d.cast<const std::vector<rectangle>&>()); + return make_bounding_box_regression_training_data(truth, temp); + } +} + +// ---------------------------------------------------------------------------------------- + +void bind_image_dataset_metadata(py::module &m_) +{ + auto m = m_.def_submodule("image_dataset_metadata", "Routines and objects for working with dlib's image dataset metadata XML files."); + + auto datasetstr = [](const dataset& item) { return "dlib.dataset_dataset_metadata.dataset: images:" + to_string(item.images.size()) + ", " + item.name; }; + auto datasetrepr = [datasetstr](const dataset& item) { return "<"+datasetstr(item)+">"; }; + py::class_<dataset>(m, "dataset", + "This object represents a labeled set of images. In particular, it contains the filename for each image as well as annotated boxes.") + .def("__str__", datasetstr) + .def("__repr__", datasetrepr) + .def_readwrite("images", &dataset::images) + .def_readwrite("comment", &dataset::comment) + .def_readwrite("name", &dataset::name); + + auto imagestr = [](const image& item) { return "dlib.image_dataset_metadata.image: boxes:"+to_string(item.boxes.size())+ ", " + item.filename; }; + auto imagerepr = [imagestr](const image& item) { return "<"+imagestr(item)+">"; }; + py::class_<image>(m, "image", "This object represents an annotated image.") + .def_readwrite("filename", &image::filename) + .def("__str__", imagestr) + .def("__repr__", imagerepr) + .def_readwrite("boxes", &image::boxes); + + + auto partsstr = [](const std::map<std::string,point>& item) { + std::ostringstream sout; + sout << "{"; + for (auto& v : item) + sout << "'" << v.first << "': " << v.second << ", "; + sout << "}"; + return sout.str(); + }; + auto partsrepr = [](const std::map<std::string,point>& item) { + std::ostringstream sout; + sout << "dlib.image_dataset_metadata.parts({\n"; + for (auto& v : item) + sout 
<< "'" << v.first << "': dlib.point" << v.second << ",\n"; + sout << "})"; + return sout.str(); + }; + + py::bind_map_no_default_repr<std::map<std::string,point>, std::shared_ptr<std::map<std::string,point>> >(m, "parts", + "This object is a dictionary mapping string names to object part locations.") + .def(py::init(&map_from_object)) + .def("__str__", partsstr) + .def("__repr__", partsrepr); + + + auto rectstr = [](const rectangle& r) { + std::ostringstream sout; + sout << "dlib.rectangle(" << r.left() << "," << r.top() << "," << r.right() << "," << r.bottom() << ")"; + return sout.str(); + }; + auto boxstr = [rectstr](const box& item) { return "dlib.image_dataset_metadata.box at " + rectstr(item.rect); }; + auto boxrepr = [boxstr](const box& item) { return "<"+boxstr(item)+">"; }; + py::class_<box> pybox(m, "box", + "This object represents an annotated rectangular area of an image. \n" + "It is typically used to mark the location of an object such as a \n" + "person, car, etc.\n" + "\n" + "The main variable of interest is rect. It gives the location of \n" + "the box. All the other variables are optional." 
); pybox + .def("__str__", boxstr) + .def("__repr__", boxrepr) + .def_readwrite("rect", &box::rect) + .def_readonly("parts", &box::parts) + .def_readwrite("label", &box::label) + .def_readwrite("difficult", &box::difficult) + .def_readwrite("truncated", &box::truncated) + .def_readwrite("occluded", &box::occluded) + .def_readwrite("ignore", &box::ignore) + .def_readwrite("pose", &box::pose) + .def_readwrite("detection_score", &box::detection_score) + .def_readwrite("angle", &box::angle) + .def_readwrite("gender", &box::gender) + .def_readwrite("age", &box::age); + + py::enum_<gender_t>(pybox,"gender_type") + .value("MALE", gender_t::MALE) + .value("FEMALE", gender_t::FEMALE) + .value("UNKNOWN", gender_t::UNKNOWN) + .export_values(); + + + m.def("save_image_dataset_metadata", &save_image_dataset_metadata, py::arg("data"), py::arg("filename"), + "Writes the contents of the meta object to a file with the given filename. The file will be in an XML format." + ); + + m.def("load_image_dataset_metadata", &py_load_image_dataset_metadata, py::arg("filename"), + "Attempts to interpret filename as a file containing XML formatted data as produced " + "by the save_image_dataset_metadata() function. The data is loaded and returned as a dlib.image_dataset_metadata.dataset object." + ); + + m_.def("make_bounding_box_regression_training_data", &py_make_bounding_box_regression_training_data, + py::arg("truth"), py::arg("detections"), +"requires \n\ + - len(truth.images) == len(detections) \n\ + - detections == A dlib.rectangless object or a list of dlib.rectangles. \n\ +ensures \n\ + - Suppose you have an object detector that can roughly locate objects in an \n\ + image. This means your detector draws boxes around objects, but these are \n\ + *rough* boxes in the sense that they aren't positioned super accurately. For \n\ + instance, HOG based detectors usually have a stride of 8 pixels. So the \n\ + positional accuracy is going to be, at best, +/-8 pixels. 
\n\ + \n\ + If you want to get better positional accuracy one easy thing to do is train a \n\ + shape_predictor to give you the corners of the object. The \n\ + make_bounding_box_regression_training_data() routine helps you do this by \n\ + creating an appropriate training dataset. It does this by taking the dataset \n\ + you used to train your detector (the truth object), and combining that with \n\ + the output of your detector on each image in the training dataset (the \n\ + detections object). In particular, it will create a new annotated dataset \n\ + where each object box is one of the rectangles from detections and that \n\ + object has 4 part annotations, the corners of the truth rectangle \n\ + corresponding to that detection rectangle. You can then take the returned \n\ + dataset and train a shape_predictor on it. The resulting shape_predictor can \n\ + then be used to do bounding box regression. \n\ + - We assume that detections[i] contains object detections corresponding to \n\ + the image truth.images[i]." + /*! + requires + - len(truth.images) == len(detections) + - detections == A dlib.rectangless object or a list of dlib.rectangles. + ensures + - Suppose you have an object detector that can roughly locate objects in an + image. This means your detector draws boxes around objects, but these are + *rough* boxes in the sense that they aren't positioned super accurately. For + instance, HOG based detectors usually have a stride of 8 pixels. So the + positional accuracy is going to be, at best, +/-8 pixels. + + If you want to get better positional accuracy one easy thing to do is train a + shape_predictor to give you the corners of the object. The + make_bounding_box_regression_training_data() routine helps you do this by + creating an appropriate training dataset. 
It does this by taking the dataset + you used to train your detector (the truth object), and combining that with + the output of your detector on each image in the training dataset (the + detections object). In particular, it will create a new annotated dataset + where each object box is one of the rectangles from detections and that + object has 4 part annotations, the corners of the truth rectangle + corresponding to that detection rectangle. You can then take the returned + dataset and train a shape_predictor on it. The resulting shape_predictor can + then be used to do bounding box regression. + - We assume that detections[i] contains object detections corresponding to + the image truth.images[i]. + !*/ + ); +} + + diff --git a/ml/dlib/tools/python/src/indexing.h b/ml/dlib/tools/python/src/indexing.h new file mode 100644 index 00000000..3aa398f0 --- /dev/null +++ b/ml/dlib/tools/python/src/indexing.h @@ -0,0 +1,11 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_PYTHON_INDEXING_H__ +#define DLIB_PYTHON_INDEXING_H__ + +namespace dlib +{ + template <typename T> + void resize(T& v, unsigned long n) { v.resize(n); } +} +#endif // DLIB_PYTHON_INDEXING_H__ diff --git a/ml/dlib/tools/python/src/matrix.cpp b/ml/dlib/tools/python/src/matrix.cpp new file mode 100644 index 00000000..a9354482 --- /dev/null +++ b/ml/dlib/tools/python/src/matrix.cpp @@ -0,0 +1,209 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+ +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/matrix.h> +#include <dlib/string.h> +#include <pybind11/pybind11.h> + +using namespace dlib; +namespace py = pybind11; +using std::string; +using std::ostringstream; + + +void matrix_set_size(matrix<double>& m, long nr, long nc) +{ + m.set_size(nr,nc); + m = 0; +} + +string matrix_double__repr__(matrix<double>& c) +{ + ostringstream sout; + sout << "< dlib.matrix containing: \n"; + sout << c; + return trim(sout.str()) + " >"; +} + +string matrix_double__str__(matrix<double>& c) +{ + ostringstream sout; + sout << c; + return trim(sout.str()); +} + +std::shared_ptr<matrix<double> > make_matrix_from_size(long nr, long nc) +{ + if (nr < 0 || nc < 0) + { + PyErr_SetString( PyExc_IndexError, "Input dimensions can't be negative." + ); + throw py::error_already_set(); + } + auto temp = std::make_shared<matrix<double>>(nr,nc); + *temp = 0; + return temp; +} + + +std::shared_ptr<matrix<double> > from_object(py::object obj) +{ + py::tuple s = obj.attr("shape").cast<py::tuple>(); + if (len(s) != 2) + { + PyErr_SetString( PyExc_IndexError, "Input must be a matrix or some kind of 2D array." 
+ ); + throw py::error_already_set(); + } + + const long nr = s[0].cast<long>(); + const long nc = s[1].cast<long>(); + auto temp = std::make_shared<matrix<double>>(nr,nc); + for ( long r = 0; r < nr; ++r) + { + for (long c = 0; c < nc; ++c) + { + (*temp)(r,c) = obj[py::make_tuple(r,c)].cast<double>(); + } + } + return temp; +} + +std::shared_ptr<matrix<double> > from_list(py::list l) +{ + const long nr = py::len(l); + if (py::isinstance<py::list>(l[0])) + { + const long nc = py::len(l[0]); + // make sure all the other rows have the same length + for (long r = 1; r < nr; ++r) + pyassert(py::len(l[r]) == nc, "All rows of a matrix must have the same number of columns."); + + auto temp = std::make_shared<matrix<double>>(nr,nc); + for ( long r = 0; r < nr; ++r) + { + for (long c = 0; c < nc; ++c) + { + (*temp)(r,c) = l[r].cast<py::list>()[c].cast<double>(); + } + } + return temp; + } + else + { + // In this case we treat it like a column vector + auto temp = std::make_shared<matrix<double>>(nr,1); + for ( long r = 0; r < nr; ++r) + { + (*temp)(r) = l[r].cast<double>(); + } + return temp; + } +} + +long matrix_double__len__(matrix<double>& c) +{ + return c.nr(); +} + +struct mat_row +{ + mat_row() : data(0),size(0) {} + mat_row(double* data_, long size_) : data(data_),size(size_) {} + double* data; + long size; +}; + +void mat_row__setitem__(mat_row& c, long p, double val) +{ + if (p < 0) { + p = c.size + p; // negative index + } + if (p > c.size-1) { + PyErr_SetString( PyExc_IndexError, "3 index out of range" + ); + throw py::error_already_set(); + } + c.data[p] = val; +} + + +string mat_row__str__(mat_row& c) +{ + ostringstream sout; + sout << mat(c.data,1, c.size); + return sout.str(); +} + +string mat_row__repr__(mat_row& c) +{ + ostringstream sout; + sout << "< matrix row: " << mat(c.data,1, c.size); + return trim(sout.str()) + " >"; +} + +long mat_row__len__(mat_row& m) +{ + return m.size; +} + +double mat_row__getitem__(mat_row& m, long r) +{ + if (r < 0) { + r = 
m.size + r; // negative index + } + if (r > m.size-1 || r < 0) { + PyErr_SetString( PyExc_IndexError, "1 index out of range" + ); + throw py::error_already_set(); + } + return m.data[r]; +} + +mat_row matrix_double__getitem__(matrix<double>& m, long r) +{ + if (r < 0) { + r = m.nr() + r; // negative index + } + if (r > m.nr()-1 || r < 0) { + PyErr_SetString( PyExc_IndexError, (string("2 index out of range, got ") + cast_to_string(r)).c_str() + ); + throw py::error_already_set(); + } + return mat_row(&m(r,0),m.nc()); +} + + +py::tuple get_matrix_size(matrix<double>& m) +{ + return py::make_tuple(m.nr(), m.nc()); +} + +void bind_matrix(py::module& m) +{ + py::class_<mat_row>(m, "_row") + .def("__len__", &mat_row__len__) + .def("__repr__", &mat_row__repr__) + .def("__str__", &mat_row__str__) + .def("__setitem__", &mat_row__setitem__) + .def("__getitem__", &mat_row__getitem__); + + py::class_<matrix<double>, std::shared_ptr<matrix<double>>>(m, "matrix", + "This object represents a dense 2D matrix of floating point numbers." 
+ "Moreover, it binds directly to the C++ type dlib::matrix<double>.") + .def(py::init<>()) + .def(py::init(&from_list)) + .def(py::init(&from_object)) + .def(py::init(&make_matrix_from_size)) + .def("set_size", &matrix_set_size, py::arg("rows"), py::arg("cols"), "Set the size of the matrix to the given number of rows and columns.") + .def("__repr__", &matrix_double__repr__) + .def("__str__", &matrix_double__str__) + .def("nr", &matrix<double>::nr, "Return the number of rows in the matrix.") + .def("nc", &matrix<double>::nc, "Return the number of columns in the matrix.") + .def("__len__", &matrix_double__len__) + .def("__getitem__", &matrix_double__getitem__, py::keep_alive<0,1>()) + .def_property_readonly("shape", &get_matrix_size) + .def(py::pickle(&getstate<matrix<double>>, &setstate<matrix<double>>)); +} diff --git a/ml/dlib/tools/python/src/numpy_returns.cpp b/ml/dlib/tools/python/src/numpy_returns.cpp new file mode 100644 index 00000000..235816a7 --- /dev/null +++ b/ml/dlib/tools/python/src/numpy_returns.cpp @@ -0,0 +1,158 @@ +#include "opaque_types.h" +#include <dlib/python.h> +#include "dlib/pixel.h" +#include <dlib/image_transforms.h> + +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include <numpy/ndarrayobject.h> + + +using namespace dlib; +using namespace std; + +namespace py = pybind11; + +// ---------------------------------------------------------------------------------------- + +py::list get_jitter_images(py::object img, size_t num_jitters = 1, bool disturb_colors = false) +{ + static dlib::rand rnd_jitter; + if (!is_rgb_python_image(img)) + throw dlib::error("Unsupported image type, must be RGB image."); + + // Convert the image to matrix<rgb_pixel> for processing + matrix<rgb_pixel> img_mat; + assign_image(img_mat, numpy_rgb_image(img)); + + // The top level list (containing 1 or more images) to return to python + py::list jitter_list; + + size_t rows = num_rows(img_mat); + size_t cols = num_columns(img_mat); + + // Size of the numpy array + 
npy_intp dims[3] = { num_rows(img_mat), num_columns(img_mat), 3}; + + for (int i = 0; i < num_jitters; ++i) { + // Get a jittered crop + matrix<rgb_pixel> crop = dlib::jitter_image(img_mat, rnd_jitter); + // If required disturb colors of the image + if(disturb_colors) + dlib::disturb_colors(crop, rnd_jitter); + + PyObject *arr = PyArray_SimpleNew(3, dims, NPY_UINT8); + npy_uint8 *outdata = (npy_uint8 *) PyArray_DATA((PyArrayObject*) arr); + memcpy(outdata, image_data(crop), rows * width_step(crop)); + + py::handle handle(arr); + // Append image to jittered image list + jitter_list.append(handle); + } + + return jitter_list; +} + +// ---------------------------------------------------------------------------------------- + +py::list get_face_chips ( + py::object img, + const std::vector<full_object_detection>& faces, + size_t size = 150, + float padding = 0.25 +) +{ + if (!is_rgb_python_image(img)) + throw dlib::error("Unsupported image type, must be RGB image."); + + if (faces.size() < 1) { + throw dlib::error("No face were specified in the faces array."); + } + + py::list chips_list; + + std::vector<chip_details> dets; + for (auto& f : faces) + dets.push_back(get_face_chip_details(f, size, padding)); + dlib::array<matrix<rgb_pixel>> face_chips; + extract_image_chips(numpy_rgb_image(img), dets, face_chips); + + npy_intp rows = size; + npy_intp cols = size; + + // Size of the numpy array + npy_intp dims[3] = { rows, cols, 3}; + + for (auto& chip : face_chips) + { + PyObject *arr = PyArray_SimpleNew(3, dims, NPY_UINT8); + npy_uint8 *outdata = (npy_uint8 *) PyArray_DATA((PyArrayObject*) arr); + memcpy(outdata, image_data(chip), rows * width_step(chip)); + py::handle handle(arr); + + // Append image to chips list + chips_list.append(handle); + } + return chips_list; +} + +py::object get_face_chip ( + py::object img, + const full_object_detection& face, + size_t size = 150, + float padding = 0.25 +) +{ + if (!is_rgb_python_image(img)) + throw dlib::error("Unsupported 
image type, must be RGB image."); + + matrix<rgb_pixel> chip; + extract_image_chip(numpy_rgb_image(img), get_face_chip_details(face, size, padding), chip); + + // Size of the numpy array + npy_intp dims[3] = { num_rows(chip), num_columns(chip), 3}; + + PyObject *arr = PyArray_SimpleNew(3, dims, NPY_UINT8); + npy_uint8 *outdata = (npy_uint8 *) PyArray_DATA((PyArrayObject *) arr); + memcpy(outdata, image_data(chip), num_rows(chip) * width_step(chip)); + py::handle handle(arr); + return handle.cast<py::object>(); +} + +// ---------------------------------------------------------------------------------------- + +// we need this wonky stuff because different versions of numpy's import_array macro +// contain differently typed return statements inside import_array(). +#if PY_VERSION_HEX >= 0x03000000 +#define DLIB_NUMPY_IMPORT_ARRAY_RETURN_TYPE void* +#define DLIB_NUMPY_IMPORT_RETURN return 0 +#else +#define DLIB_NUMPY_IMPORT_ARRAY_RETURN_TYPE void +#define DLIB_NUMPY_IMPORT_RETURN return +#endif +DLIB_NUMPY_IMPORT_ARRAY_RETURN_TYPE import_numpy_stuff() +{ + import_array(); + DLIB_NUMPY_IMPORT_RETURN; +} + +void bind_numpy_returns(py::module &m) +{ + import_numpy_stuff(); + + m.def("jitter_image", &get_jitter_images, + "Takes an image and returns a list of jittered images." + "The returned list contains num_jitters images (default is 1)." + "If disturb_colors is set to True, the colors of the image are disturbed (default is False)", + py::arg("img"), py::arg("num_jitters")=1, py::arg("disturb_colors")=false + ); + + m.def("get_face_chip", &get_face_chip, + "Takes an image and a full_object_detection that references a face in that image and returns the face as a Numpy array representing the image. 
The face will be rotated upright and scaled to 150x150 pixels or with the optional specified size and padding.", + py::arg("img"), py::arg("face"), py::arg("size")=150, py::arg("padding")=0.25 + ); + + m.def("get_face_chips", &get_face_chips, + "Takes an image and a full_object_detections object that reference faces in that image and returns the faces as a list of Numpy arrays representing the image. The faces will be rotated upright and scaled to 150x150 pixels or with the optional specified size and padding.", + py::arg("img"), py::arg("faces"), py::arg("size")=150, py::arg("padding")=0.25 + ); +} diff --git a/ml/dlib/tools/python/src/numpy_returns_stub.cpp b/ml/dlib/tools/python/src/numpy_returns_stub.cpp new file mode 100644 index 00000000..07d38cea --- /dev/null +++ b/ml/dlib/tools/python/src/numpy_returns_stub.cpp @@ -0,0 +1,59 @@ +#include "opaque_types.h" +#include <dlib/python.h> +#include "dlib/pixel.h" +#include <dlib/image_transforms.h> + +using namespace dlib; +using namespace std; +namespace py = pybind11; + +// ---------------------------------------------------------------------------------------- + +py::list get_jitter_images(py::object img, size_t num_jitters = 1, bool disturb_colors = false) +{ + throw dlib::error("jitter_image is only supported if you compiled dlib with numpy installed!"); +} + +// ---------------------------------------------------------------------------------------- + +py::list get_face_chips ( + py::object img, + const std::vector<full_object_detection>& faces, + size_t size = 150, + float padding = 0.25 +) +{ + throw dlib::error("get_face_chips is only supported if you compiled dlib with numpy installed!"); +} + +py::object get_face_chip ( + py::object img, + const full_object_detection& face, + size_t size = 150, + float padding = 0.25 +) +{ + throw dlib::error("get_face_chip is only supported if you compiled dlib with numpy installed!"); +} + +// 
---------------------------------------------------------------------------------------- + +void bind_numpy_returns(py::module &m) +{ + m.def("jitter_image", &get_jitter_images, + "Takes an image and returns a list of jittered images." + "The returned list contains num_jitters images (default is 1)." + "If disturb_colors is set to True, the colors of the image are disturbed (default is False)", + py::arg("img"), py::arg("num_jitters")=1, py::arg("disturb_colors")=false + ); + + m.def("get_face_chip", &get_face_chip, + "Takes an image and a full_object_detection that references a face in that image and returns the face as a Numpy array representing the image. The face will be rotated upright and scaled to 150x150 pixels or with the optional specified size and padding.", + py::arg("img"), py::arg("face"), py::arg("size")=150, py::arg("padding")=0.25 + ); + + m.def("get_face_chips", &get_face_chips, + "Takes an image and a full_object_detections object that reference faces in that image and returns the faces as a list of Numpy arrays representing the image. The faces will be rotated upright and scaled to 150x150 pixels or with the optional specified size and padding.", + py::arg("img"), py::arg("faces"), py::arg("size")=150, py::arg("padding")=0.25 + ); +} diff --git a/ml/dlib/tools/python/src/object_detection.cpp b/ml/dlib/tools/python/src/object_detection.cpp new file mode 100644 index 00000000..bda570d7 --- /dev/null +++ b/ml/dlib/tools/python/src/object_detection.cpp @@ -0,0 +1,376 @@ +// Copyright (C) 2015 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+ +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/matrix.h> +#include <dlib/geometry.h> +#include <dlib/image_processing/frontal_face_detector.h> +#include "simple_object_detector.h" +#include "simple_object_detector_py.h" +#include "conversion.h" + +using namespace dlib; +using namespace std; + +namespace py = pybind11; + +// ---------------------------------------------------------------------------------------- + +string print_simple_test_results(const simple_test_results& r) +{ + std::ostringstream sout; + sout << "precision: "<<r.precision << ", recall: "<< r.recall << ", average precision: " << r.average_precision; + return sout.str(); +} + +// ---------------------------------------------------------------------------------------- + +inline simple_object_detector_py train_simple_object_detector_on_images_py ( + const py::list& pyimages, + const py::list& pyboxes, + const simple_object_detector_training_options& options +) +{ + const unsigned long num_images = py::len(pyimages); + if (num_images != py::len(pyboxes)) + throw dlib::error("The length of the boxes list must match the length of the images list."); + + // We never have any ignore boxes for this version of the API. + std::vector<std::vector<rectangle> > ignore(num_images), boxes(num_images); + dlib::array<array2d<rgb_pixel> > images(num_images); + images_and_nested_params_to_dlib(pyimages, pyboxes, images, boxes); + + return train_simple_object_detector_on_images("", images, boxes, ignore, options); +} + +inline simple_test_results test_simple_object_detector_with_images_py ( + const py::list& pyimages, + const py::list& pyboxes, + simple_object_detector& detector, + const unsigned int upsampling_amount +) +{ + const unsigned long num_images = py::len(pyimages); + if (num_images != py::len(pyboxes)) + throw dlib::error("The length of the boxes list must match the length of the images list."); + + // We never have any ignore boxes for this version of the API. 
+ std::vector<std::vector<rectangle> > ignore(num_images), boxes(num_images); + dlib::array<array2d<rgb_pixel> > images(num_images); + images_and_nested_params_to_dlib(pyimages, pyboxes, images, boxes); + + return test_simple_object_detector_with_images(images, upsampling_amount, boxes, ignore, detector); +} + +// ---------------------------------------------------------------------------------------- + +inline simple_test_results test_simple_object_detector_py_with_images_py ( + const py::list& pyimages, + const py::list& pyboxes, + simple_object_detector_py& detector, + const int upsampling_amount +) +{ + // Allow users to pass an upsampling amount ELSE use the one cached on the object + // Anything less than 0 is ignored and the cached value is used. + unsigned int final_upsampling_amount = 0; + if (upsampling_amount >= 0) + final_upsampling_amount = upsampling_amount; + else + final_upsampling_amount = detector.upsampling_amount; + + return test_simple_object_detector_with_images_py(pyimages, pyboxes, detector.detector, final_upsampling_amount); +} + +// ---------------------------------------------------------------------------------------- + +inline void find_candidate_object_locations_py ( + py::object pyimage, + py::list& pyboxes, + py::tuple pykvals, + unsigned long min_size, + unsigned long max_merging_iterations +) +{ + // Copy the data into dlib based objects + array2d<rgb_pixel> image; + if (is_gray_python_image(pyimage)) + assign_image(image, numpy_gray_image(pyimage)); + else if (is_rgb_python_image(pyimage)) + assign_image(image, numpy_rgb_image(pyimage)); + else + throw dlib::error("Unsupported image type, must be 8bit gray or RGB image."); + + if (py::len(pykvals) != 3) + throw dlib::error("kvals must be a tuple with three elements for start, end, num."); + + double start = pykvals[0].cast<double>(); + double end = pykvals[1].cast<double>(); + long num = pykvals[2].cast<long>(); + matrix_range_exp<double> kvals = linspace(start, end, num); + + 
std::vector<rectangle> rects; + const long count = py::len(pyboxes); + // Copy any rectangles in the input pyboxes into rects so that any rectangles will be + // properly deduped in the resulting output. + for (long i = 0; i < count; ++i) + rects.push_back(pyboxes[i].cast<rectangle>()); + // Find candidate objects + find_candidate_object_locations(image, rects, kvals, min_size, max_merging_iterations); + + // Collect boxes containing candidate objects + std::vector<rectangle>::iterator iter; + for (iter = rects.begin(); iter != rects.end(); ++iter) + pyboxes.append(*iter); +} + +// ---------------------------------------------------------------------------------------- + +void bind_object_detection(py::module& m) +{ + { + typedef simple_object_detector_training_options type; + py::class_<type>(m, "simple_object_detector_training_options", + "This object is a container for the options to the train_simple_object_detector() routine.") + .def(py::init()) + .def_readwrite("be_verbose", &type::be_verbose, +"If true, train_simple_object_detector() will print out a lot of information to the screen while training.") + .def_readwrite("add_left_right_image_flips", &type::add_left_right_image_flips, +"if true, train_simple_object_detector() will assume the objects are \n\ +left/right symmetric and add in left right flips of the training \n\ +images. This doubles the size of the training dataset.") + .def_readwrite("detection_window_size", &type::detection_window_size, + "The sliding window used will have about this many pixels inside it.") + .def_readwrite("C", &type::C, +"C is the usual SVM C regularization parameter. So it is passed to \n\ +structural_object_detection_trainer::set_c(). Larger values of C \n\ +will encourage the trainer to fit the data better but might lead to \n\ +overfitting. Therefore, you must determine the proper setting of \n\ +this parameter experimentally.") + .def_readwrite("epsilon", &type::epsilon, +"epsilon is the stopping epsilon. 
Smaller values make the trainer's \n\ +solver more accurate but might take longer to train.") + .def_readwrite("num_threads", &type::num_threads, +"train_simple_object_detector() will use this many threads of \n\ +execution. Set this to the number of CPU cores on your machine to \n\ +obtain the fastest training speed.") + .def_readwrite("upsample_limit", &type::upsample_limit, +"train_simple_object_detector() will upsample images if needed \n\ +no more than upsample_limit times. Value 0 will forbid trainer to \n\ +upsample any images. If trainer is unable to fit all boxes with \n\ +required upsample_limit, exception will be thrown. Higher values \n\ +of upsample_limit exponentially increases memory requiremens. \n\ +Values higher than 2 (default) are not recommended."); + } + { + typedef simple_test_results type; + py::class_<type>(m, "simple_test_results") + .def_readwrite("precision", &type::precision) + .def_readwrite("recall", &type::recall) + .def_readwrite("average_precision", &type::average_precision) + .def("__str__", &::print_simple_test_results); + } + + // Here, kvals is actually the result of linspace(start, end, num) and it is different from kvals used + // in find_candidate_object_locations(). See dlib/image_transforms/segment_image_abstract.h for more details. + m.def("find_candidate_object_locations", find_candidate_object_locations_py, py::arg("image"), py::arg("rects"), py::arg("kvals")=py::make_tuple(50, 200, 3), py::arg("min_size")=20, py::arg("max_merging_iterations")=50, +"Returns found candidate objects\n\ +requires\n\ + - image == an image object which is a numpy ndarray\n\ + - len(kvals) == 3\n\ + - kvals should be a tuple that specifies the range of k values to use. In\n\ + particular, it should take the form (start, end, num) where num > 0. \n\ +ensures\n\ + - This function takes an input image and generates a set of candidate\n\ + rectangles which are expected to bound any objects in the image. 
It does\n\ + this by running a version of the segment_image() routine on the image and\n\ + then reports rectangles containing each of the segments as well as rectangles\n\ + containing unions of adjacent segments. The basic idea is described in the\n\ + paper: \n\ + Segmentation as Selective Search for Object Recognition by Koen E. A. van de Sande, et al.\n\ + Note that this function deviates from what is described in the paper slightly. \n\ + See the code for details.\n\ + - The basic segmentation is performed kvals[2] times, each time with the k parameter\n\ + (see segment_image() and the Felzenszwalb paper for details on k) set to a different\n\ + value from the range of numbers linearly spaced between kvals[0] to kvals[1].\n\ + - When doing the basic segmentations prior to any box merging, we discard all\n\ + rectangles that have an area < min_size. Therefore, all outputs and\n\ + subsequent merged rectangles are built out of rectangles that contain at\n\ + least min_size pixels. Note that setting min_size to a smaller value than\n\ + you might otherwise be interested in using can be useful since it allows a\n\ + larger number of possible merged boxes to be created.\n\ + - There are max_merging_iterations rounds of neighboring blob merging.\n\ + Therefore, this parameter has some effect on the number of output rectangles\n\ + you get, with larger values of the parameter giving more output rectangles.\n\ + - This function appends the output rectangles into #rects. This means that any\n\ + rectangles in rects before this function was called will still be in there\n\ + after it terminates. Note further that #rects will not contain any duplicate\n\ + rectangles. 
That is, for all valid i and j where i != j it will be true\n\ + that:\n\ + - #rects[i] != rects[j]"); + + m.def("get_frontal_face_detector", get_frontal_face_detector, + "Returns the default face detector"); + + m.def("train_simple_object_detector", train_simple_object_detector, + py::arg("dataset_filename"), py::arg("detector_output_filename"), py::arg("options"), +"requires \n\ + - options.C > 0 \n\ +ensures \n\ + - Uses the structural_object_detection_trainer to train a \n\ + simple_object_detector based on the labeled images in the XML file \n\ + dataset_filename. This function assumes the file dataset_filename is in the \n\ + XML format produced by dlib's save_image_dataset_metadata() routine. \n\ + - This function will apply a reasonable set of default parameters and \n\ + preprocessing techniques to the training procedure for simple_object_detector \n\ + objects. So the point of this function is to provide you with a very easy \n\ + way to train a basic object detector. \n\ + - The trained object detector is serialized to the file detector_output_filename."); + + m.def("train_simple_object_detector", train_simple_object_detector_on_images_py, + py::arg("images"), py::arg("boxes"), py::arg("options"), +"requires \n\ + - options.C > 0 \n\ + - len(images) == len(boxes) \n\ + - images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\ + - boxes should be a list of lists of dlib.rectangle object. \n\ +ensures \n\ + - Uses the structural_object_detection_trainer to train a \n\ + simple_object_detector based on the labeled images and bounding boxes. \n\ + - This function will apply a reasonable set of default parameters and \n\ + preprocessing techniques to the training procedure for simple_object_detector \n\ + objects. So the point of this function is to provide you with a very easy \n\ + way to train a basic object detector. 
\n\ + - The trained object detector is returned."); + + m.def("test_simple_object_detector", test_simple_object_detector, + // Please see test_simple_object_detector for the reason upsampling_amount is -1 + py::arg("dataset_filename"), py::arg("detector_filename"), py::arg("upsampling_amount")=-1, + "requires \n\ + - Optionally, take the number of times to upsample the testing images (upsampling_amount >= 0). \n\ + ensures \n\ + - Loads an image dataset from dataset_filename. We assume dataset_filename is \n\ + a file using the XML format written by save_image_dataset_metadata(). \n\ + - Loads a simple_object_detector from the file detector_filename. This means \n\ + detector_filename should be a file produced by the train_simple_object_detector() \n\ + routine. \n\ + - This function tests the detector against the dataset and returns the \n\ + precision, recall, and average precision of the detector. In fact, The \n\ + return value of this function is identical to that of dlib's \n\ + test_object_detection_function() routine. Therefore, see the documentation \n\ + for test_object_detection_function() for a detailed definition of these \n\ + metrics. " + ); + + m.def("test_simple_object_detector", test_simple_object_detector_with_images_py, + py::arg("images"), py::arg("boxes"), py::arg("detector"), py::arg("upsampling_amount")=0, + "requires \n\ + - len(images) == len(boxes) \n\ + - images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\ + - boxes should be a list of lists of dlib.rectangle object. \n\ + - Optionally, take the number of times to upsample the testing images (upsampling_amount >= 0). \n\ + ensures \n\ + - Loads a simple_object_detector from the file detector_filename. This means \n\ + detector_filename should be a file produced by the train_simple_object_detector() \n\ + routine. 
\n\ + - This function tests the detector against the dataset and returns the \n\ + precision, recall, and average precision of the detector. In fact, The \n\ + return value of this function is identical to that of dlib's \n\ + test_object_detection_function() routine. Therefore, see the documentation \n\ + for test_object_detection_function() for a detailed definition of these \n\ + metrics. " + ); + + m.def("test_simple_object_detector", test_simple_object_detector_py_with_images_py, + // Please see test_simple_object_detector_py_with_images_py for the reason upsampling_amount is -1 + py::arg("images"), py::arg("boxes"), py::arg("detector"), py::arg("upsampling_amount")=-1, + "requires \n\ + - len(images) == len(boxes) \n\ + - images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\ + - boxes should be a list of lists of dlib.rectangle object. \n\ + ensures \n\ + - Loads a simple_object_detector from the file detector_filename. This means \n\ + detector_filename should be a file produced by the train_simple_object_detector() \n\ + routine. \n\ + - This function tests the detector against the dataset and returns the \n\ + precision, recall, and average precision of the detector. In fact, The \n\ + return value of this function is identical to that of dlib's \n\ + test_object_detection_function() routine. Therefore, see the documentation \n\ + for test_object_detection_function() for a detailed definition of these \n\ + metrics. 
" + ); + { + typedef simple_object_detector type; + py::class_<type, std::shared_ptr<type>>(m, "fhog_object_detector", + "This object represents a sliding window histogram-of-oriented-gradients based object detector.") + .def(py::init(&load_object_from_file<type>), +"Loads an object detector from a file that contains the output of the \n\ +train_simple_object_detector() routine or a serialized C++ object of type\n\ +object_detector<scan_fhog_pyramid<pyramid_down<6>>>.") + .def("__call__", run_detector_with_upscale2, py::arg("image"), py::arg("upsample_num_times")=0, +"requires \n\ + - image is a numpy ndarray containing either an 8bit grayscale or RGB \n\ + image. \n\ + - upsample_num_times >= 0 \n\ +ensures \n\ + - This function runs the object detector on the input image and returns \n\ + a list of detections. \n\ + - Upsamples the image upsample_num_times before running the basic \n\ + detector.") + .def("run", run_rect_detector, py::arg("image"), py::arg("upsample_num_times")=0, py::arg("adjust_threshold")=0.0, +"requires \n\ + - image is a numpy ndarray containing either an 8bit grayscale or RGB \n\ + image. \n\ + - upsample_num_times >= 0 \n\ +ensures \n\ + - This function runs the object detector on the input image and returns \n\ + a tuple of (list of detections, list of scores, list of weight_indices). \n\ + - Upsamples the image upsample_num_times before running the basic \n\ + detector.") + .def_static("run_multiple", run_multiple_rect_detectors, py::arg("detectors"), py::arg("image"), py::arg("upsample_num_times")=0, py::arg("adjust_threshold")=0.0, +"requires \n\ + - detectors is a list of detectors. \n\ + - image is a numpy ndarray containing either an 8bit grayscale or RGB \n\ + image. \n\ + - upsample_num_times >= 0 \n\ +ensures \n\ + - This function runs the list of object detectors at once on the input image and returns \n\ + a tuple of (list of detections, list of scores, list of weight_indices). 
\n\ + - Upsamples the image upsample_num_times before running the basic \n\ + detector.") + .def("save", save_simple_object_detector, py::arg("detector_output_filename"), "Save a simple_object_detector to the provided path.") + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + { + typedef simple_object_detector_py type; + py::class_<type, std::shared_ptr<type>>(m, "simple_object_detector", + "This object represents a sliding window histogram-of-oriented-gradients based object detector.") + .def(py::init(&load_object_from_file<type>), +"Loads a simple_object_detector from a file that contains the output of the \n\ +train_simple_object_detector() routine.") + .def("__call__", &type::run_detector1, py::arg("image"), py::arg("upsample_num_times"), +"requires \n\ + - image is a numpy ndarray containing either an 8bit grayscale or RGB \n\ + image. \n\ + - upsample_num_times >= 0 \n\ +ensures \n\ + - This function runs the object detector on the input image and returns \n\ + a list of detections. \n\ + - Upsamples the image upsample_num_times before running the basic \n\ + detector. If you don't know how many times you want to upsample then \n\ + don't provide a value for upsample_num_times and an appropriate \n\ + default will be used.") + .def("__call__", &type::run_detector2, py::arg("image"), +"requires \n\ + - image is a numpy ndarray containing either an 8bit grayscale or RGB \n\ + image. 
\n\ +ensures \n\ + - This function runs the object detector on the input image and returns \n\ + a list of detections.") + .def("save", save_simple_object_detector_py, py::arg("detector_output_filename"), "Save a simple_object_detector to the provided path.") + .def(py::pickle(&getstate<type>, &setstate<type>)); + } +} + +// ---------------------------------------------------------------------------------------- diff --git a/ml/dlib/tools/python/src/opaque_types.h b/ml/dlib/tools/python/src/opaque_types.h new file mode 100644 index 00000000..1a31c08d --- /dev/null +++ b/ml/dlib/tools/python/src/opaque_types.h @@ -0,0 +1,55 @@ +// Copyright (C) 2017 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_PyTHON_OPAQUE_TYPES_H_ +#define DLIB_PyTHON_OPAQUE_TYPES_H_ + +#include <dlib/python.h> +#include <dlib/geometry.h> +#include <pybind11/stl_bind.h> +#include <vector> +#include <dlib/matrix.h> +#include <dlib/image_processing/full_object_detection.h> +#include <map> +#include <dlib/svm/ranking_tools.h> + +// All uses of PYBIND11_MAKE_OPAQUE need to be in this common header to avoid ODR +// violations. 
+PYBIND11_MAKE_OPAQUE(std::vector<dlib::rectangle>); +PYBIND11_MAKE_OPAQUE(std::vector<std::vector<dlib::rectangle>>); + +PYBIND11_MAKE_OPAQUE(std::vector<double>); + + +typedef std::vector<dlib::matrix<double,0,1>> column_vectors; +PYBIND11_MAKE_OPAQUE(column_vectors); +PYBIND11_MAKE_OPAQUE(std::vector<column_vectors>); + +typedef std::pair<unsigned long,unsigned long> ulong_pair; +PYBIND11_MAKE_OPAQUE(ulong_pair); +PYBIND11_MAKE_OPAQUE(std::vector<ulong_pair>); +PYBIND11_MAKE_OPAQUE(std::vector<std::vector<ulong_pair>>); + +typedef std::pair<unsigned long,double> ulong_double_pair; +PYBIND11_MAKE_OPAQUE(ulong_double_pair); +PYBIND11_MAKE_OPAQUE(std::vector<ulong_double_pair>); +PYBIND11_MAKE_OPAQUE(std::vector<std::vector<ulong_double_pair>>); +PYBIND11_MAKE_OPAQUE(std::vector<std::vector<std::vector<ulong_double_pair> > >); + +PYBIND11_MAKE_OPAQUE(std::vector<dlib::mmod_rect>); +PYBIND11_MAKE_OPAQUE(std::vector<std::vector<dlib::mmod_rect> >); +PYBIND11_MAKE_OPAQUE(std::vector<dlib::full_object_detection>); + +typedef std::map<std::string,dlib::point> parts_list_type; +PYBIND11_MAKE_OPAQUE(parts_list_type); + +typedef std::vector<dlib::ranking_pair<dlib::matrix<double,0,1>>> ranking_pairs; +typedef std::vector<std::pair<unsigned long,double> > sparse_vect; +typedef std::vector<dlib::ranking_pair<sparse_vect> > sparse_ranking_pairs; +PYBIND11_MAKE_OPAQUE(ranking_pairs); +PYBIND11_MAKE_OPAQUE(sparse_ranking_pairs); + + +PYBIND11_MAKE_OPAQUE(std::vector<dlib::point>); + +#endif // DLIB_PyTHON_OPAQUE_TYPES_H_ + diff --git a/ml/dlib/tools/python/src/other.cpp b/ml/dlib/tools/python/src/other.cpp new file mode 100644 index 00000000..3e014902 --- /dev/null +++ b/ml/dlib/tools/python/src/other.cpp @@ -0,0 +1,268 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+ +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/matrix.h> +#include <dlib/data_io.h> +#include <dlib/sparse_vector.h> +#include <dlib/optimization.h> +#include <dlib/statistics/running_gradient.h> + +using namespace dlib; +using namespace std; +namespace py = pybind11; + +typedef std::vector<std::pair<unsigned long,double> > sparse_vect; + + +void _make_sparse_vector ( + sparse_vect& v +) +{ + make_sparse_vector_inplace(v); +} + +void _make_sparse_vector2 ( + std::vector<sparse_vect>& v +) +{ + for (unsigned long i = 0; i < v.size(); ++i) + make_sparse_vector_inplace(v[i]); +} + +py::tuple _load_libsvm_formatted_data( + const std::string& file_name +) +{ + std::vector<sparse_vect> samples; + std::vector<double> labels; + load_libsvm_formatted_data(file_name, samples, labels); + return py::make_tuple(samples, labels); +} + +void _save_libsvm_formatted_data ( + const std::string& file_name, + const std::vector<sparse_vect>& samples, + const std::vector<double>& labels +) +{ + pyassert(samples.size() == labels.size(), "Invalid inputs"); + save_libsvm_formatted_data(file_name, samples, labels); +} + +// ---------------------------------------------------------------------------------------- + +py::list _max_cost_assignment ( + const matrix<double>& cost +) +{ + if (cost.nr() != cost.nc()) + throw dlib::error("The input matrix must be square."); + + // max_cost_assignment() only works with integer matrices, so convert from + // double to integer. 
+ const double scale = (std::numeric_limits<dlib::int64>::max()/1000)/max(abs(cost)); + matrix<dlib::int64> int_cost = matrix_cast<dlib::int64>(round(cost*scale)); + return vector_to_python_list(max_cost_assignment(int_cost)); +} + +double _assignment_cost ( + const matrix<double>& cost, + const py::list& assignment +) +{ + return assignment_cost(cost, python_list_to_vector<long>(assignment)); +} + +// ---------------------------------------------------------------------------------------- + +size_t py_count_steps_without_decrease ( + py::object arr, + double probability_of_decrease +) +{ + DLIB_CASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1); + return count_steps_without_decrease(python_list_to_vector<double>(arr), probability_of_decrease); +} + +// ---------------------------------------------------------------------------------------- + +size_t py_count_steps_without_decrease_robust ( + py::object arr, + double probability_of_decrease, + double quantile_discard +) +{ + DLIB_CASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1); + DLIB_CASSERT(0 <= quantile_discard && quantile_discard <= 1); + return count_steps_without_decrease_robust(python_list_to_vector<double>(arr), probability_of_decrease, quantile_discard); +} + +// ---------------------------------------------------------------------------------------- + +double probability_that_sequence_is_increasing ( + py::object arr +) +{ + DLIB_CASSERT(len(arr) > 2); + return probability_gradient_greater_than(python_list_to_vector<double>(arr), 0); +} + +// ---------------------------------------------------------------------------------------- + +void hit_enter_to_continue() +{ + std::cout << "Hit enter to continue"; + std::cin.get(); +} + +// ---------------------------------------------------------------------------------------- + +void bind_other(py::module &m) +{ + m.def("max_cost_assignment", _max_cost_assignment, py::arg("cost"), +"requires \n\ + - cost.nr() == cost.nc() 
\n\ + (i.e. the input must be a square matrix) \n\ +ensures \n\ + - Finds and returns the solution to the following optimization problem: \n\ + \n\ + Maximize: f(A) == assignment_cost(cost, A) \n\ + Subject to the following constraints: \n\ + - The elements of A are unique. That is, there aren't any \n\ + elements of A which are equal. \n\ + - len(A) == cost.nr() \n\ + \n\ + - Note that this function converts the input cost matrix into a 64bit fixed \n\ + point representation. Therefore, you should make sure that the values in \n\ + your cost matrix can be accurately represented by 64bit fixed point values. \n\ + If this is not the case then the solution my become inaccurate due to \n\ + rounding error. In general, this function will work properly when the ratio \n\ + of the largest to the smallest value in cost is no more than about 1e16. " + ); + + m.def("assignment_cost", _assignment_cost, py::arg("cost"),py::arg("assignment"), +"requires \n\ + - cost.nr() == cost.nc() \n\ + (i.e. the input must be a square matrix) \n\ + - for all valid i: \n\ + - 0 <= assignment[i] < cost.nr() \n\ +ensures \n\ + - Interprets cost as a cost assignment matrix. That is, cost[i][j] \n\ + represents the cost of assigning i to j. \n\ + - Interprets assignment as a particular set of assignments. That is, \n\ + i is assigned to assignment[i]. \n\ + - returns the cost of the given assignment. That is, returns \n\ + a number which is: \n\ + sum over i: cost[i][assignment[i]] " + ); + + m.def("make_sparse_vector", _make_sparse_vector , +"This function modifies its argument so that it is a properly sorted sparse vector. \n\ +This means that the elements of the sparse vector will be ordered so that pairs \n\ +with smaller indices come first. Additionally, there won't be any pairs with \n\ +identical indices. If such pairs were present in the input sparse vector then \n\ +their values will be added together and only one pair with their index will be \n\ +present in the output. 
" + ); + m.def("make_sparse_vector", _make_sparse_vector2 , + "This function modifies a sparse_vectors object so that all elements it contains are properly sorted sparse vectors."); + + m.def("load_libsvm_formatted_data",_load_libsvm_formatted_data, py::arg("file_name"), +"ensures \n\ + - Attempts to read a file of the given name that should contain libsvm \n\ + formatted data. The data is returned as a tuple where the first tuple \n\ + element is an array of sparse vectors and the second element is an array of \n\ + labels. " + ); + + m.def("save_libsvm_formatted_data",_save_libsvm_formatted_data, py::arg("file_name"), py::arg("samples"), py::arg("labels"), +"requires \n\ + - len(samples) == len(labels) \n\ +ensures \n\ + - saves the data to the given file in libsvm format " + ); + + m.def("hit_enter_to_continue", hit_enter_to_continue, + "Asks the user to hit enter to continue and pauses until they do so."); + + + + + m.def("count_steps_without_decrease",py_count_steps_without_decrease, py::arg("time_series"), py::arg("probability_of_decrease")=0.51, +"requires \n\ + - time_series must be a one dimensional array of real numbers. \n\ + - 0.5 < probability_of_decrease < 1 \n\ +ensures \n\ + - If you think of the contents of time_series as a potentially noisy time \n\ + series, then this function returns a count of how long the time series has \n\ + gone without noticeably decreasing in value. It does this by scanning along \n\ + the elements, starting from the end (i.e. time_series[-1]) to the beginning, \n\ + and checking how many elements you need to examine before you are confident \n\ + that the series has been decreasing in value. Here, \"confident of decrease\" \n\ + means the probability of decrease is >= probability_of_decrease. 
\n\ + - Setting probability_of_decrease to 0.51 means we count until we see even a \n\ + small hint of decrease, whereas a larger value of 0.99 would return a larger \n\ + count since it keeps going until it is nearly certain the time series is \n\ + decreasing. \n\ + - The max possible output from this function is len(time_series). \n\ + - The implementation of this function is done using the dlib::running_gradient \n\ + object, which is a tool that finds the least squares fit of a line to the \n\ + time series and the confidence interval around the slope of that line. That \n\ + can then be used in a simple statistical test to determine if the slope is \n\ + positive or negative." + /*! + requires + - time_series must be a one dimensional array of real numbers. + - 0.5 < probability_of_decrease < 1 + ensures + - If you think of the contents of time_series as a potentially noisy time + series, then this function returns a count of how long the time series has + gone without noticeably decreasing in value. It does this by scanning along + the elements, starting from the end (i.e. time_series[-1]) to the beginning, + and checking how many elements you need to examine before you are confident + that the series has been decreasing in value. Here, "confident of decrease" + means the probability of decrease is >= probability_of_decrease. + - Setting probability_of_decrease to 0.51 means we count until we see even a + small hint of decrease, whereas a larger value of 0.99 would return a larger + count since it keeps going until it is nearly certain the time series is + decreasing. + - The max possible output from this function is len(time_series). + - The implementation of this function is done using the dlib::running_gradient + object, which is a tool that finds the least squares fit of a line to the + time series and the confidence interval around the slope of that line. 
That + can then be used in a simple statistical test to determine if the slope is + positive or negative. + !*/ + ); + + m.def("count_steps_without_decrease_robust",py_count_steps_without_decrease_robust, py::arg("time_series"), py::arg("probability_of_decrease")=0.51, py::arg("quantile_discard")=0.1, +"requires \n\ + - time_series must be a one dimensional array of real numbers. \n\ + - 0.5 < probability_of_decrease < 1 \n\ + - 0 <= quantile_discard <= 1 \n\ +ensures \n\ + - This function behaves just like \n\ + count_steps_without_decrease(time_series,probability_of_decrease) except that \n\ + it ignores values in the time series that are in the upper quantile_discard \n\ + quantile. So for example, if the quantile discard is 0.1 then the 10% \n\ + largest values in the time series are ignored." + /*! + requires + - time_series must be a one dimensional array of real numbers. + - 0.5 < probability_of_decrease < 1 + - 0 <= quantile_discard <= 1 + ensures + - This function behaves just like + count_steps_without_decrease(time_series,probability_of_decrease) except that + it ignores values in the time series that are in the upper quantile_discard + quantile. So for example, if the quantile discard is 0.1 then the 10% + largest values in the time series are ignored. + !*/ + ); + + m.def("probability_that_sequence_is_increasing",probability_that_sequence_is_increasing, py::arg("time_series"), + "returns the probability that the given sequence of real numbers is increasing in value over time."); +} + diff --git a/ml/dlib/tools/python/src/rectangles.cpp b/ml/dlib/tools/python/src/rectangles.cpp new file mode 100644 index 00000000..d06ec591 --- /dev/null +++ b/ml/dlib/tools/python/src/rectangles.cpp @@ -0,0 +1,268 @@ +// Copyright (C) 2015 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+ +#include <dlib/python.h> +#include <dlib/geometry.h> +#include <pybind11/stl_bind.h> +#include "indexing.h" +#include "opaque_types.h" +#include <dlib/filtering.h> + +using namespace dlib; +using namespace std; + +namespace py = pybind11; + + +// ---------------------------------------------------------------------------------------- + +long left(const rectangle& r) { return r.left(); } +long top(const rectangle& r) { return r.top(); } +long right(const rectangle& r) { return r.right(); } +long bottom(const rectangle& r) { return r.bottom(); } +long width(const rectangle& r) { return r.width(); } +long height(const rectangle& r) { return r.height(); } +unsigned long area(const rectangle& r) { return r.area(); } + +double dleft(const drectangle& r) { return r.left(); } +double dtop(const drectangle& r) { return r.top(); } +double dright(const drectangle& r) { return r.right(); } +double dbottom(const drectangle& r) { return r.bottom(); } +double dwidth(const drectangle& r) { return r.width(); } +double dheight(const drectangle& r) { return r.height(); } +double darea(const drectangle& r) { return r.area(); } + +template <typename rect_type> +bool is_empty(const rect_type& r) { return r.is_empty(); } + +template <typename rect_type> +point center(const rect_type& r) { return center(r); } + +template <typename rect_type> +point dcenter(const rect_type& r) { return dcenter(r); } + +template <typename rect_type> +bool contains(const rect_type& r, const point& p) { return r.contains(p); } + +template <typename rect_type> +bool contains_xy(const rect_type& r, const long x, const long y) { return r.contains(point(x, y)); } + +template <typename rect_type> +bool contains_rec(const rect_type& r, const rect_type& r2) { return r.contains(r2); } + +template <typename rect_type> +rect_type intersect(const rect_type& r, const rect_type& r2) { return r.intersect(r2); } + +template <typename rect_type> +string print_rectangle_str(const rect_type& r) +{ + std::ostringstream sout; 
+ sout << r; + return sout.str(); +} + +string print_rectangle_repr(const rectangle& r) +{ + std::ostringstream sout; + sout << "rectangle(" << r.left() << "," << r.top() << "," << r.right() << "," << r.bottom() << ")"; + return sout.str(); +} + +string print_drectangle_repr(const drectangle& r) +{ + std::ostringstream sout; + sout << "drectangle(" << r.left() << "," << r.top() << "," << r.right() << "," << r.bottom() << ")"; + return sout.str(); +} + +string print_rect_filter(const rect_filter& r) +{ + std::ostringstream sout; + sout << "rect_filter("; + sout << "measurement_noise="<<r.get_left().get_measurement_noise(); + sout << ", typical_acceleration="<<r.get_left().get_typical_acceleration(); + sout << ", max_measurement_deviation="<<r.get_left().get_max_measurement_deviation(); + sout << ")"; + return sout.str(); +} + + +rectangle add_point_to_rect(const rectangle& r, const point& p) +{ + return r + p; +} + +rectangle add_rect_to_rect(const rectangle& r, const rectangle& p) +{ + return r + p; +} + +rectangle& iadd_point_to_rect(rectangle& r, const point& p) +{ + r += p; + return r; +} + +rectangle& iadd_rect_to_rect(rectangle& r, const rectangle& p) +{ + r += p; + return r; +} + + + +// ---------------------------------------------------------------------------------------- + +void bind_rectangles(py::module& m) +{ + { + typedef rectangle type; + py::class_<type>(m, "rectangle", "This object represents a rectangular area of an image.") + .def(py::init<long,long,long,long>(), py::arg("left"),py::arg("top"),py::arg("right"),py::arg("bottom")) + .def(py::init()) + .def("area", &::area) + .def("left", &::left) + .def("top", &::top) + .def("right", &::right) + .def("bottom", &::bottom) + .def("width", &::width) + .def("height", &::height) + .def("is_empty", &::is_empty<type>) + .def("center", &::center<type>) + .def("dcenter", &::dcenter<type>) + .def("contains", &::contains<type>, py::arg("point")) + .def("contains", &::contains_xy<type>, py::arg("x"), 
py::arg("y")) + .def("contains", &::contains_rec<type>, py::arg("rectangle")) + .def("intersect", &::intersect<type>, py::arg("rectangle")) + .def("__str__", &::print_rectangle_str<type>) + .def("__repr__", &::print_rectangle_repr) + .def("__add__", &::add_point_to_rect) + .def("__add__", &::add_rect_to_rect) + .def("__iadd__", &::iadd_point_to_rect) + .def("__iadd__", &::iadd_rect_to_rect) + .def(py::self == py::self) + .def(py::self != py::self) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + { + typedef drectangle type; + py::class_<type>(m, "drectangle", "This object represents a rectangular area of an image with floating point coordinates.") + .def(py::init<double,double,double,double>(), py::arg("left"), py::arg("top"), py::arg("right"), py::arg("bottom")) + .def("area", &::darea) + .def("left", &::dleft) + .def("top", &::dtop) + .def("right", &::dright) + .def("bottom", &::dbottom) + .def("width", &::dwidth) + .def("height", &::dheight) + .def("is_empty", &::is_empty<type>) + .def("center", &::center<type>) + .def("dcenter", &::dcenter<type>) + .def("contains", &::contains<type>, py::arg("point")) + .def("contains", &::contains_xy<type>, py::arg("x"), py::arg("y")) + .def("contains", &::contains_rec<type>, py::arg("rectangle")) + .def("intersect", &::intersect<type>, py::arg("rectangle")) + .def("__str__", &::print_rectangle_str<type>) + .def("__repr__", &::print_drectangle_repr) + .def(py::self == py::self) + .def(py::self != py::self) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + + { + typedef rect_filter type; + py::class_<type>(m, "rect_filter", + R"asdf( + This object is a simple tool for filtering a rectangle that + measures the location of a moving object that has some non-trivial + momentum. Importantly, the measurements are noisy and the object can + experience sudden unpredictable accelerations. 
To accomplish this + filtering we use a simple Kalman filter with a state transition model of: + + position_{i+1} = position_{i} + velocity_{i} + velocity_{i+1} = velocity_{i} + some_unpredictable_acceleration + + and a measurement model of: + + measured_position_{i} = position_{i} + measurement_noise + + Where some_unpredictable_acceleration and measurement_noise are 0 mean Gaussian + noise sources with standard deviations of typical_acceleration and + measurement_noise respectively. + + To allow for really sudden and large but infrequent accelerations, at each + step we check if the current measured position deviates from the predicted + filtered position by more than max_measurement_deviation*measurement_noise + and if so we adjust the filter's state to keep it within these bounds. + This allows the moving object to undergo large unmodeled accelerations, far + in excess of what would be suggested by typical_acceleration, without + then experiencing a long lag time where the Kalman filter has to "catches + up" to the new position. 
)asdf" + ) + .def(py::init<double,double,double>(), py::arg("measurement_noise"), py::arg("typical_acceleration"), py::arg("max_measurement_deviation")) + .def("measurement_noise", [](const rect_filter& a){return a.get_left().get_measurement_noise();}) + .def("typical_acceleration", [](const rect_filter& a){return a.get_left().get_typical_acceleration();}) + .def("max_measurement_deviation", [](const rect_filter& a){return a.get_left().get_max_measurement_deviation();}) + .def("__call__", [](rect_filter& f, const dlib::rectangle& r){return rectangle(f(r)); }, py::arg("rect")) + .def("__repr__", print_rect_filter) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + + m.def("find_optimal_rect_filter", + [](const std::vector<rectangle>& rects, const double smoothness ) { return find_optimal_rect_filter(rects, smoothness); }, + py::arg("rects"), + py::arg("smoothness")=1, +"requires \n\ + - rects.size() > 4 \n\ + - smoothness >= 0 \n\ +ensures \n\ + - This function finds the \"optimal\" settings of a rect_filter based on recorded \n\ + measurement data stored in rects. Here we assume that rects is a complete \n\ + track history of some object's measured positions. Essentially, what we do \n\ + is find the rect_filter that minimizes the following objective function: \n\ + sum of abs(predicted_location[i] - measured_location[i]) + smoothness*abs(filtered_location[i]-filtered_location[i-1]) \n\ + Where i is a time index. \n\ + The sum runs over all the data in rects. So what we do is find the \n\ + filter settings that produce smooth filtered trajectories but also produce \n\ + filtered outputs that are as close to the measured positions as possible. \n\ + The larger the value of smoothness the less jittery the filter outputs will \n\ + be, but they might become biased or laggy if smoothness is set really high. " + /*! 
+ requires + - rects.size() > 4 + - smoothness >= 0 + ensures + - This function finds the "optimal" settings of a rect_filter based on recorded + measurement data stored in rects. Here we assume that rects is a complete + track history of some object's measured positions. Essentially, what we do + is find the rect_filter that minimizes the following objective function: + sum of abs(predicted_location[i] - measured_location[i]) + smoothness*abs(filtered_location[i]-filtered_location[i-1]) + Where i is a time index. + The sum runs over all the data in rects. So what we do is find the + filter settings that produce smooth filtered trajectories but also produce + filtered outputs that are as close to the measured positions as possible. + The larger the value of smoothness the less jittery the filter outputs will + be, but they might become biased or laggy if smoothness is set really high. + !*/ + ); + + { + typedef std::vector<rectangle> type; + py::bind_vector<type>(m, "rectangles", "An array of rectangle objects.") + .def("clear", &type::clear) + .def("resize", resize<type>) + .def("extend", extend_vector_with_python_list<rectangle>) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + + { + typedef std::vector<std::vector<rectangle>> type; + py::bind_vector<type>(m, "rectangless", "An array of arrays of rectangle objects.") + .def("clear", &type::clear) + .def("resize", resize<type>) + .def("extend", extend_vector_with_python_list<rectangle>) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } +} + +// ---------------------------------------------------------------------------------------- diff --git a/ml/dlib/tools/python/src/sequence_segmenter.cpp b/ml/dlib/tools/python/src/sequence_segmenter.cpp new file mode 100644 index 00000000..9fde1e77 --- /dev/null +++ b/ml/dlib/tools/python/src/sequence_segmenter.cpp @@ -0,0 +1,827 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+ +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/matrix.h> +#include <dlib/svm_threaded.h> + +using namespace dlib; +using namespace std; +namespace py = pybind11; + +typedef matrix<double,0,1> dense_vect; +typedef std::vector<std::pair<unsigned long,double> > sparse_vect; +typedef std::vector<std::pair<unsigned long, unsigned long> > ranges; + +// ---------------------------------------------------------------------------------------- + +template <typename samp_type, bool BIO, bool high_order, bool nonnegative> +class segmenter_feature_extractor +{ + +public: + typedef std::vector<samp_type> sequence_type; + const static bool use_BIO_model = BIO; + const static bool use_high_order_features = high_order; + const static bool allow_negative_weights = nonnegative; + + + unsigned long _num_features; + unsigned long _window_size; + + segmenter_feature_extractor( + ) : _num_features(1), _window_size(1) {} + + segmenter_feature_extractor( + unsigned long _num_features_, + unsigned long _window_size_ + ) : _num_features(_num_features_), _window_size(_window_size_) {} + + unsigned long num_features( + ) const { return _num_features; } + + unsigned long window_size( + ) const {return _window_size; } + + template <typename feature_setter> + void get_features ( + feature_setter& set_feature, + const std::vector<dense_vect>& x, + unsigned long position + ) const + { + for (long i = 0; i < x[position].size(); ++i) + { + set_feature(i, x[position](i)); + } + } + + template <typename feature_setter> + void get_features ( + feature_setter& set_feature, + const std::vector<sparse_vect>& x, + unsigned long position + ) const + { + for (unsigned long i = 0; i < x[position].size(); ++i) + { + set_feature(x[position][i].first, x[position][i].second); + } + } + + friend void serialize(const segmenter_feature_extractor& item, std::ostream& out) + { + dlib::serialize(item._num_features, out); + dlib::serialize(item._window_size, out); + } + friend void 
deserialize(segmenter_feature_extractor& item, std::istream& in) + { + dlib::deserialize(item._num_features, in); + dlib::deserialize(item._window_size, in); + } +}; + +// ---------------------------------------------------------------------------------------- + +struct segmenter_type +{ + /*! + WHAT THIS OBJECT REPRESENTS + This the object that python will use directly to represent a + sequence_segmenter. All it does is contain all the possible template + instantiations of a sequence_segmenter and invoke the right one depending on + the mode variable. + !*/ + + segmenter_type() : mode(-1) + { } + + ranges segment_sequence_dense ( + const std::vector<dense_vect>& x + ) const + { + switch (mode) + { + case 0: return segmenter0(x); + case 1: return segmenter1(x); + case 2: return segmenter2(x); + case 3: return segmenter3(x); + case 4: return segmenter4(x); + case 5: return segmenter5(x); + case 6: return segmenter6(x); + case 7: return segmenter7(x); + default: throw dlib::error("Invalid mode"); + } + } + + ranges segment_sequence_sparse ( + const std::vector<sparse_vect>& x + ) const + { + switch (mode) + { + case 8: return segmenter8(x); + case 9: return segmenter9(x); + case 10: return segmenter10(x); + case 11: return segmenter11(x); + case 12: return segmenter12(x); + case 13: return segmenter13(x); + case 14: return segmenter14(x); + case 15: return segmenter15(x); + default: throw dlib::error("Invalid mode"); + } + } + + const matrix<double,0,1> get_weights() + { + switch(mode) + { + case 0: return segmenter0.get_weights(); + case 1: return segmenter1.get_weights(); + case 2: return segmenter2.get_weights(); + case 3: return segmenter3.get_weights(); + case 4: return segmenter4.get_weights(); + case 5: return segmenter5.get_weights(); + case 6: return segmenter6.get_weights(); + case 7: return segmenter7.get_weights(); + + case 8: return segmenter8.get_weights(); + case 9: return segmenter9.get_weights(); + case 10: return segmenter10.get_weights(); + case 
11: return segmenter11.get_weights(); + case 12: return segmenter12.get_weights(); + case 13: return segmenter13.get_weights(); + case 14: return segmenter14.get_weights(); + case 15: return segmenter15.get_weights(); + + default: throw dlib::error("Invalid mode"); + } + } + + friend void serialize (const segmenter_type& item, std::ostream& out) + { + serialize(item.mode, out); + switch(item.mode) + { + case 0: serialize(item.segmenter0, out); break; + case 1: serialize(item.segmenter1, out); break; + case 2: serialize(item.segmenter2, out); break; + case 3: serialize(item.segmenter3, out); break; + case 4: serialize(item.segmenter4, out); break; + case 5: serialize(item.segmenter5, out); break; + case 6: serialize(item.segmenter6, out); break; + case 7: serialize(item.segmenter7, out); break; + + case 8: serialize(item.segmenter8, out); break; + case 9: serialize(item.segmenter9, out); break; + case 10: serialize(item.segmenter10, out); break; + case 11: serialize(item.segmenter11, out); break; + case 12: serialize(item.segmenter12, out); break; + case 13: serialize(item.segmenter13, out); break; + case 14: serialize(item.segmenter14, out); break; + case 15: serialize(item.segmenter15, out); break; + default: throw dlib::error("Invalid mode"); + } + } + friend void deserialize (segmenter_type& item, std::istream& in) + { + deserialize(item.mode, in); + switch(item.mode) + { + case 0: deserialize(item.segmenter0, in); break; + case 1: deserialize(item.segmenter1, in); break; + case 2: deserialize(item.segmenter2, in); break; + case 3: deserialize(item.segmenter3, in); break; + case 4: deserialize(item.segmenter4, in); break; + case 5: deserialize(item.segmenter5, in); break; + case 6: deserialize(item.segmenter6, in); break; + case 7: deserialize(item.segmenter7, in); break; + + case 8: deserialize(item.segmenter8, in); break; + case 9: deserialize(item.segmenter9, in); break; + case 10: deserialize(item.segmenter10, in); break; + case 11: 
deserialize(item.segmenter11, in); break; + case 12: deserialize(item.segmenter12, in); break; + case 13: deserialize(item.segmenter13, in); break; + case 14: deserialize(item.segmenter14, in); break; + case 15: deserialize(item.segmenter15, in); break; + default: throw dlib::error("Invalid mode"); + } + } + + int mode; + + typedef segmenter_feature_extractor<dense_vect, false,false,false> fe0; + typedef segmenter_feature_extractor<dense_vect, false,false,true> fe1; + typedef segmenter_feature_extractor<dense_vect, false,true, false> fe2; + typedef segmenter_feature_extractor<dense_vect, false,true, true> fe3; + typedef segmenter_feature_extractor<dense_vect, true, false,false> fe4; + typedef segmenter_feature_extractor<dense_vect, true, false,true> fe5; + typedef segmenter_feature_extractor<dense_vect, true, true, false> fe6; + typedef segmenter_feature_extractor<dense_vect, true, true, true> fe7; + sequence_segmenter<fe0> segmenter0; + sequence_segmenter<fe1> segmenter1; + sequence_segmenter<fe2> segmenter2; + sequence_segmenter<fe3> segmenter3; + sequence_segmenter<fe4> segmenter4; + sequence_segmenter<fe5> segmenter5; + sequence_segmenter<fe6> segmenter6; + sequence_segmenter<fe7> segmenter7; + + typedef segmenter_feature_extractor<sparse_vect, false,false,false> fe8; + typedef segmenter_feature_extractor<sparse_vect, false,false,true> fe9; + typedef segmenter_feature_extractor<sparse_vect, false,true, false> fe10; + typedef segmenter_feature_extractor<sparse_vect, false,true, true> fe11; + typedef segmenter_feature_extractor<sparse_vect, true, false,false> fe12; + typedef segmenter_feature_extractor<sparse_vect, true, false,true> fe13; + typedef segmenter_feature_extractor<sparse_vect, true, true, false> fe14; + typedef segmenter_feature_extractor<sparse_vect, true, true, true> fe15; + sequence_segmenter<fe8> segmenter8; + sequence_segmenter<fe9> segmenter9; + sequence_segmenter<fe10> segmenter10; + sequence_segmenter<fe11> segmenter11; + 
sequence_segmenter<fe12> segmenter12; + sequence_segmenter<fe13> segmenter13; + sequence_segmenter<fe14> segmenter14; + sequence_segmenter<fe15> segmenter15; +}; + + +// ---------------------------------------------------------------------------------------- + +struct segmenter_params +{ + segmenter_params() + { + use_BIO_model = true; + use_high_order_features = true; + allow_negative_weights = true; + window_size = 5; + num_threads = 4; + epsilon = 0.1; + max_cache_size = 40; + be_verbose = false; + C = 100; + } + + bool use_BIO_model; + bool use_high_order_features; + bool allow_negative_weights; + unsigned long window_size; + unsigned long num_threads; + double epsilon; + unsigned long max_cache_size; + bool be_verbose; + double C; +}; + + +string segmenter_params__str__(const segmenter_params& p) +{ + ostringstream sout; + if (p.use_BIO_model) + sout << "BIO,"; + else + sout << "BILOU,"; + + if (p.use_high_order_features) + sout << "highFeats,"; + else + sout << "lowFeats,"; + + if (p.allow_negative_weights) + sout << "signed,"; + else + sout << "non-negative,"; + + sout << "win="<<p.window_size << ","; + sout << "threads="<<p.num_threads << ","; + sout << "eps="<<p.epsilon << ","; + sout << "cache="<<p.max_cache_size << ","; + if (p.be_verbose) + sout << "verbose,"; + else + sout << "non-verbose,"; + sout << "C="<<p.C; + return trim(sout.str()); +} + +string segmenter_params__repr__(const segmenter_params& p) +{ + ostringstream sout; + sout << "<"; + sout << segmenter_params__str__(p); + sout << ">"; + return sout.str(); +} + +void serialize ( const segmenter_params& item, std::ostream& out) +{ + serialize(item.use_BIO_model, out); + serialize(item.use_high_order_features, out); + serialize(item.allow_negative_weights, out); + serialize(item.window_size, out); + serialize(item.num_threads, out); + serialize(item.epsilon, out); + serialize(item.max_cache_size, out); + serialize(item.be_verbose, out); + serialize(item.C, out); +} + +void deserialize 
(segmenter_params& item, std::istream& in) +{ + deserialize(item.use_BIO_model, in); + deserialize(item.use_high_order_features, in); + deserialize(item.allow_negative_weights, in); + deserialize(item.window_size, in); + deserialize(item.num_threads, in); + deserialize(item.epsilon, in); + deserialize(item.max_cache_size, in); + deserialize(item.be_verbose, in); + deserialize(item.C, in); +} + +// ---------------------------------------------------------------------------------------- + +template <typename T> +void configure_trainer ( + const std::vector<std::vector<dense_vect> >& samples, + structural_sequence_segmentation_trainer<T>& trainer, + const segmenter_params& params +) +{ + pyassert(samples.size() != 0, "Invalid arguments. You must give some training sequences."); + pyassert(samples[0].size() != 0, "Invalid arguments. You can't have zero length training sequences."); + pyassert(params.window_size != 0, "Invalid window_size parameter, it must be > 0."); + pyassert(params.epsilon > 0, "Invalid epsilon parameter, it must be > 0."); + pyassert(params.C > 0, "Invalid C parameter, it must be > 0."); + const long dims = samples[0][0].size(); + + trainer = structural_sequence_segmentation_trainer<T>(T(dims, params.window_size)); + trainer.set_num_threads(params.num_threads); + trainer.set_epsilon(params.epsilon); + trainer.set_max_cache_size(params.max_cache_size); + trainer.set_c(params.C); + if (params.be_verbose) + trainer.be_verbose(); +} + +// ---------------------------------------------------------------------------------------- + +template <typename T> +void configure_trainer ( + const std::vector<std::vector<sparse_vect> >& samples, + structural_sequence_segmentation_trainer<T>& trainer, + const segmenter_params& params +) +{ + pyassert(samples.size() != 0, "Invalid arguments. You must give some training sequences."); + pyassert(samples[0].size() != 0, "Invalid arguments. 
You can't have zero length training sequences."); + + unsigned long dims = 0; + for (unsigned long i = 0; i < samples.size(); ++i) + { + dims = std::max(dims, max_index_plus_one(samples[i])); + } + + trainer = structural_sequence_segmentation_trainer<T>(T(dims, params.window_size)); + trainer.set_num_threads(params.num_threads); + trainer.set_epsilon(params.epsilon); + trainer.set_max_cache_size(params.max_cache_size); + trainer.set_c(params.C); + if (params.be_verbose) + trainer.be_verbose(); +} + +// ---------------------------------------------------------------------------------------- + +segmenter_type train_dense ( + const std::vector<std::vector<dense_vect> >& samples, + const std::vector<ranges>& segments, + segmenter_params params +) +{ + pyassert(is_sequence_segmentation_problem(samples, segments), "Invalid inputs"); + + int mode = 0; + if (params.use_BIO_model) + mode = mode*2 + 1; + else + mode = mode*2; + if (params.use_high_order_features) + mode = mode*2 + 1; + else + mode = mode*2; + if (params.allow_negative_weights) + mode = mode*2 + 1; + else + mode = mode*2; + + + segmenter_type res; + res.mode = mode; + switch(mode) + { + case 0: { structural_sequence_segmentation_trainer<segmenter_type::fe0> trainer; + configure_trainer(samples, trainer, params); + res.segmenter0 = trainer.train(samples, segments); + } break; + case 1: { structural_sequence_segmentation_trainer<segmenter_type::fe1> trainer; + configure_trainer(samples, trainer, params); + res.segmenter1 = trainer.train(samples, segments); + } break; + case 2: { structural_sequence_segmentation_trainer<segmenter_type::fe2> trainer; + configure_trainer(samples, trainer, params); + res.segmenter2 = trainer.train(samples, segments); + } break; + case 3: { structural_sequence_segmentation_trainer<segmenter_type::fe3> trainer; + configure_trainer(samples, trainer, params); + res.segmenter3 = trainer.train(samples, segments); + } break; + case 4: { 
structural_sequence_segmentation_trainer<segmenter_type::fe4> trainer; + configure_trainer(samples, trainer, params); + res.segmenter4 = trainer.train(samples, segments); + } break; + case 5: { structural_sequence_segmentation_trainer<segmenter_type::fe5> trainer; + configure_trainer(samples, trainer, params); + res.segmenter5 = trainer.train(samples, segments); + } break; + case 6: { structural_sequence_segmentation_trainer<segmenter_type::fe6> trainer; + configure_trainer(samples, trainer, params); + res.segmenter6 = trainer.train(samples, segments); + } break; + case 7: { structural_sequence_segmentation_trainer<segmenter_type::fe7> trainer; + configure_trainer(samples, trainer, params); + res.segmenter7 = trainer.train(samples, segments); + } break; + default: throw dlib::error("Invalid mode"); + } + + + return res; +} + +// ---------------------------------------------------------------------------------------- + +segmenter_type train_sparse ( + const std::vector<std::vector<sparse_vect> >& samples, + const std::vector<ranges>& segments, + segmenter_params params +) +{ + pyassert(is_sequence_segmentation_problem(samples, segments), "Invalid inputs"); + + int mode = 0; + if (params.use_BIO_model) + mode = mode*2 + 1; + else + mode = mode*2; + if (params.use_high_order_features) + mode = mode*2 + 1; + else + mode = mode*2; + if (params.allow_negative_weights) + mode = mode*2 + 1; + else + mode = mode*2; + + mode += 8; + + segmenter_type res; + res.mode = mode; + switch(mode) + { + case 8: { structural_sequence_segmentation_trainer<segmenter_type::fe8> trainer; + configure_trainer(samples, trainer, params); + res.segmenter8 = trainer.train(samples, segments); + } break; + case 9: { structural_sequence_segmentation_trainer<segmenter_type::fe9> trainer; + configure_trainer(samples, trainer, params); + res.segmenter9 = trainer.train(samples, segments); + } break; + case 10: { structural_sequence_segmentation_trainer<segmenter_type::fe10> trainer; + 
configure_trainer(samples, trainer, params); + res.segmenter10 = trainer.train(samples, segments); + } break; + case 11: { structural_sequence_segmentation_trainer<segmenter_type::fe11> trainer; + configure_trainer(samples, trainer, params); + res.segmenter11 = trainer.train(samples, segments); + } break; + case 12: { structural_sequence_segmentation_trainer<segmenter_type::fe12> trainer; + configure_trainer(samples, trainer, params); + res.segmenter12 = trainer.train(samples, segments); + } break; + case 13: { structural_sequence_segmentation_trainer<segmenter_type::fe13> trainer; + configure_trainer(samples, trainer, params); + res.segmenter13 = trainer.train(samples, segments); + } break; + case 14: { structural_sequence_segmentation_trainer<segmenter_type::fe14> trainer; + configure_trainer(samples, trainer, params); + res.segmenter14 = trainer.train(samples, segments); + } break; + case 15: { structural_sequence_segmentation_trainer<segmenter_type::fe15> trainer; + configure_trainer(samples, trainer, params); + res.segmenter15 = trainer.train(samples, segments); + } break; + default: throw dlib::error("Invalid mode"); + } + + + return res; +} + +// ---------------------------------------------------------------------------------------- + + +struct segmenter_test +{ + double precision; + double recall; + double f1; +}; + +void serialize(const segmenter_test& item, std::ostream& out) +{ + serialize(item.precision, out); + serialize(item.recall, out); + serialize(item.f1, out); +} + +void deserialize(segmenter_test& item, std::istream& in) +{ + deserialize(item.precision, in); + deserialize(item.recall, in); + deserialize(item.f1, in); +} + +std::string segmenter_test__str__(const segmenter_test& item) +{ + std::ostringstream sout; + sout << "precision: "<< item.precision << " recall: "<< item.recall << " f1-score: " << item.f1; + return sout.str(); +} +std::string segmenter_test__repr__(const segmenter_test& item) { return "< " + segmenter_test__str__(item) + " 
>";} + +// ---------------------------------------------------------------------------------------- + +const segmenter_test test_sequence_segmenter1 ( + const segmenter_type& segmenter, + const std::vector<std::vector<dense_vect> >& samples, + const std::vector<ranges>& segments +) +{ + pyassert(is_sequence_segmentation_problem(samples, segments), "Invalid inputs"); + matrix<double,1,3> res; + + switch(segmenter.mode) + { + case 0: res = test_sequence_segmenter(segmenter.segmenter0, samples, segments); break; + case 1: res = test_sequence_segmenter(segmenter.segmenter1, samples, segments); break; + case 2: res = test_sequence_segmenter(segmenter.segmenter2, samples, segments); break; + case 3: res = test_sequence_segmenter(segmenter.segmenter3, samples, segments); break; + case 4: res = test_sequence_segmenter(segmenter.segmenter4, samples, segments); break; + case 5: res = test_sequence_segmenter(segmenter.segmenter5, samples, segments); break; + case 6: res = test_sequence_segmenter(segmenter.segmenter6, samples, segments); break; + case 7: res = test_sequence_segmenter(segmenter.segmenter7, samples, segments); break; + default: throw dlib::error("Invalid mode"); + } + + + segmenter_test temp; + temp.precision = res(0); + temp.recall = res(1); + temp.f1 = res(2); + return temp; +} + +const segmenter_test test_sequence_segmenter2 ( + const segmenter_type& segmenter, + const std::vector<std::vector<sparse_vect> >& samples, + const std::vector<ranges>& segments +) +{ + pyassert(is_sequence_segmentation_problem(samples, segments), "Invalid inputs"); + matrix<double,1,3> res; + + switch(segmenter.mode) + { + case 8: res = test_sequence_segmenter(segmenter.segmenter8, samples, segments); break; + case 9: res = test_sequence_segmenter(segmenter.segmenter9, samples, segments); break; + case 10: res = test_sequence_segmenter(segmenter.segmenter10, samples, segments); break; + case 11: res = test_sequence_segmenter(segmenter.segmenter11, samples, segments); break; + case 
12: res = test_sequence_segmenter(segmenter.segmenter12, samples, segments); break; + case 13: res = test_sequence_segmenter(segmenter.segmenter13, samples, segments); break; + case 14: res = test_sequence_segmenter(segmenter.segmenter14, samples, segments); break; + case 15: res = test_sequence_segmenter(segmenter.segmenter15, samples, segments); break; + default: throw dlib::error("Invalid mode"); + } + + + segmenter_test temp; + temp.precision = res(0); + temp.recall = res(1); + temp.f1 = res(2); + return temp; +} + +// ---------------------------------------------------------------------------------------- + +const segmenter_test cross_validate_sequence_segmenter1 ( + const std::vector<std::vector<dense_vect> >& samples, + const std::vector<ranges>& segments, + long folds, + segmenter_params params +) +{ + pyassert(is_sequence_segmentation_problem(samples, segments), "Invalid inputs"); + pyassert(1 < folds && folds <= static_cast<long>(samples.size()), "folds argument is outside the valid range."); + + matrix<double,1,3> res; + + int mode = 0; + if (params.use_BIO_model) + mode = mode*2 + 1; + else + mode = mode*2; + if (params.use_high_order_features) + mode = mode*2 + 1; + else + mode = mode*2; + if (params.allow_negative_weights) + mode = mode*2 + 1; + else + mode = mode*2; + + + switch(mode) + { + case 0: { structural_sequence_segmentation_trainer<segmenter_type::fe0> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + case 1: { structural_sequence_segmentation_trainer<segmenter_type::fe1> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + case 2: { structural_sequence_segmentation_trainer<segmenter_type::fe2> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + case 3: { 
structural_sequence_segmentation_trainer<segmenter_type::fe3> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + case 4: { structural_sequence_segmentation_trainer<segmenter_type::fe4> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + case 5: { structural_sequence_segmentation_trainer<segmenter_type::fe5> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + case 6: { structural_sequence_segmentation_trainer<segmenter_type::fe6> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + case 7: { structural_sequence_segmentation_trainer<segmenter_type::fe7> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + default: throw dlib::error("Invalid mode"); + } + + + segmenter_test temp; + temp.precision = res(0); + temp.recall = res(1); + temp.f1 = res(2); + return temp; +} + +const segmenter_test cross_validate_sequence_segmenter2 ( + const std::vector<std::vector<sparse_vect> >& samples, + const std::vector<ranges>& segments, + long folds, + segmenter_params params +) +{ + pyassert(is_sequence_segmentation_problem(samples, segments), "Invalid inputs"); + pyassert(1 < folds && folds <= static_cast<long>(samples.size()), "folds argument is outside the valid range."); + + matrix<double,1,3> res; + + int mode = 0; + if (params.use_BIO_model) + mode = mode*2 + 1; + else + mode = mode*2; + if (params.use_high_order_features) + mode = mode*2 + 1; + else + mode = mode*2; + if (params.allow_negative_weights) + mode = mode*2 + 1; + else + mode = mode*2; + + mode += 8; + + switch(mode) + { + case 8: { 
structural_sequence_segmentation_trainer<segmenter_type::fe8> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + case 9: { structural_sequence_segmentation_trainer<segmenter_type::fe9> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + case 10: { structural_sequence_segmentation_trainer<segmenter_type::fe10> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + case 11: { structural_sequence_segmentation_trainer<segmenter_type::fe11> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + case 12: { structural_sequence_segmentation_trainer<segmenter_type::fe12> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + case 13: { structural_sequence_segmentation_trainer<segmenter_type::fe13> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + case 14: { structural_sequence_segmentation_trainer<segmenter_type::fe14> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + case 15: { structural_sequence_segmentation_trainer<segmenter_type::fe15> trainer; + configure_trainer(samples, trainer, params); + res = cross_validate_sequence_segmenter(trainer, samples, segments, folds); + } break; + default: throw dlib::error("Invalid mode"); + } + + + segmenter_test temp; + temp.precision = res(0); + temp.recall = res(1); + temp.f1 = res(2); + return temp; +} + +// 
---------------------------------------------------------------------------------------- + +void bind_sequence_segmenter(py::module& m) +{ + py::class_<segmenter_params>(m, "segmenter_params", +"This class is used to define all the optional parameters to the \n\ +train_sequence_segmenter() and cross_validate_sequence_segmenter() routines. ") + .def(py::init<>()) + .def_readwrite("use_BIO_model", &segmenter_params::use_BIO_model) + .def_readwrite("use_high_order_features", &segmenter_params::use_high_order_features) + .def_readwrite("allow_negative_weights", &segmenter_params::allow_negative_weights) + .def_readwrite("window_size", &segmenter_params::window_size) + .def_readwrite("num_threads", &segmenter_params::num_threads) + .def_readwrite("epsilon", &segmenter_params::epsilon) + .def_readwrite("max_cache_size", &segmenter_params::max_cache_size) + .def_readwrite("C", &segmenter_params::C, "SVM C parameter") + .def_readwrite("be_verbose", &segmenter_params::be_verbose) + .def("__repr__",&segmenter_params__repr__) + .def("__str__",&segmenter_params__str__) + .def(py::pickle(&getstate<segmenter_params>, &setstate<segmenter_params>)); + + py::class_<segmenter_type> (m, "segmenter_type", "This object represents a sequence segmenter and is the type of object " + "returned by the dlib.train_sequence_segmenter() routine.") + .def("__call__", &segmenter_type::segment_sequence_dense) + .def("__call__", &segmenter_type::segment_sequence_sparse) + .def_property_readonly("weights", &segmenter_type::get_weights) + .def(py::pickle(&getstate<segmenter_type>, &setstate<segmenter_type>)); + + py::class_<segmenter_test> (m, "segmenter_test", "This object is the output of the dlib.test_sequence_segmenter() and " + "dlib.cross_validate_sequence_segmenter() routines.") + .def_readwrite("precision", &segmenter_test::precision) + .def_readwrite("recall", &segmenter_test::recall) + .def_readwrite("f1", &segmenter_test::f1) + .def("__repr__",&segmenter_test__repr__) + 
.def("__str__",&segmenter_test__str__) + .def(py::pickle(&getstate<segmenter_test>, &setstate<segmenter_test>)); + + m.def("train_sequence_segmenter", train_dense, py::arg("samples"), py::arg("segments"), py::arg("params")=segmenter_params()); + m.def("train_sequence_segmenter", train_sparse, py::arg("samples"), py::arg("segments"), py::arg("params")=segmenter_params()); + + + m.def("test_sequence_segmenter", test_sequence_segmenter1); + m.def("test_sequence_segmenter", test_sequence_segmenter2); + + m.def("cross_validate_sequence_segmenter", cross_validate_sequence_segmenter1, + py::arg("samples"), py::arg("segments"), py::arg("folds"), py::arg("params")=segmenter_params()); + m.def("cross_validate_sequence_segmenter", cross_validate_sequence_segmenter2, + py::arg("samples"), py::arg("segments"), py::arg("folds"), py::arg("params")=segmenter_params()); +} + + + + diff --git a/ml/dlib/tools/python/src/serialize_object_detector.h b/ml/dlib/tools/python/src/serialize_object_detector.h new file mode 100644 index 00000000..e53401c8 --- /dev/null +++ b/ml/dlib/tools/python/src/serialize_object_detector.h @@ -0,0 +1,49 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_SERIALIZE_OBJECT_DETECTOR_H__ +#define DLIB_SERIALIZE_OBJECT_DETECTOR_H__ + +#include "simple_object_detector_py.h" + +namespace dlib +{ + inline void serialize (const dlib::simple_object_detector_py& item, std::ostream& out) + { + int version = 1; + serialize(item.detector, out); + serialize(version, out); + serialize(item.upsampling_amount, out); + } + + inline void deserialize (dlib::simple_object_detector_py& item, std::istream& in) + { + int version = 0; + deserialize(item.detector, in); + deserialize(version, in); + if (version != 1) + throw dlib::serialization_error("Unexpected version found while deserializing a simple_object_detector."); + deserialize(item.upsampling_amount, in); + } + + inline void save_simple_object_detector_py(const simple_object_detector_py& detector, const std::string& detector_output_filename) + { + std::ofstream fout(detector_output_filename.c_str(), std::ios::binary); + int version = 1; + serialize(detector.detector, fout); + serialize(version, fout); + serialize(detector.upsampling_amount, fout); + } + +// ---------------------------------------------------------------------------------------- + + inline void save_simple_object_detector(const simple_object_detector& detector, const std::string& detector_output_filename) + { + std::ofstream fout(detector_output_filename.c_str(), std::ios::binary); + serialize(detector, fout); + // Don't need to save version of upsampling amount because want to write out the + // object detector just like the C++ code that serializes an object_detector would. + // We also don't know the upsampling amount in this case anyway. + } +} + +#endif // DLIB_SERIALIZE_OBJECT_DETECTOR_H__ diff --git a/ml/dlib/tools/python/src/shape_predictor.cpp b/ml/dlib/tools/python/src/shape_predictor.cpp new file mode 100644 index 00000000..76f21750 --- /dev/null +++ b/ml/dlib/tools/python/src/shape_predictor.cpp @@ -0,0 +1,319 @@ +// Copyright (C) 2014 Davis E. 
King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. + +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/geometry.h> +#include <dlib/image_processing.h> +#include "shape_predictor.h" +#include "conversion.h" + +using namespace dlib; +using namespace std; + +namespace py = pybind11; + +// ---------------------------------------------------------------------------------------- + +full_object_detection run_predictor ( + shape_predictor& predictor, + py::object img, + py::object rect +) +{ + rectangle box = rect.cast<rectangle>(); + if (is_gray_python_image(img)) + { + return predictor(numpy_gray_image(img), box); + } + else if (is_rgb_python_image(img)) + { + return predictor(numpy_rgb_image(img), box); + } + else + { + throw dlib::error("Unsupported image type, must be 8bit gray or RGB image."); + } +} + +void save_shape_predictor(const shape_predictor& predictor, const std::string& predictor_output_filename) +{ + std::ofstream fout(predictor_output_filename.c_str(), std::ios::binary); + serialize(predictor, fout); +} + +// ---------------------------------------------------------------------------------------- + +rectangle full_obj_det_get_rect (const full_object_detection& detection) +{ return detection.get_rect(); } + +unsigned long full_obj_det_num_parts (const full_object_detection& detection) +{ return detection.num_parts(); } + +point full_obj_det_part (const full_object_detection& detection, const unsigned long idx) +{ + if (idx >= detection.num_parts()) + { + PyErr_SetString(PyExc_IndexError, "Index out of range"); + throw py::error_already_set(); + } + return detection.part(idx); +} + +std::vector<point> full_obj_det_parts (const full_object_detection& detection) +{ + const unsigned long num_parts = detection.num_parts(); + std::vector<point> parts(num_parts); + for (unsigned long j = 0; j < num_parts; ++j) + parts[j] = detection.part(j); + return parts; +} + 
+std::shared_ptr<full_object_detection> full_obj_det_init(py::object& pyrect, py::object& pyparts) +{ + const unsigned long num_parts = py::len(pyparts); + std::vector<point> parts(num_parts); + rectangle rect = pyrect.cast<rectangle>(); + py::iterator parts_it = pyparts.begin(); + + for (unsigned long j = 0; + parts_it != pyparts.end(); + ++j, ++parts_it) + parts[j] = parts_it->cast<point>(); + + return std::make_shared<full_object_detection>(rect, parts); +} + +// ---------------------------------------------------------------------------------------- + +inline shape_predictor train_shape_predictor_on_images_py ( + const py::list& pyimages, + const py::list& pydetections, + const shape_predictor_training_options& options +) +{ + const unsigned long num_images = py::len(pyimages); + if (num_images != py::len(pydetections)) + throw dlib::error("The length of the detections list must match the length of the images list."); + + std::vector<std::vector<full_object_detection> > detections(num_images); + dlib::array<array2d<unsigned char> > images(num_images); + images_and_nested_params_to_dlib(pyimages, pydetections, images, detections); + + return train_shape_predictor_on_images(images, detections, options); +} + + +inline double test_shape_predictor_with_images_py ( + const py::list& pyimages, + const py::list& pydetections, + const py::list& pyscales, + const shape_predictor& predictor +) +{ + const unsigned long num_images = py::len(pyimages); + const unsigned long num_scales = py::len(pyscales); + if (num_images != py::len(pydetections)) + throw dlib::error("The length of the detections list must match the length of the images list."); + + if (num_scales > 0 && num_scales != num_images) + throw dlib::error("The length of the scales list must match the length of the detections list."); + + std::vector<std::vector<full_object_detection> > detections(num_images); + std::vector<std::vector<double> > scales; + if (num_scales > 0) + scales.resize(num_scales); + 
dlib::array<array2d<unsigned char> > images(num_images); + + // Now copy the data into dlib based objects so we can call the testing routine. + for (unsigned long i = 0; i < num_images; ++i) + { + const unsigned long num_boxes = py::len(pydetections[i]); + for (py::iterator det_it = pydetections[i].begin(); + det_it != pydetections[i].end(); + ++det_it) + detections[i].push_back(det_it->cast<full_object_detection>()); + + pyimage_to_dlib_image(pyimages[i], images[i]); + if (num_scales > 0) + { + if (num_boxes != py::len(pyscales[i])) + throw dlib::error("The length of the scales list must match the length of the detections list."); + for (py::iterator scale_it = pyscales[i].begin(); + scale_it != pyscales[i].end(); + ++scale_it) + scales[i].push_back(scale_it->cast<double>()); + } + } + + return test_shape_predictor_with_images(images, detections, scales, predictor); +} + +inline double test_shape_predictor_with_images_no_scales_py ( + const py::list& pyimages, + const py::list& pydetections, + const shape_predictor& predictor +) +{ + py::list pyscales; + return test_shape_predictor_with_images_py(pyimages, pydetections, pyscales, predictor); +} + +// ---------------------------------------------------------------------------------------- + +void bind_shape_predictors(py::module &m) +{ + { + typedef full_object_detection type; + py::class_<type, std::shared_ptr<type>>(m, "full_object_detection", + "This object represents the location of an object in an image along with the \ + positions of each of its constituent parts.") + .def(py::init(&full_obj_det_init), +"requires \n\ + - rect: dlib rectangle \n\ + - parts: list of dlib points") + .def_property_readonly("rect", &full_obj_det_get_rect, "Bounding box from the underlying detector. 
Parts can be outside box if appropriate.") + .def_property_readonly("num_parts", &full_obj_det_num_parts, "The number of parts of the object.") + .def("part", &full_obj_det_part, py::arg("idx"), "A single part of the object as a dlib point.") + .def("parts", &full_obj_det_parts, "A vector of dlib points representing all of the parts.") + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + { + typedef shape_predictor_training_options type; + py::class_<type>(m, "shape_predictor_training_options", + "This object is a container for the options to the train_shape_predictor() routine.") + .def(py::init()) + .def_readwrite("be_verbose", &type::be_verbose, + "If true, train_shape_predictor() will print out a lot of information to stdout while training.") + .def_readwrite("cascade_depth", &type::cascade_depth, + "The number of cascades created to train the model with.") + .def_readwrite("tree_depth", &type::tree_depth, + "The depth of the trees used in each cascade. There are pow(2, get_tree_depth()) leaves in each tree") + .def_readwrite("num_trees_per_cascade_level", &type::num_trees_per_cascade_level, + "The number of trees created for each cascade.") + .def_readwrite("nu", &type::nu, + "The regularization parameter. Larger values of this parameter \ + will cause the algorithm to fit the training data better but may also \ + cause overfitting. The value must be in the range (0, 1].") + .def_readwrite("oversampling_amount", &type::oversampling_amount, + "The number of randomly selected initial starting points sampled for each training example") + .def_readwrite("feature_pool_size", &type::feature_pool_size, + "Number of pixels used to generate features for the random trees.") + .def_readwrite("lambda_param", &type::lambda_param, + "Controls how tight the feature sampling should be. Lower values enforce closer features.") + .def_readwrite("num_test_splits", &type::num_test_splits, + "Number of split features at each node to sample. 
The one that gives the best split is chosen.") + .def_readwrite("feature_pool_region_padding", &type::feature_pool_region_padding, + "Size of region within which to sample features for the feature pool, \ + e.g a padding of 0.5 would cause the algorithm to sample pixels from a box that was 2x2 pixels") + .def_readwrite("random_seed", &type::random_seed, + "The random seed used by the internal random number generator") + .def_readwrite("num_threads", &type::num_threads, + "Use this many threads/CPU cores for training.") + .def("__str__", &::print_shape_predictor_training_options) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + { + typedef shape_predictor type; + py::class_<type, std::shared_ptr<type>>(m, "shape_predictor", +"This object is a tool that takes in an image region containing some object and \ +outputs a set of point locations that define the pose of the object. The classic \ +example of this is human face pose prediction, where you take an image of a human \ +face as input and are expected to identify the locations of important facial \ +landmarks such as the corners of the mouth and eyes, tip of the nose, and so forth.") + .def(py::init()) + .def(py::init(&load_object_from_file<type>), +"Loads a shape_predictor from a file that contains the output of the \n\ +train_shape_predictor() routine.") + .def("__call__", &run_predictor, py::arg("image"), py::arg("box"), +"requires \n\ + - image is a numpy ndarray containing either an 8bit grayscale or RGB \n\ + image. \n\ + - box is the bounding box to begin the shape prediction inside. 
\n\ +ensures \n\ + - This function runs the shape predictor on the input image and returns \n\ + a single full_object_detection.") + .def("save", save_shape_predictor, py::arg("predictor_output_filename"), "Save a shape_predictor to the provided path.") + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + { + m.def("train_shape_predictor", train_shape_predictor_on_images_py, + py::arg("images"), py::arg("object_detections"), py::arg("options"), +"requires \n\ + - options.lambda_param > 0 \n\ + - 0 < options.nu <= 1 \n\ + - options.feature_pool_region_padding >= 0 \n\ + - len(images) == len(object_detections) \n\ + - images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\ + - object_detections should be a list of lists of dlib.full_object_detection objects. \ + Each dlib.full_object_detection contains the bounding box and the lists of points that make up the object parts.\n\ +ensures \n\ + - Uses dlib's shape_predictor_trainer object to train a \n\ + shape_predictor based on the provided labeled images, full_object_detections, and options.\n\ + - The trained shape_predictor is returned"); + + m.def("train_shape_predictor", train_shape_predictor, + py::arg("dataset_filename"), py::arg("predictor_output_filename"), py::arg("options"), +"requires \n\ + - options.lambda_param > 0 \n\ + - 0 < options.nu <= 1 \n\ + - options.feature_pool_region_padding >= 0 \n\ +ensures \n\ + - Uses dlib's shape_predictor_trainer to train a \n\ + shape_predictor based on the labeled images in the XML file \n\ + dataset_filename and the provided options. This function assumes the file dataset_filename is in the \n\ + XML format produced by dlib's save_image_dataset_metadata() routine. 
\n\ + - The trained shape predictor is serialized to the file predictor_output_filename."); + + m.def("test_shape_predictor", test_shape_predictor_py, + py::arg("dataset_filename"), py::arg("predictor_filename"), +"ensures \n\ + - Loads an image dataset from dataset_filename. We assume dataset_filename is \n\ + a file using the XML format written by save_image_dataset_metadata(). \n\ + - Loads a shape_predictor from the file predictor_filename. This means \n\ + predictor_filename should be a file produced by the train_shape_predictor() \n\ + routine. \n\ + - This function tests the predictor against the dataset and returns the \n\ + mean average error of the detector. In fact, The \n\ + return value of this function is identical to that of dlib's \n\ + shape_predictor_trainer() routine. Therefore, see the documentation \n\ + for shape_predictor_trainer() for a detailed definition of the mean average error."); + + m.def("test_shape_predictor", test_shape_predictor_with_images_no_scales_py, + py::arg("images"), py::arg("detections"), py::arg("shape_predictor"), +"requires \n\ + - len(images) == len(object_detections) \n\ + - images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\ + - object_detections should be a list of lists of dlib.full_object_detection objects. \ + Each dlib.full_object_detection contains the bounding box and the lists of points that make up the object parts.\n\ + ensures \n\ + - shape_predictor should be a file produced by the train_shape_predictor() \n\ + routine. \n\ + - This function tests the predictor against the dataset and returns the \n\ + mean average error of the detector. In fact, The \n\ + return value of this function is identical to that of dlib's \n\ + shape_predictor_trainer() routine. 
Therefore, see the documentation \n\ + for shape_predictor_trainer() for a detailed definition of the mean average error."); + + + m.def("test_shape_predictor", test_shape_predictor_with_images_py, + py::arg("images"), py::arg("detections"), py::arg("scales"), py::arg("shape_predictor"), +"requires \n\ + - len(images) == len(object_detections) \n\ + - len(object_detections) == len(scales) \n\ + - for every sublist in object_detections: len(object_detections[i]) == len(scales[i]) \n\ + - scales is a list of floating point scales that each predicted part location \ + should be divided by. Useful for normalization. \n\ + - images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\ + - object_detections should be a list of lists of dlib.full_object_detection objects. \ + Each dlib.full_object_detection contains the bounding box and the lists of points that make up the object parts.\n\ + ensures \n\ + - shape_predictor should be a file produced by the train_shape_predictor() \n\ + routine. \n\ + - This function tests the predictor against the dataset and returns the \n\ + mean average error of the detector. In fact, The \n\ + return value of this function is identical to that of dlib's \n\ + shape_predictor_trainer() routine. Therefore, see the documentation \n\ + for shape_predictor_trainer() for a detailed definition of the mean average error."); + } +} diff --git a/ml/dlib/tools/python/src/shape_predictor.h b/ml/dlib/tools/python/src/shape_predictor.h new file mode 100644 index 00000000..f7a071a7 --- /dev/null +++ b/ml/dlib/tools/python/src/shape_predictor.h @@ -0,0 +1,259 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_SHAPE_PREDICTOR_H__ +#define DLIB_SHAPE_PREDICTOR_H__ + +#include "dlib/string.h" +#include "dlib/geometry.h" +#include "dlib/data_io/load_image_dataset.h" +#include "dlib/image_processing.h" + +using namespace std; + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + struct shape_predictor_training_options + { + shape_predictor_training_options() + { + be_verbose = false; + cascade_depth = 10; + tree_depth = 4; + num_trees_per_cascade_level = 500; + nu = 0.1; + oversampling_amount = 20; + feature_pool_size = 400; + lambda_param = 0.1; + num_test_splits = 20; + feature_pool_region_padding = 0; + random_seed = ""; + num_threads = 0; + } + + bool be_verbose; + unsigned long cascade_depth; + unsigned long tree_depth; + unsigned long num_trees_per_cascade_level; + double nu; + unsigned long oversampling_amount; + unsigned long feature_pool_size; + double lambda_param; + unsigned long num_test_splits; + double feature_pool_region_padding; + std::string random_seed; + + // not serialized + unsigned long num_threads; + }; + + inline void serialize ( + const shape_predictor_training_options& item, + std::ostream& out + ) + { + try + { + serialize(item.be_verbose,out); + serialize(item.cascade_depth,out); + serialize(item.tree_depth,out); + serialize(item.num_trees_per_cascade_level,out); + serialize(item.nu,out); + serialize(item.oversampling_amount,out); + serialize(item.feature_pool_size,out); + serialize(item.lambda_param,out); + serialize(item.num_test_splits,out); + serialize(item.feature_pool_region_padding,out); + serialize(item.random_seed,out); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while serializing an object of type shape_predictor_training_options"); + } + } + + inline void deserialize ( + shape_predictor_training_options& item, + std::istream& in + ) + { + try + { + deserialize(item.be_verbose,in); + deserialize(item.cascade_depth,in); + 
deserialize(item.tree_depth,in); + deserialize(item.num_trees_per_cascade_level,in); + deserialize(item.nu,in); + deserialize(item.oversampling_amount,in); + deserialize(item.feature_pool_size,in); + deserialize(item.lambda_param,in); + deserialize(item.num_test_splits,in); + deserialize(item.feature_pool_region_padding,in); + deserialize(item.random_seed,in); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing an object of type shape_predictor_training_options"); + } + } + + string print_shape_predictor_training_options(const shape_predictor_training_options& o) + { + std::ostringstream sout; + sout << "shape_predictor_training_options(" + << "be_verbose=" << o.be_verbose << "," + << "cascade_depth=" << o.cascade_depth << "," + << "tree_depth=" << o.tree_depth << "," + << "num_trees_per_cascade_level=" << o.num_trees_per_cascade_level << "," + << "nu=" << o.nu << "," + << "oversampling_amount=" << o.oversampling_amount << "," + << "feature_pool_size=" << o.feature_pool_size << "," + << "lambda_param=" << o.lambda_param << "," + << "num_test_splits=" << o.num_test_splits << "," + << "feature_pool_region_padding=" << o.feature_pool_region_padding << "," + << "random_seed=" << o.random_seed << "," + << "num_threads=" << o.num_threads + << ")"; + return sout.str(); + } + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + inline bool contains_any_detections ( + const std::vector<std::vector<full_object_detection> >& detections + ) + { + for (unsigned long i = 0; i < detections.size(); ++i) + { + if (detections[i].size() != 0) + return true; + } + return false; + } + } + +// ---------------------------------------------------------------------------------------- + + template <typename image_array> + inline shape_predictor train_shape_predictor_on_images ( + image_array& images, + std::vector<std::vector<full_object_detection> >& detections, + const 
shape_predictor_training_options& options + ) + { + if (options.lambda_param <= 0) + throw error("Invalid lambda_param value given to train_shape_predictor(), lambda_param must be > 0."); + if (!(0 < options.nu && options.nu <= 1)) + throw error("Invalid nu value given to train_shape_predictor(). It is required that 0 < nu <= 1."); + if (options.feature_pool_region_padding <= -0.5) + throw error("Invalid feature_pool_region_padding value given to train_shape_predictor(), feature_pool_region_padding must be > -0.5."); + + if (images.size() != detections.size()) + throw error("The list of images must have the same length as the list of detections."); + + if (!impl::contains_any_detections(detections)) + throw error("Error, the training dataset does not have any labeled object detections in it."); + + shape_predictor_trainer trainer; + + trainer.set_cascade_depth(options.cascade_depth); + trainer.set_tree_depth(options.tree_depth); + trainer.set_num_trees_per_cascade_level(options.num_trees_per_cascade_level); + trainer.set_nu(options.nu); + trainer.set_random_seed(options.random_seed); + trainer.set_oversampling_amount(options.oversampling_amount); + trainer.set_feature_pool_size(options.feature_pool_size); + trainer.set_feature_pool_region_padding(options.feature_pool_region_padding); + trainer.set_lambda(options.lambda_param); + trainer.set_num_test_splits(options.num_test_splits); + trainer.set_num_threads(options.num_threads); + + if (options.be_verbose) + { + std::cout << "Training with cascade depth: " << options.cascade_depth << std::endl; + std::cout << "Training with tree depth: " << options.tree_depth << std::endl; + std::cout << "Training with " << options.num_trees_per_cascade_level << " trees per cascade level."<< std::endl; + std::cout << "Training with nu: " << options.nu << std::endl; + std::cout << "Training with random seed: " << options.random_seed << std::endl; + std::cout << "Training with oversampling amount: " << options.oversampling_amount << 
std::endl; + std::cout << "Training with feature pool size: " << options.feature_pool_size << std::endl; + std::cout << "Training with feature pool region padding: " << options.feature_pool_region_padding << std::endl; + std::cout << "Training with " << options.num_threads << " threads." << std::endl; + std::cout << "Training with lambda_param: " << options.lambda_param << std::endl; + std::cout << "Training with " << options.num_test_splits << " split tests."<< std::endl; + trainer.be_verbose(); + } + + shape_predictor predictor = trainer.train(images, detections); + + return predictor; + } + + inline void train_shape_predictor ( + const std::string& dataset_filename, + const std::string& predictor_output_filename, + const shape_predictor_training_options& options + ) + { + dlib::array<array2d<unsigned char> > images; + std::vector<std::vector<full_object_detection> > objects; + load_image_dataset(images, objects, dataset_filename); + + shape_predictor predictor = train_shape_predictor_on_images(images, objects, options); + + serialize(predictor_output_filename) << predictor; + + if (options.be_verbose) + std::cout << "Training complete, saved predictor to file " << predictor_output_filename << std::endl; + } + +// ---------------------------------------------------------------------------------------- + + template <typename image_array> + inline double test_shape_predictor_with_images ( + image_array& images, + std::vector<std::vector<full_object_detection> >& detections, + std::vector<std::vector<double> >& scales, + const shape_predictor& predictor + ) + { + if (images.size() != detections.size()) + throw error("The list of images must have the same length as the list of detections."); + if (scales.size() > 0 && scales.size() != images.size()) + throw error("The list of scales must have the same length as the list of detections."); + + if (scales.size() > 0) + return test_shape_predictor(predictor, images, detections, scales); + else + return 
test_shape_predictor(predictor, images, detections); + } + + inline double test_shape_predictor_py ( + const std::string& dataset_filename, + const std::string& predictor_filename + ) + { + // Load the images, no scales can be provided + dlib::array<array2d<unsigned char> > images; + // This interface cannot take the scales parameter. + std::vector<std::vector<double> > scales; + std::vector<std::vector<full_object_detection> > objects; + load_image_dataset(images, objects, dataset_filename); + + // Load the shape predictor + shape_predictor predictor; + deserialize(predictor_filename) >> predictor; + + return test_shape_predictor_with_images(images, objects, scales, predictor); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SHAPE_PREDICTOR_H__ + diff --git a/ml/dlib/tools/python/src/simple_object_detector.h b/ml/dlib/tools/python/src/simple_object_detector.h new file mode 100644 index 00000000..4fceab42 --- /dev/null +++ b/ml/dlib/tools/python/src/simple_object_detector.h @@ -0,0 +1,318 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_SIMPLE_ObJECT_DETECTOR_H__ +#define DLIB_SIMPLE_ObJECT_DETECTOR_H__ + +#include "dlib/image_processing/object_detector.h" +#include "dlib/string.h" +#include "dlib/image_processing/scan_fhog_pyramid.h" +#include "dlib/svm/structural_object_detection_trainer.h" +#include "dlib/geometry.h" +#include "dlib/data_io/load_image_dataset.h" +#include "dlib/image_processing/remove_unobtainable_rectangles.h" +#include "serialize_object_detector.h" +#include "dlib/svm.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + typedef object_detector<scan_fhog_pyramid<pyramid_down<6> > > simple_object_detector; + +// ---------------------------------------------------------------------------------------- + + struct simple_object_detector_training_options + { + simple_object_detector_training_options() + { + be_verbose = false; + add_left_right_image_flips = false; + num_threads = 4; + detection_window_size = 80*80; + C = 1; + epsilon = 0.01; + upsample_limit = 2; + } + + bool be_verbose; + bool add_left_right_image_flips; + unsigned long num_threads; + unsigned long detection_window_size; + double C; + double epsilon; + unsigned long upsample_limit; + }; + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + inline void pick_best_window_size ( + const std::vector<std::vector<rectangle> >& boxes, + unsigned long& width, + unsigned long& height, + const unsigned long target_size + ) + { + // find the average width and height + running_stats<double> avg_width, avg_height; + for (unsigned long i = 0; i < boxes.size(); ++i) + { + for (unsigned long j = 0; j < boxes[i].size(); ++j) + { + avg_width.add(boxes[i][j].width()); + avg_height.add(boxes[i][j].height()); + } + } + + // now adjust the box size so that it is about target_pixels pixels in size + double size = avg_width.mean()*avg_height.mean(); + double scale = 
std::sqrt(target_size/size); + + width = (unsigned long)(avg_width.mean()*scale+0.5); + height = (unsigned long)(avg_height.mean()*scale+0.5); + // make sure the width and height never round to zero. + if (width == 0) + width = 1; + if (height == 0) + height = 1; + } + + inline bool contains_any_boxes ( + const std::vector<std::vector<rectangle> >& boxes + ) + { + for (unsigned long i = 0; i < boxes.size(); ++i) + { + if (boxes[i].size() != 0) + return true; + } + return false; + } + + inline void throw_invalid_box_error_message ( + const std::string& dataset_filename, + const std::vector<std::vector<rectangle> >& removed, + const simple_object_detector_training_options& options + ) + { + + std::ostringstream sout; + // Note that the 1/16 factor is here because we will try to upsample the image + // 2 times to accommodate small boxes. We also take the max because we want to + // lower bound the size of the smallest recommended box. This is because the + // 8x8 HOG cells can't really deal with really small object boxes. + sout << "Error! An impossible set of object boxes was given for training. "; + sout << "All the boxes need to have a similar aspect ratio and also not be "; + sout << "smaller than about " << std::max<long>(20*20,options.detection_window_size/16) << " pixels in area. 
"; + + std::ostringstream sout2; + if (dataset_filename.size() != 0) + { + sout << "The following images contain invalid boxes:\n"; + image_dataset_metadata::dataset data; + load_image_dataset_metadata(data, dataset_filename); + for (unsigned long i = 0; i < removed.size(); ++i) + { + if (removed[i].size() != 0) + { + const std::string imgname = data.images[i].filename; + sout2 << " " << imgname << "\n"; + } + } + } + throw error("\n"+wrap_string(sout.str()) + "\n" + sout2.str()); + } + } + +// ---------------------------------------------------------------------------------------- + + template <typename image_array> + inline simple_object_detector_py train_simple_object_detector_on_images ( + const std::string& dataset_filename, // can be "" if it's not applicable + image_array& images, + std::vector<std::vector<rectangle> >& boxes, + std::vector<std::vector<rectangle> >& ignore, + const simple_object_detector_training_options& options + ) + { + if (options.C <= 0) + throw error("Invalid C value given to train_simple_object_detector(), C must be > 0."); + if (options.epsilon <= 0) + throw error("Invalid epsilon value given to train_simple_object_detector(), epsilon must be > 0."); + + if (images.size() != boxes.size()) + throw error("The list of images must have the same length as the list of boxes."); + if (images.size() != ignore.size()) + throw error("The list of images must have the same length as the list of ignore boxes."); + + if (impl::contains_any_boxes(boxes) == false) + throw error("Error, the training dataset does not have any labeled object boxes in it."); + + typedef scan_fhog_pyramid<pyramid_down<6> > image_scanner_type; + image_scanner_type scanner; + unsigned long width, height; + impl::pick_best_window_size(boxes, width, height, options.detection_window_size); + scanner.set_detection_window_size(width, height); + structural_object_detection_trainer<image_scanner_type> trainer(scanner); + trainer.set_num_threads(options.num_threads); + 
trainer.set_c(options.C); + trainer.set_epsilon(options.epsilon); + if (options.be_verbose) + { + std::cout << "Training with C: " << options.C << std::endl; + std::cout << "Training with epsilon: " << options.epsilon << std::endl; + std::cout << "Training using " << options.num_threads << " threads."<< std::endl; + std::cout << "Training with sliding window " << width << " pixels wide by " << height << " pixels tall." << std::endl; + if (options.add_left_right_image_flips) + std::cout << "Training on both left and right flipped versions of images." << std::endl; + trainer.be_verbose(); + } + + unsigned long upsampling_amount = 0; + + // now make sure all the boxes are obtainable by the scanner. We will try and + // upsample the images at most two times to help make the boxes obtainable. + std::vector<std::vector<rectangle> > temp(boxes), removed; + removed = remove_unobtainable_rectangles(trainer, images, temp); + while (impl::contains_any_boxes(removed) && upsampling_amount < options.upsample_limit) + { + ++upsampling_amount; + if (options.be_verbose) + std::cout << "Upsample images..." << std::endl; + upsample_image_dataset<pyramid_down<2> >(images, boxes, ignore); + temp = boxes; + removed = remove_unobtainable_rectangles(trainer, images, temp); + } + // if we weren't able to get all the boxes to match then throw an error + if (impl::contains_any_boxes(removed)) + impl::throw_invalid_box_error_message(dataset_filename, removed, options); + + if (options.add_left_right_image_flips) + add_image_left_right_flips(images, boxes, ignore); + + simple_object_detector detector = trainer.train(images, boxes, ignore); + + if (options.be_verbose) + { + std::cout << "Training complete." 
<< std::endl; + std::cout << "Trained with C: " << options.C << std::endl; + std::cout << "Training with epsilon: " << options.epsilon << std::endl; + std::cout << "Trained using " << options.num_threads << " threads."<< std::endl; + std::cout << "Trained with sliding window " << width << " pixels wide by " << height << " pixels tall." << std::endl; + if (upsampling_amount != 0) + { + // Unsampled images # time(s) to allow detection of small boxes + std::cout << "Upsampled images " << upsampling_amount; + std::cout << ((upsampling_amount > 1) ? " times" : " time"); + std::cout << " to allow detection of small boxes." << std::endl; + } + if (options.add_left_right_image_flips) + std::cout << "Trained on both left and right flipped versions of images." << std::endl; + } + + return simple_object_detector_py(detector, upsampling_amount); + } + +// ---------------------------------------------------------------------------------------- + + inline void train_simple_object_detector ( + const std::string& dataset_filename, + const std::string& detector_output_filename, + const simple_object_detector_training_options& options + ) + { + dlib::array<array2d<rgb_pixel> > images; + std::vector<std::vector<rectangle> > boxes, ignore; + ignore = load_image_dataset(images, boxes, dataset_filename); + + simple_object_detector_py detector = train_simple_object_detector_on_images(dataset_filename, images, boxes, ignore, options); + + save_simple_object_detector_py(detector, detector_output_filename); + + if (options.be_verbose) + std::cout << "Saved detector to file " << detector_output_filename << std::endl; + } + +// ---------------------------------------------------------------------------------------- + + struct simple_test_results + { + double precision; + double recall; + double average_precision; + }; + + template <typename image_array> + inline const simple_test_results test_simple_object_detector_with_images ( + image_array& images, + const unsigned int upsample_amount, + 
std::vector<std::vector<rectangle> >& boxes, + std::vector<std::vector<rectangle> >& ignore, + simple_object_detector& detector + ) + { + for (unsigned int i = 0; i < upsample_amount; ++i) + upsample_image_dataset<pyramid_down<2> >(images, boxes); + + matrix<double,1,3> res = test_object_detection_function(detector, images, boxes, ignore); + simple_test_results ret; + ret.precision = res(0); + ret.recall = res(1); + ret.average_precision = res(2); + return ret; + } + + inline const simple_test_results test_simple_object_detector ( + const std::string& dataset_filename, + const std::string& detector_filename, + const int upsample_amount + ) + { + // Load all the testing images + dlib::array<array2d<rgb_pixel> > images; + std::vector<std::vector<rectangle> > boxes, ignore; + ignore = load_image_dataset(images, boxes, dataset_filename); + + // Load the detector off disk (We have to use the explicit serialization here + // so that we have an open file stream) + simple_object_detector detector; + std::ifstream fin(detector_filename.c_str(), std::ios::binary); + if (!fin) + throw error("Unable to open file " + detector_filename); + deserialize(detector, fin); + + + /* Here we need a little hack to deal with whether we are going to be loading a + * simple_object_detector (possibly trained outside of Python) or a + * simple_object_detector_py (definitely trained from Python). In order to do this + * we peek into the filestream to see if there is more data after the object + * detector. If there is, it will be the version and upsampling amount. Therefore, + * by default we set the upsampling amount to -1 so that we can catch when no + * upsampling amount has been passed (numbers less than 0). If -1 is passed, we + * assume no upsampling and use 0. If a number > 0 is passed, we use that, else we + * use the upsampling amount saved in the detector file (if it exists). 
+ */ + unsigned int final_upsampling_amount = 0; + if (fin.peek() != EOF) + { + int version = 0; + deserialize(version, fin); + if (version != 1) + throw error("Unknown simple_object_detector format."); + deserialize(final_upsampling_amount, fin); + } + if (upsample_amount >= 0) + final_upsampling_amount = upsample_amount; + + return test_simple_object_detector_with_images(images, final_upsampling_amount, boxes, ignore, detector); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SIMPLE_ObJECT_DETECTOR_H__ + diff --git a/ml/dlib/tools/python/src/simple_object_detector_py.h b/ml/dlib/tools/python/src/simple_object_detector_py.h new file mode 100644 index 00000000..0f950273 --- /dev/null +++ b/ml/dlib/tools/python/src/simple_object_detector_py.h @@ -0,0 +1,290 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_SIMPLE_OBJECT_DETECTOR_PY_H__ +#define DLIB_SIMPLE_OBJECT_DETECTOR_PY_H__ + +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/matrix.h> +#include <dlib/geometry.h> +#include <dlib/image_processing/frontal_face_detector.h> + +namespace py = pybind11; + +namespace dlib +{ + typedef object_detector<scan_fhog_pyramid<pyramid_down<6> > > simple_object_detector; + + inline void split_rect_detections ( + std::vector<rect_detection>& rect_detections, + std::vector<rectangle>& rectangles, + std::vector<double>& detection_confidences, + std::vector<unsigned long>& weight_indices + ) + { + rectangles.clear(); + detection_confidences.clear(); + weight_indices.clear(); + + for (unsigned long i = 0; i < rect_detections.size(); ++i) + { + rectangles.push_back(rect_detections[i].rect); + detection_confidences.push_back(rect_detections[i].detection_confidence); + weight_indices.push_back(rect_detections[i].weight_index); + } + } + + + inline std::vector<dlib::rectangle> run_detector_with_upscale1 
( + dlib::simple_object_detector& detector, + py::object img, + const unsigned int upsampling_amount, + const double adjust_threshold, + std::vector<double>& detection_confidences, + std::vector<unsigned long>& weight_indices + ) + { + pyramid_down<2> pyr; + + std::vector<rectangle> rectangles; + std::vector<rect_detection> rect_detections; + + if (is_gray_python_image(img)) + { + array2d<unsigned char> temp; + if (upsampling_amount == 0) + { + detector(numpy_gray_image(img), rect_detections, adjust_threshold); + split_rect_detections(rect_detections, rectangles, + detection_confidences, weight_indices); + return rectangles; + } + else + { + pyramid_up(numpy_gray_image(img), temp, pyr); + unsigned int levels = upsampling_amount-1; + while (levels > 0) + { + levels--; + pyramid_up(temp); + } + + detector(temp, rect_detections, adjust_threshold); + for (unsigned long i = 0; i < rect_detections.size(); ++i) + rect_detections[i].rect = pyr.rect_down(rect_detections[i].rect, + upsampling_amount); + split_rect_detections(rect_detections, rectangles, + detection_confidences, weight_indices); + + return rectangles; + } + } + else if (is_rgb_python_image(img)) + { + array2d<rgb_pixel> temp; + if (upsampling_amount == 0) + { + detector(numpy_rgb_image(img), rect_detections, adjust_threshold); + split_rect_detections(rect_detections, rectangles, + detection_confidences, weight_indices); + return rectangles; + } + else + { + pyramid_up(numpy_rgb_image(img), temp, pyr); + unsigned int levels = upsampling_amount-1; + while (levels > 0) + { + levels--; + pyramid_up(temp); + } + + detector(temp, rect_detections, adjust_threshold); + for (unsigned long i = 0; i < rect_detections.size(); ++i) + rect_detections[i].rect = pyr.rect_down(rect_detections[i].rect, + upsampling_amount); + split_rect_detections(rect_detections, rectangles, + detection_confidences, weight_indices); + + return rectangles; + } + } + else + { + throw dlib::error("Unsupported image type, must be 8bit gray or RGB 
image."); + } + } + + inline std::vector<dlib::rectangle> run_detectors_with_upscale1 ( + std::vector<simple_object_detector >& detectors, + py::object img, + const unsigned int upsampling_amount, + const double adjust_threshold, + std::vector<double>& detection_confidences, + std::vector<unsigned long>& weight_indices + ) + { + pyramid_down<2> pyr; + + std::vector<rectangle> rectangles; + std::vector<rect_detection> rect_detections; + + if (is_gray_python_image(img)) + { + array2d<unsigned char> temp; + if (upsampling_amount == 0) + { + evaluate_detectors(detectors, numpy_gray_image(img), rect_detections, adjust_threshold); + split_rect_detections(rect_detections, rectangles, + detection_confidences, weight_indices); + return rectangles; + } + else + { + pyramid_up(numpy_gray_image(img), temp, pyr); + unsigned int levels = upsampling_amount-1; + while (levels > 0) + { + levels--; + pyramid_up(temp); + } + + evaluate_detectors(detectors, temp, rect_detections, adjust_threshold); + for (unsigned long i = 0; i < rect_detections.size(); ++i) + rect_detections[i].rect = pyr.rect_down(rect_detections[i].rect, + upsampling_amount); + split_rect_detections(rect_detections, rectangles, + detection_confidences, weight_indices); + + return rectangles; + } + } + else if (is_rgb_python_image(img)) + { + array2d<rgb_pixel> temp; + if (upsampling_amount == 0) + { + evaluate_detectors(detectors, numpy_rgb_image(img), rect_detections, adjust_threshold); + split_rect_detections(rect_detections, rectangles, + detection_confidences, weight_indices); + return rectangles; + } + else + { + pyramid_up(numpy_rgb_image(img), temp, pyr); + unsigned int levels = upsampling_amount-1; + while (levels > 0) + { + levels--; + pyramid_up(temp); + } + + evaluate_detectors(detectors, temp, rect_detections, adjust_threshold); + for (unsigned long i = 0; i < rect_detections.size(); ++i) + rect_detections[i].rect = pyr.rect_down(rect_detections[i].rect, + upsampling_amount); + 
split_rect_detections(rect_detections, rectangles, + detection_confidences, weight_indices); + + return rectangles; + } + } + else + { + throw dlib::error("Unsupported image type, must be 8bit gray or RGB image."); + } + } + + inline std::vector<dlib::rectangle> run_detector_with_upscale2 ( + dlib::simple_object_detector& detector, + py::object img, + const unsigned int upsampling_amount + + ) + { + std::vector<double> detection_confidences; + std::vector<unsigned long> weight_indices; + const double adjust_threshold = 0.0; + + return run_detector_with_upscale1(detector, img, upsampling_amount, + adjust_threshold, + detection_confidences, weight_indices); + } + + inline py::tuple run_rect_detector ( + dlib::simple_object_detector& detector, + py::object img, + const unsigned int upsampling_amount, + const double adjust_threshold) + { + py::tuple t; + + std::vector<double> detection_confidences; + std::vector<unsigned long> weight_indices; + std::vector<rectangle> rectangles; + + rectangles = run_detector_with_upscale1(detector, img, upsampling_amount, + adjust_threshold, + detection_confidences, weight_indices); + + return py::make_tuple(rectangles, + vector_to_python_list(detection_confidences), + vector_to_python_list(weight_indices)); + } + + inline py::tuple run_multiple_rect_detectors ( + py::list& detectors, + py::object img, + const unsigned int upsampling_amount, + const double adjust_threshold) + { + py::tuple t; + + std::vector<simple_object_detector > vector_detectors; + const unsigned long num_detectors = len(detectors); + // Now copy the data into dlib based objects. 
+ for (unsigned long i = 0; i < num_detectors; ++i) + { + vector_detectors.push_back(detectors[i].cast<simple_object_detector >()); + } + + std::vector<double> detection_confidences; + std::vector<unsigned long> weight_indices; + std::vector<rectangle> rectangles; + + rectangles = run_detectors_with_upscale1(vector_detectors, img, upsampling_amount, + adjust_threshold, + detection_confidences, weight_indices); + + return py::make_tuple(rectangles, + vector_to_python_list(detection_confidences), + vector_to_python_list(weight_indices)); + } + + + + struct simple_object_detector_py + { + simple_object_detector detector; + unsigned int upsampling_amount; + + simple_object_detector_py() {} + simple_object_detector_py(simple_object_detector& _detector, unsigned int _upsampling_amount) : + detector(_detector), upsampling_amount(_upsampling_amount) {} + + std::vector<dlib::rectangle> run_detector1 (py::object img, + const unsigned int upsampling_amount_) + { + return run_detector_with_upscale2(detector, img, upsampling_amount_); + } + + std::vector<dlib::rectangle> run_detector2 (py::object img) + { + return run_detector_with_upscale2(detector, img, upsampling_amount); + } + + + }; +} + +#endif // DLIB_SIMPLE_OBJECT_DETECTOR_PY_H__ diff --git a/ml/dlib/tools/python/src/svm_c_trainer.cpp b/ml/dlib/tools/python/src/svm_c_trainer.cpp new file mode 100644 index 00000000..7b592abe --- /dev/null +++ b/ml/dlib/tools/python/src/svm_c_trainer.cpp @@ -0,0 +1,311 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+ +#include "opaque_types.h" +#include <dlib/python.h> +#include "testing_results.h" +#include <dlib/matrix.h> +#include <dlib/svm_threaded.h> + +using namespace dlib; +using namespace std; + +typedef matrix<double,0,1> sample_type; +typedef std::vector<std::pair<unsigned long,double> > sparse_vect; + +template <typename trainer_type> +typename trainer_type::trained_function_type train ( + const trainer_type& trainer, + const std::vector<typename trainer_type::sample_type>& samples, + const std::vector<double>& labels +) +{ + pyassert(is_binary_classification_problem(samples,labels), "Invalid inputs"); + return trainer.train(samples, labels); +} + +template <typename trainer_type> +void set_epsilon ( trainer_type& trainer, double eps) +{ + pyassert(eps > 0, "epsilon must be > 0"); + trainer.set_epsilon(eps); +} + +template <typename trainer_type> +double get_epsilon ( const trainer_type& trainer) { return trainer.get_epsilon(); } + + +template <typename trainer_type> +void set_cache_size ( trainer_type& trainer, long cache_size) +{ + pyassert(cache_size > 0, "cache size must be > 0"); + trainer.set_cache_size(cache_size); +} + +template <typename trainer_type> +long get_cache_size ( const trainer_type& trainer) { return trainer.get_cache_size(); } + + +template <typename trainer_type> +void set_c ( trainer_type& trainer, double C) +{ + pyassert(C > 0, "C must be > 0"); + trainer.set_c(C); +} + +template <typename trainer_type> +void set_c_class1 ( trainer_type& trainer, double C) +{ + pyassert(C > 0, "C must be > 0"); + trainer.set_c_class1(C); +} + +template <typename trainer_type> +void set_c_class2 ( trainer_type& trainer, double C) +{ + pyassert(C > 0, "C must be > 0"); + trainer.set_c_class2(C); +} + +template <typename trainer_type> +double get_c_class1 ( const trainer_type& trainer) { return trainer.get_c_class1(); } +template <typename trainer_type> +double get_c_class2 ( const trainer_type& trainer) { return trainer.get_c_class2(); } + +template <typename 
trainer_type> +py::class_<trainer_type> setup_trainer_eps ( + py::module& m, + const std::string& name +) +{ + return py::class_<trainer_type>(m, name.c_str()) + .def("train", train<trainer_type>) + .def_property("epsilon", get_epsilon<trainer_type>, set_epsilon<trainer_type>); +} + +template <typename trainer_type> +py::class_<trainer_type> setup_trainer_eps_c ( + py::module& m, + const std::string& name +) +{ + return setup_trainer_eps<trainer_type>(m, name) + .def("set_c", set_c<trainer_type>) + .def_property("c_class1", get_c_class1<trainer_type>, set_c_class1<trainer_type>) + .def_property("c_class2", get_c_class2<trainer_type>, set_c_class2<trainer_type>); +} + +template <typename trainer_type> +py::class_<trainer_type> setup_trainer_eps_c_cache ( + py::module& m, + const std::string& name +) +{ + return setup_trainer_eps_c<trainer_type>(m, name) + .def_property("cache_size", get_cache_size<trainer_type>, set_cache_size<trainer_type>); +} + +template <typename trainer_type> +void set_gamma ( + trainer_type& trainer, + double gamma +) +{ + pyassert(gamma > 0, "gamma must be > 0"); + trainer.set_kernel(typename trainer_type::kernel_type(gamma)); +} + +template <typename trainer_type> +double get_gamma ( + const trainer_type& trainer +) +{ + return trainer.get_kernel().gamma; +} + +// ---------------------------------------------------------------------------------------- + +template < + typename trainer_type + > +const binary_test _cross_validate_trainer ( + const trainer_type& trainer, + const std::vector<typename trainer_type::sample_type>& x, + const std::vector<double>& y, + const unsigned long folds +) +{ + pyassert(is_binary_classification_problem(x,y), "Training data does not make a valid training set."); + pyassert(1 < folds && folds <= x.size(), "Invalid number of folds given."); + return cross_validate_trainer(trainer, x, y, folds); +} + +template < + typename trainer_type + > +const binary_test _cross_validate_trainer_t ( + const trainer_type& 
trainer, + const std::vector<typename trainer_type::sample_type>& x, + const std::vector<double>& y, + const unsigned long folds, + const unsigned long num_threads +) +{ + pyassert(is_binary_classification_problem(x,y), "Training data does not make a valid training set."); + pyassert(1 < folds && folds <= x.size(), "Invalid number of folds given."); + pyassert(1 < num_threads, "The number of threads specified must not be zero."); + return cross_validate_trainer_threaded(trainer, x, y, folds, num_threads); +} + +// ---------------------------------------------------------------------------------------- + +void bind_svm_c_trainer(py::module& m) +{ + namespace py = pybind11; + + // svm_c + { + typedef svm_c_trainer<radial_basis_kernel<sample_type> > T; + setup_trainer_eps_c_cache<T>(m, "svm_c_trainer_radial_basis") + .def(py::init()) + .def_property("gamma", get_gamma<T>, set_gamma<T>); + m.def("cross_validate_trainer", _cross_validate_trainer<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds")); + m.def("cross_validate_trainer_threaded", _cross_validate_trainer_t<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds"),py::arg("num_threads")); + } + + { + typedef svm_c_trainer<sparse_radial_basis_kernel<sparse_vect> > T; + setup_trainer_eps_c_cache<T>(m, "svm_c_trainer_sparse_radial_basis") + .def(py::init()) + .def_property("gamma", get_gamma<T>, set_gamma<T>); + m.def("cross_validate_trainer", _cross_validate_trainer<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds")); + m.def("cross_validate_trainer_threaded", _cross_validate_trainer_t<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds"),py::arg("num_threads")); + } + + { + typedef svm_c_trainer<histogram_intersection_kernel<sample_type> > T; + setup_trainer_eps_c_cache<T>(m, "svm_c_trainer_histogram_intersection") + .def(py::init()); + m.def("cross_validate_trainer", _cross_validate_trainer<T>, + 
py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds")); + m.def("cross_validate_trainer_threaded", _cross_validate_trainer_t<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds"),py::arg("num_threads")); + } + + { + typedef svm_c_trainer<sparse_histogram_intersection_kernel<sparse_vect> > T; + setup_trainer_eps_c_cache<T>(m, "svm_c_trainer_sparse_histogram_intersection") + .def(py::init()); + m.def("cross_validate_trainer", _cross_validate_trainer<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds")); + m.def("cross_validate_trainer_threaded", _cross_validate_trainer_t<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds"),py::arg("num_threads")); + } + + // svm_c_linear + { + typedef svm_c_linear_trainer<linear_kernel<sample_type> > T; + setup_trainer_eps_c<T>(m, "svm_c_trainer_linear") + .def(py::init()) + .def_property("max_iterations", &T::get_max_iterations, &T::set_max_iterations) + .def_property("force_last_weight_to_1", &T::forces_last_weight_to_1, &T::force_last_weight_to_1) + .def_property("learns_nonnegative_weights", &T::learns_nonnegative_weights, &T::set_learns_nonnegative_weights) + .def_property_readonly("has_prior", &T::has_prior) + .def("set_prior", &T::set_prior) + .def("be_verbose", &T::be_verbose) + .def("be_quiet", &T::be_quiet); + + m.def("cross_validate_trainer", _cross_validate_trainer<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds")); + m.def("cross_validate_trainer_threaded", _cross_validate_trainer_t<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds"),py::arg("num_threads")); + } + + { + typedef svm_c_linear_trainer<sparse_linear_kernel<sparse_vect> > T; + setup_trainer_eps_c<T>(m, "svm_c_trainer_sparse_linear") + .def(py::init()) + .def_property("max_iterations", &T::get_max_iterations, &T::set_max_iterations) + .def_property("force_last_weight_to_1", &T::forces_last_weight_to_1, &T::force_last_weight_to_1) + .def_property("learns_nonnegative_weights", 
&T::learns_nonnegative_weights, &T::set_learns_nonnegative_weights) + .def_property_readonly("has_prior", &T::has_prior) + .def("set_prior", &T::set_prior) + .def("be_verbose", &T::be_verbose) + .def("be_quiet", &T::be_quiet); + + m.def("cross_validate_trainer", _cross_validate_trainer<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds")); + m.def("cross_validate_trainer_threaded", _cross_validate_trainer_t<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds"),py::arg("num_threads")); + } + + // rvm + { + typedef rvm_trainer<radial_basis_kernel<sample_type> > T; + setup_trainer_eps<T>(m, "rvm_trainer_radial_basis") + .def(py::init()) + .def_property("gamma", get_gamma<T>, set_gamma<T>); + m.def("cross_validate_trainer", _cross_validate_trainer<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds")); + m.def("cross_validate_trainer_threaded", _cross_validate_trainer_t<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds"),py::arg("num_threads")); + } + + { + typedef rvm_trainer<sparse_radial_basis_kernel<sparse_vect> > T; + setup_trainer_eps<T>(m, "rvm_trainer_sparse_radial_basis") + .def(py::init()) + .def_property("gamma", get_gamma<T>, set_gamma<T>); + m.def("cross_validate_trainer", _cross_validate_trainer<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds")); + m.def("cross_validate_trainer_threaded", _cross_validate_trainer_t<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds"),py::arg("num_threads")); + } + + { + typedef rvm_trainer<histogram_intersection_kernel<sample_type> > T; + setup_trainer_eps<T>(m, "rvm_trainer_histogram_intersection") + .def(py::init()); + m.def("cross_validate_trainer", _cross_validate_trainer<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds")); + m.def("cross_validate_trainer_threaded", _cross_validate_trainer_t<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds"),py::arg("num_threads")); + } + + { + typedef 
rvm_trainer<sparse_histogram_intersection_kernel<sparse_vect> > T; + setup_trainer_eps<T>(m, "rvm_trainer_sparse_histogram_intersection") + .def(py::init()); + m.def("cross_validate_trainer", _cross_validate_trainer<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds")); + m.def("cross_validate_trainer_threaded", _cross_validate_trainer_t<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds"),py::arg("num_threads")); + } + + // rvm linear + { + typedef rvm_trainer<linear_kernel<sample_type> > T; + setup_trainer_eps<T>(m, "rvm_trainer_linear") + .def(py::init()); + m.def("cross_validate_trainer", _cross_validate_trainer<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds")); + m.def("cross_validate_trainer_threaded", _cross_validate_trainer_t<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds"),py::arg("num_threads")); + } + + { + typedef rvm_trainer<sparse_linear_kernel<sparse_vect> > T; + setup_trainer_eps<T>(m, "rvm_trainer_sparse_linear") + .def(py::init()); + m.def("cross_validate_trainer", _cross_validate_trainer<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds")); + m.def("cross_validate_trainer_threaded", _cross_validate_trainer_t<T>, + py::arg("trainer"),py::arg("x"),py::arg("y"),py::arg("folds"),py::arg("num_threads")); + } +} + + diff --git a/ml/dlib/tools/python/src/svm_rank_trainer.cpp b/ml/dlib/tools/python/src/svm_rank_trainer.cpp new file mode 100644 index 00000000..26cf3111 --- /dev/null +++ b/ml/dlib/tools/python/src/svm_rank_trainer.cpp @@ -0,0 +1,161 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+ +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/matrix.h> +#include <dlib/svm.h> +#include "testing_results.h" +#include <pybind11/stl_bind.h> + +using namespace dlib; +using namespace std; +namespace py = pybind11; + +typedef matrix<double,0,1> sample_type; + + +// ---------------------------------------------------------------------------------------- + +namespace dlib +{ + template <typename T> + bool operator== ( + const ranking_pair<T>&, + const ranking_pair<T>& + ) + { + pyassert(false, "It is illegal to compare ranking pair objects for equality."); + return false; + } +} + +template <typename T> +void resize(T& v, unsigned long n) { v.resize(n); } + +// ---------------------------------------------------------------------------------------- + +template <typename trainer_type> +typename trainer_type::trained_function_type train1 ( + const trainer_type& trainer, + const ranking_pair<typename trainer_type::sample_type>& sample +) +{ + typedef ranking_pair<typename trainer_type::sample_type> st; + pyassert(is_ranking_problem(std::vector<st>(1, sample)), "Invalid inputs"); + return trainer.train(sample); +} + +template <typename trainer_type> +typename trainer_type::trained_function_type train2 ( + const trainer_type& trainer, + const std::vector<ranking_pair<typename trainer_type::sample_type> >& samples +) +{ + pyassert(is_ranking_problem(samples), "Invalid inputs"); + return trainer.train(samples); +} + +template <typename trainer_type> +void set_epsilon ( trainer_type& trainer, double eps) +{ + pyassert(eps > 0, "epsilon must be > 0"); + trainer.set_epsilon(eps); +} + +template <typename trainer_type> +double get_epsilon ( const trainer_type& trainer) { return trainer.get_epsilon(); } + +template <typename trainer_type> +void set_c ( trainer_type& trainer, double C) +{ + pyassert(C > 0, "C must be > 0"); + trainer.set_c(C); +} + +template <typename trainer_type> +double get_c (const trainer_type& trainer) +{ + return trainer.get_c(); +} 
+ + +template <typename trainer> +void add_ranker ( + py::module& m, + const char* name +) +{ + py::class_<trainer>(m, name) + .def(py::init()) + .def_property("epsilon", get_epsilon<trainer>, set_epsilon<trainer>) + .def_property("c", get_c<trainer>, set_c<trainer>) + .def_property("max_iterations", &trainer::get_max_iterations, &trainer::set_max_iterations) + .def_property("force_last_weight_to_1", &trainer::forces_last_weight_to_1, &trainer::force_last_weight_to_1) + .def_property("learns_nonnegative_weights", &trainer::learns_nonnegative_weights, &trainer::set_learns_nonnegative_weights) + .def_property_readonly("has_prior", &trainer::has_prior) + .def("train", train1<trainer>) + .def("train", train2<trainer>) + .def("set_prior", &trainer::set_prior) + .def("be_verbose", &trainer::be_verbose) + .def("be_quiet", &trainer::be_quiet); +} + +// ---------------------------------------------------------------------------------------- + +template < + typename trainer_type, + typename T + > +const ranking_test _cross_ranking_validate_trainer ( + const trainer_type& trainer, + const std::vector<ranking_pair<T> >& samples, + const unsigned long folds +) +{ + pyassert(is_ranking_problem(samples), "Training data does not make a valid training set."); + pyassert(1 < folds && folds <= samples.size(), "Invalid number of folds given."); + return cross_validate_ranking_trainer(trainer, samples, folds); +} + +// ---------------------------------------------------------------------------------------- + +void bind_svm_rank_trainer(py::module& m) +{ + py::class_<ranking_pair<sample_type> >(m, "ranking_pair") + .def(py::init()) + .def_readwrite("relevant", &ranking_pair<sample_type>::relevant) + .def_readwrite("nonrelevant", &ranking_pair<sample_type>::nonrelevant) + .def(py::pickle(&getstate<ranking_pair<sample_type>>, &setstate<ranking_pair<sample_type>>)); + + py::class_<ranking_pair<sparse_vect> >(m, "sparse_ranking_pair") + .def(py::init()) + .def_readwrite("relevant", 
&ranking_pair<sparse_vect>::relevant) + .def_readwrite("nonrelevant", &ranking_pair<sparse_vect>::nonrelevant) + .def(py::pickle(&getstate<ranking_pair<sparse_vect>>, &setstate<ranking_pair<sparse_vect>>)); + + py::bind_vector<ranking_pairs>(m, "ranking_pairs") + .def("clear", &ranking_pairs::clear) + .def("resize", resize<ranking_pairs>) + .def("extend", extend_vector_with_python_list<ranking_pair<sample_type>>) + .def(py::pickle(&getstate<ranking_pairs>, &setstate<ranking_pairs>)); + + py::bind_vector<sparse_ranking_pairs>(m, "sparse_ranking_pairs") + .def("clear", &sparse_ranking_pairs::clear) + .def("resize", resize<sparse_ranking_pairs>) + .def("extend", extend_vector_with_python_list<ranking_pair<sparse_vect>>) + .def(py::pickle(&getstate<sparse_ranking_pairs>, &setstate<sparse_ranking_pairs>)); + + add_ranker<svm_rank_trainer<linear_kernel<sample_type> > >(m, "svm_rank_trainer"); + add_ranker<svm_rank_trainer<sparse_linear_kernel<sparse_vect> > >(m, "svm_rank_trainer_sparse"); + + m.def("cross_validate_ranking_trainer", &_cross_ranking_validate_trainer< + svm_rank_trainer<linear_kernel<sample_type> >,sample_type>, + py::arg("trainer"), py::arg("samples"), py::arg("folds") ); + m.def("cross_validate_ranking_trainer", &_cross_ranking_validate_trainer< + svm_rank_trainer<sparse_linear_kernel<sparse_vect> > ,sparse_vect>, + py::arg("trainer"), py::arg("samples"), py::arg("folds") ); +} + + + diff --git a/ml/dlib/tools/python/src/svm_struct.cpp b/ml/dlib/tools/python/src/svm_struct.cpp new file mode 100644 index 00000000..d8ebad95 --- /dev/null +++ b/ml/dlib/tools/python/src/svm_struct.cpp @@ -0,0 +1,151 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+ +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/matrix.h> +#include <dlib/svm.h> + +using namespace dlib; +using namespace std; +namespace py = pybind11; + +template <typename psi_type> +class svm_struct_prob : public structural_svm_problem<matrix<double,0,1>, psi_type> +{ + typedef structural_svm_problem<matrix<double,0,1>, psi_type> base; + typedef typename base::feature_vector_type feature_vector_type; + typedef typename base::matrix_type matrix_type; + typedef typename base::scalar_type scalar_type; +public: + svm_struct_prob ( + py::object& problem_, + long num_dimensions_, + long num_samples_ + ) : + num_dimensions(num_dimensions_), + num_samples(num_samples_), + problem(problem_) + {} + + virtual long get_num_dimensions ( + ) const { return num_dimensions; } + + virtual long get_num_samples ( + ) const { return num_samples; } + + virtual void get_truth_joint_feature_vector ( + long idx, + feature_vector_type& psi + ) const + { + psi = problem.attr("get_truth_joint_feature_vector")(idx).template cast<feature_vector_type&>(); + } + + virtual void separation_oracle ( + const long idx, + const matrix_type& current_solution, + scalar_type& loss, + feature_vector_type& psi + ) const + { + py::object res = problem.attr("separation_oracle")(idx,std::ref(current_solution)); + pyassert(len(res) == 2, "separation_oracle() must return two objects, the loss and the psi vector"); + py::tuple t = res.cast<py::tuple>(); + // let the user supply the output arguments in any order. 
+ try { + loss = t[0].cast<scalar_type>(); + psi = t[1].cast<feature_vector_type&>(); + } catch(py::cast_error &e) { + psi = t[0].cast<feature_vector_type&>(); + loss = t[1].cast<scalar_type>(); + } + } + +private: + + const long num_dimensions; + const long num_samples; + py::object& problem; +}; + +// ---------------------------------------------------------------------------------------- + +template <typename psi_type> +matrix<double,0,1> solve_structural_svm_problem_impl( + py::object problem +) +{ + const double C = problem.attr("C").cast<double>(); + const bool be_verbose = py::hasattr(problem,"be_verbose") && problem.attr("be_verbose").cast<bool>(); + const bool use_sparse_feature_vectors = py::hasattr(problem,"use_sparse_feature_vectors") && + problem.attr("use_sparse_feature_vectors").cast<bool>(); + const bool learns_nonnegative_weights = py::hasattr(problem,"learns_nonnegative_weights") && + problem.attr("learns_nonnegative_weights").cast<bool>(); + + double eps = 0.001; + unsigned long max_cache_size = 10; + if (py::hasattr(problem, "epsilon")) + eps = problem.attr("epsilon").cast<double>(); + if (py::hasattr(problem, "max_cache_size")) + max_cache_size = problem.attr("max_cache_size").cast<double>(); + + const long num_samples = problem.attr("num_samples").cast<long>(); + const long num_dimensions = problem.attr("num_dimensions").cast<long>(); + + pyassert(num_samples > 0, "You can't train a Structural-SVM if you don't have any training samples."); + + if (be_verbose) + { + cout << "C: " << C << endl; + cout << "epsilon: " << eps << endl; + cout << "max_cache_size: " << max_cache_size << endl; + cout << "num_samples: " << num_samples << endl; + cout << "num_dimensions: " << num_dimensions << endl; + cout << "use_sparse_feature_vectors: " << std::boolalpha << use_sparse_feature_vectors << endl; + cout << "learns_nonnegative_weights: " << std::boolalpha << learns_nonnegative_weights << endl; + cout << endl; + } + + svm_struct_prob<psi_type> prob(problem, 
num_dimensions, num_samples); + prob.set_c(C); + prob.set_epsilon(eps); + prob.set_max_cache_size(max_cache_size); + if (be_verbose) + prob.be_verbose(); + + oca solver; + matrix<double,0,1> w; + if (learns_nonnegative_weights) + solver(prob, w, prob.get_num_dimensions()); + else + solver(prob, w); + return w; +} + +// ---------------------------------------------------------------------------------------- + +matrix<double,0,1> solve_structural_svm_problem( + py::object problem +) +{ + // Check if the python code is using sparse or dense vectors to represent PSI() + if (py::isinstance<matrix<double,0,1>>(problem.attr("get_truth_joint_feature_vector")(0))) + return solve_structural_svm_problem_impl<matrix<double,0,1> >(problem); + else + return solve_structural_svm_problem_impl<std::vector<std::pair<unsigned long,double> > >(problem); +} + +// ---------------------------------------------------------------------------------------- + +void bind_svm_struct(py::module& m) +{ + m.def("solve_structural_svm_problem",solve_structural_svm_problem, py::arg("problem"), +"This function solves a structural SVM problem and returns the weight vector \n\ +that defines the solution. See the example program python_examples/svm_struct.py \n\ +for documentation about how to create a proper problem object. " + ); +} + +// ---------------------------------------------------------------------------------------- + diff --git a/ml/dlib/tools/python/src/testing_results.h b/ml/dlib/tools/python/src/testing_results.h new file mode 100644 index 00000000..746e2934 --- /dev/null +++ b/ml/dlib/tools/python/src/testing_results.h @@ -0,0 +1,50 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_TESTING_ReSULTS_H__ +#define DLIB_TESTING_ReSULTS_H__ + +#include <dlib/matrix.h> + +struct binary_test +{ + binary_test() : class1_accuracy(0), class2_accuracy(0) {} + binary_test( + const dlib::matrix<double,1,2>& m + ) : class1_accuracy(m(0)), + class2_accuracy(m(1)) {} + + double class1_accuracy; + double class2_accuracy; +}; + +struct regression_test +{ + regression_test() = default; + regression_test( + const dlib::matrix<double,1,4>& m + ) : mean_squared_error(m(0)), + R_squared(m(1)), + mean_average_error(m(2)), + mean_error_stddev(m(3)) + {} + + double mean_squared_error = 0; + double R_squared = 0; + double mean_average_error = 0; + double mean_error_stddev = 0; +}; + +struct ranking_test +{ + ranking_test() : ranking_accuracy(0), mean_ap(0) {} + ranking_test( + const dlib::matrix<double,1,2>& m + ) : ranking_accuracy(m(0)), + mean_ap(m(1)) {} + + double ranking_accuracy; + double mean_ap; +}; + +#endif // DLIB_TESTING_ReSULTS_H__ + diff --git a/ml/dlib/tools/python/src/vector.cpp b/ml/dlib/tools/python/src/vector.cpp new file mode 100644 index 00000000..a9f81c65 --- /dev/null +++ b/ml/dlib/tools/python/src/vector.cpp @@ -0,0 +1,182 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+ +#include "opaque_types.h" +#include <dlib/python.h> +#include <dlib/matrix.h> +#include <dlib/geometry/vector.h> +#include <pybind11/stl_bind.h> +#include "indexing.h" + +using namespace dlib; +using namespace std; + +typedef matrix<double,0,1> cv; + + +void cv_set_size(cv& m, long s) +{ + m.set_size(s); + m = 0; +} + +double dotprod ( const cv& a, const cv& b) +{ + return dot(a,b); +} + +string cv__str__(const cv& v) +{ + ostringstream sout; + for (long i = 0; i < v.size(); ++i) + { + sout << v(i); + if (i+1 < v.size()) + sout << "\n"; + } + return sout.str(); +} + +string cv__repr__ (const cv& v) +{ + std::ostringstream sout; + sout << "dlib.vector(["; + for (long i = 0; i < v.size(); ++i) + { + sout << v(i); + if (i+1 < v.size()) + sout << ", "; + } + sout << "])"; + return sout.str(); +} + +std::shared_ptr<cv> cv_from_object(py::object obj) +{ + try { + long nr = obj.cast<long>(); + auto temp = std::make_shared<cv>(nr); + *temp = 0; + return temp; + } catch(py::cast_error &e) { + py::list li = obj.cast<py::list>(); + const long nr = len(obj); + auto temp = std::make_shared<cv>(nr); + for ( long r = 0; r < nr; ++r) + { + (*temp)(r) = li[r].cast<double>(); + } + return temp; + } +} + +long cv__len__(cv& c) +{ + return c.size(); +} + + +void cv__setitem__(cv& c, long p, double val) +{ + if (p < 0) { + p = c.size() + p; // negative index + } + if (p > c.size()-1) { + PyErr_SetString( PyExc_IndexError, "index out of range" + ); + throw py::error_already_set(); + } + c(p) = val; +} + +double cv__getitem__(cv& m, long r) +{ + if (r < 0) { + r = m.size() + r; // negative index + } + if (r > m.size()-1 || r < 0) { + PyErr_SetString( PyExc_IndexError, "index out of range" + ); + throw py::error_already_set(); + } + return m(r); +} + + +cv cv__getitem2__(cv& m, py::slice r) +{ + size_t start, stop, step, slicelength; + if (!r.compute(m.size(), &start, &stop, &step, &slicelength)) + throw py::error_already_set(); + + cv temp(slicelength); + + for (size_t i = 0; i < 
slicelength; ++i) { + temp(i) = m(start); start += step; + } + return temp; +} + +py::tuple cv_get_matrix_size(cv& m) +{ + return py::make_tuple(m.nr(), m.nc()); +} + +// ---------------------------------------------------------------------------------------- + +string point__repr__ (const point& p) +{ + std::ostringstream sout; + sout << "point(" << p.x() << ", " << p.y() << ")"; + return sout.str(); +} + +string point__str__(const point& p) +{ + std::ostringstream sout; + sout << "(" << p.x() << ", " << p.y() << ")"; + return sout.str(); +} + +long point_x(const point& p) { return p.x(); } +long point_y(const point& p) { return p.y(); } + +// ---------------------------------------------------------------------------------------- +void bind_vector(py::module& m) +{ + { + py::class_<cv, std::shared_ptr<cv>>(m, "vector", "This object represents the mathematical idea of a column vector.") + .def(py::init()) + .def("set_size", &cv_set_size) + .def("resize", &cv_set_size) + .def(py::init(&cv_from_object)) + .def("__repr__", &cv__repr__) + .def("__str__", &cv__str__) + .def("__len__", &cv__len__) + .def("__getitem__", &cv__getitem__) + .def("__getitem__", &cv__getitem2__) + .def("__setitem__", &cv__setitem__) + .def_property_readonly("shape", &cv_get_matrix_size) + .def(py::pickle(&getstate<cv>, &setstate<cv>)); + + m.def("dot", &dotprod, "Compute the dot product between two dense column vectors."); + } + { + typedef point type; + py::class_<type>(m, "point", "This object represents a single point of integer coordinates that maps directly to a dlib::point.") + .def(py::init<long,long>(), py::arg("x"), py::arg("y")) + .def("__repr__", &point__repr__) + .def("__str__", &point__str__) + .def_property("x", &point_x, [](point& p, long x){p.x()=x;}, "The x-coordinate of the point.") + .def_property("y", &point_y, [](point& p, long y){p.x()=y;}, "The y-coordinate of the point.") + .def(py::pickle(&getstate<type>, &setstate<type>)); + } + { + typedef std::vector<point> type; + 
py::bind_vector<type>(m, "points", "An array of point objects.") + .def("clear", &type::clear) + .def("resize", resize<type>) + .def("extend", extend_vector_with_python_list<point>) + .def(py::pickle(&getstate<type>, &setstate<type>)); + } +} |