diff options
Diffstat (limited to '')
-rw-r--r-- | ml/dlib/dlib/image_processing/correlation_tracker.h | 404 |
1 files changed, 404 insertions, 0 deletions
diff --git a/ml/dlib/dlib/image_processing/correlation_tracker.h b/ml/dlib/dlib/image_processing/correlation_tracker.h new file mode 100644 index 000000000..f005ddc7b --- /dev/null +++ b/ml/dlib/dlib/image_processing/correlation_tracker.h @@ -0,0 +1,404 @@ +// Copyright (C) 2015 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_CORRELATION_TrACKER_H_ +#define DLIB_CORRELATION_TrACKER_H_ + +#include "correlation_tracker_abstract.h" +#include "../geometry.h" +#include "../matrix.h" +#include "../array2d.h" +#include "../image_transforms/assign_image.h" +#include "../image_transforms/interpolation.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class correlation_tracker + { + public: + + explicit correlation_tracker (unsigned long filter_size = 6, + unsigned long num_scale_levels = 5, + unsigned long scale_window_size = 23, + double regularizer_space = 0.001, + double nu_space = 0.025, + double regularizer_scale = 0.001, + double nu_scale = 0.025, + double scale_pyramid_alpha = 1.020 + ) + : filter_size(1 << filter_size), num_scale_levels(1 << num_scale_levels), + scale_window_size(scale_window_size), + regularizer_space(regularizer_space), nu_space(nu_space), + regularizer_scale(regularizer_scale), nu_scale(nu_scale), + scale_pyramid_alpha(scale_pyramid_alpha) + { + // Create the cosine mask used for space filtering. + mask = make_cosine_mask(); + + // Create the cosine mask used for the scale filtering. + scale_cos_mask.resize(get_num_scale_levels()); + const long max_level = get_num_scale_levels()/2; + for (unsigned long k = 0; k < get_num_scale_levels(); ++k) + { + double dist = std::abs((double)k-max_level)/max_level*pi/2; + dist = std::min(dist, pi/2); + scale_cos_mask[k] = std::cos(dist); + } + } + + template <typename image_type> + void start_track ( + const image_type& img, + const drectangle& p + ) + { + DLIB_CASSERT(p.is_empty() == false, + "\t void correlation_tracker::start_track()" + << "\n\t You can't give an empty rectangle." + ); + + B.set_size(0,0); + + point_transform_affine tform = inv(make_chip(img, p, F)); + for (unsigned long i = 0; i < F.size(); ++i) + fft_inplace(F[i]); + make_target_location_image(tform(center(p)), G); + A.resize(F.size()); + for (unsigned long i = 0; i < F.size(); ++i) + { + A[i] = pointwise_multiply(G, F[i]); + B += squared(real(F[i]))+squared(imag(F[i])); + } + + position = p; + + // now do the scale space stuff + make_scale_space(img, Fs); + for (unsigned long i = 0; i < Fs.size(); ++i) + fft_inplace(Fs[i]); + make_scale_target_location_image(get_num_scale_levels()/2, Gs); + Bs.set_size(0); + As.resize(Fs.size()); + for (unsigned long i = 0; i < Fs.size(); ++i) + { + As[i] = pointwise_multiply(Gs, Fs[i]); + Bs += squared(real(Fs[i]))+squared(imag(Fs[i])); + } + } + + + unsigned long get_filter_size ( + ) const { return filter_size; } + + unsigned long get_num_scale_levels( + ) const { return num_scale_levels; } + + unsigned long get_scale_window_size ( + ) const { return scale_window_size; } + + double get_regularizer_space ( + ) const { return regularizer_space; } + inline double get_nu_space ( + ) const { return nu_space;} + + double get_regularizer_scale ( + ) const { return regularizer_scale; } + double get_nu_scale ( + ) const { return nu_scale;} + + drectangle get_position ( + ) const + { + return position; + } + + double get_scale_pyramid_alpha ( + ) const { return scale_pyramid_alpha; } + + + template <typename image_type> + double update_noscale( + const image_type& img, + const drectangle& guess + ) + { + DLIB_CASSERT(get_position().is_empty() == false, + "\t double correlation_tracker::update()" + << "\n\t You must call start_track() first before calling update()." + ); + + + const point_transform_affine tform = make_chip(img, guess, F); + for (unsigned long i = 0; i < F.size(); ++i) + fft_inplace(F[i]); + + // use the current filter to predict the object's location + G = 0; + for (unsigned long i = 0; i < F.size(); ++i) + G += pointwise_multiply(F[i],conj(A[i])); + G = pointwise_multiply(G, reciprocal(B+get_regularizer_space())); + ifft_inplace(G); + const dlib::vector<double,2> pp = max_point_interpolated(real(G)); + + + // Compute the peak to side lobe ratio. + const point p = pp; + running_stats<double> rs; + const rectangle peak = centered_rect(p, 8,8); + for (long r = 0; r < G.nr(); ++r) + { + for (long c = 0; c < G.nc(); ++c) + { + if (!peak.contains(point(c,r))) + rs.add(G(r,c).real()); + } + } + const double psr = (G(p.y(),p.x()).real()-rs.mean())/rs.stddev(); + + // update the position of the object + position = translate_rect(guess, tform(pp)-center(guess)); + + // now update the position filters + make_target_location_image(pp, G); + B *= (1-get_nu_space()); + for (unsigned long i = 0; i < F.size(); ++i) + { + A[i] = get_nu_space()*pointwise_multiply(G, F[i]) + (1-get_nu_space())*A[i]; + B += get_nu_space()*(squared(real(F[i]))+squared(imag(F[i]))); + } + + return psr; + } + + template <typename image_type> + double update ( + const image_type& img, + const drectangle& guess + ) + { + double psr = update_noscale(img, guess); + + // Now predict the scale change + make_scale_space(img, Fs); + for (unsigned long i = 0; i < Fs.size(); ++i) + fft_inplace(Fs[i]); + Gs = 0; + for (unsigned long i = 0; i < Fs.size(); ++i) + Gs += pointwise_multiply(Fs[i],conj(As[i])); + Gs = pointwise_multiply(Gs, reciprocal(Bs+get_regularizer_scale())); + ifft_inplace(Gs); + const double pos = max_point_interpolated(real(Gs)).y(); + + // update the rectangle's scale + position *= std::pow(get_scale_pyramid_alpha(), pos-(double)get_num_scale_levels()/2); + + + + // Now update the scale filters + make_scale_target_location_image(pos, Gs); + Bs *= (1-get_nu_scale()); + for (unsigned long i = 0; i < Fs.size(); ++i) + { + As[i] = get_nu_scale()*pointwise_multiply(Gs, Fs[i]) + (1-get_nu_scale())*As[i]; + Bs += get_nu_scale()*(squared(real(Fs[i]))+squared(imag(Fs[i]))); + } + + + return psr; + } + + template <typename image_type> + double update_noscale ( + const image_type& img + ) + { + return update_noscale(img, get_position()); + } + + template <typename image_type> + double update( + const image_type& img + ) + { + return update(img, get_position()); + } + + private: + + template <typename image_type> + void make_scale_space( + const image_type& img, + std::vector<matrix<std::complex<double>,0,1> >& Fs + ) const + { + typedef typename image_traits<image_type>::pixel_type pixel_type; + + // Make an image pyramid and put it into the chips array. + const long chip_size = get_scale_window_size(); + drectangle ppp = position*std::pow(get_scale_pyramid_alpha(), -(double)get_num_scale_levels()/2); + dlib::array<array2d<pixel_type> > chips; + std::vector<dlib::vector<double,2> > from_points, to_points; + from_points.push_back(point(0,0)); + from_points.push_back(point(chip_size-1,0)); + from_points.push_back(point(chip_size-1,chip_size-1)); + for (unsigned long i = 0; i < get_num_scale_levels(); ++i) + { + array2d<pixel_type> chip(chip_size,chip_size); + + // pull box into chip + to_points.clear(); + to_points.push_back(ppp.tl_corner()); + to_points.push_back(ppp.tr_corner()); + to_points.push_back(ppp.br_corner()); + transform_image(img,chip,interpolate_bilinear(),find_affine_transform(from_points, to_points)); + + chips.push_back(chip); + ppp *= get_scale_pyramid_alpha(); + } + + + // extract HOG for each chip + dlib::array<dlib::array<array2d<float> > > hogs(chips.size()); + for (unsigned long i = 0; i < chips.size(); ++i) + { + extract_fhog_features(chips[i], hogs[i], 4); + hogs[i].resize(32); + assign_image(hogs[i][31], chips[i]); + assign_image(hogs[i][31], mat(hogs[i][31])/255.0); + } + + // Now copy the hog features into the Fs outputs and also apply the cosine + // windowing. + Fs.resize(hogs[0].size()*hogs[0][0].size()); + unsigned long i = 0; + for (long r = 0; r < hogs[0][0].nr(); ++r) + { + for (long c = 0; c < hogs[0][0].nc(); ++c) + { + for (unsigned long j = 0; j < hogs[0].size(); ++j) + { + Fs[i].set_size(hogs.size()); + for (unsigned long k = 0; k < hogs.size(); ++k) + { + Fs[i](k) = hogs[k][j][r][c]*scale_cos_mask[k]; + } + ++i; + } + } + } + } + + template <typename image_type> + point_transform_affine make_chip ( + const image_type& img, + drectangle p, + std::vector<matrix<std::complex<double> > >& chip + ) const + { + typedef typename image_traits<image_type>::pixel_type pixel_type; + array2d<pixel_type> temp; + const double padding = 1.4; + const chip_details details(p*padding, chip_dims(get_filter_size(), get_filter_size())); + extract_image_chip(img, details, temp); + + + chip.resize(32); + dlib::array<array2d<float> > hog; + extract_fhog_features(temp, hog, 1, 3,3 ); + for (unsigned long i = 0; i < hog.size(); ++i) + assign_image(chip[i], pointwise_multiply(matrix_cast<double>(mat(hog[i])), mask)); + + assign_image(chip[31], temp); + assign_image(chip[31], pointwise_multiply(mat(chip[31]), mask)/255.0); + + return inv(get_mapping_to_chip(details)); + } + + void make_target_location_image ( + const dlib::vector<double,2>& p, + matrix<std::complex<double> >& g + ) const + { + g.set_size(get_filter_size(), get_filter_size()); + g = 0; + rectangle area = centered_rect(p, 21,21).intersect(get_rect(g)); + for (long r = area.top(); r <= area.bottom(); ++r) + { + for (long c = area.left(); c <= area.right(); ++c) + { + double dist = length(point(c,r)-p); + g(r,c) = std::exp(-dist/3.0); + } + } + fft_inplace(g); + g = conj(g); + } + + + void make_scale_target_location_image ( + const double scale, + matrix<std::complex<double>,0,1>& g + ) const + { + g.set_size(get_num_scale_levels()); + for (long i = 0; i < g.size(); ++i) + { + double dist = std::pow((i-scale),2.0); + g(i) = std::exp(-dist/1.000); + } + fft_inplace(g); + g = conj(g); + } + + matrix<double> make_cosine_mask ( + ) const + { + const long size = get_filter_size(); + matrix<double> temp(size,size); + point cent = center(get_rect(temp)); + for (long r = 0; r < temp.nr(); ++r) + { + for (long c = 0; c < temp.nc(); ++c) + { + point delta = point(c,r)-cent; + double dist = length(delta)/(size/2.0)*(pi/2); + dist = std::min(dist*1.0, pi/2); + + temp(r,c) = std::cos(dist); + } + } + return temp; + } + + + std::vector<matrix<std::complex<double> > > A, F; + matrix<double> B; + + std::vector<matrix<std::complex<double>,0,1> > As, Fs; + matrix<double,0,1> Bs; + drectangle position; + + matrix<double> mask; + std::vector<double> scale_cos_mask; + + // G and Gs do not logically contribute to the state of this object. They are + // here just so we can void reallocating them over and over. + matrix<std::complex<double> > G; + matrix<std::complex<double>,0,1> Gs; + + unsigned long filter_size; + unsigned long num_scale_levels; + unsigned long scale_window_size; + double regularizer_space; + double nu_space; + double regularizer_scale; + double nu_scale; + double scale_pyramid_alpha; + }; +} + +#endif // DLIB_CORRELATION_TrACKER_H_ + |