summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/image_processing/correlation_tracker.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--ml/dlib/dlib/image_processing/correlation_tracker.h404
1 files changed, 404 insertions, 0 deletions
diff --git a/ml/dlib/dlib/image_processing/correlation_tracker.h b/ml/dlib/dlib/image_processing/correlation_tracker.h
new file mode 100644
index 000000000..f005ddc7b
--- /dev/null
+++ b/ml/dlib/dlib/image_processing/correlation_tracker.h
@@ -0,0 +1,404 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_CORRELATION_TrACKER_H_
+#define DLIB_CORRELATION_TrACKER_H_
+
+#include "correlation_tracker_abstract.h"
+#include "../geometry.h"
+#include "../matrix.h"
+#include "../array2d.h"
+#include "../image_transforms/assign_image.h"
+#include "../image_transforms/interpolation.h"
+
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ class correlation_tracker
+ {
+ public:
+
+ explicit correlation_tracker (unsigned long filter_size = 6,
+ unsigned long num_scale_levels = 5,
+ unsigned long scale_window_size = 23,
+ double regularizer_space = 0.001,
+ double nu_space = 0.025,
+ double regularizer_scale = 0.001,
+ double nu_scale = 0.025,
+ double scale_pyramid_alpha = 1.020
+ )
+ : filter_size(1 << filter_size), num_scale_levels(1 << num_scale_levels),
+ scale_window_size(scale_window_size),
+ regularizer_space(regularizer_space), nu_space(nu_space),
+ regularizer_scale(regularizer_scale), nu_scale(nu_scale),
+ scale_pyramid_alpha(scale_pyramid_alpha)
+ {
+ // Create the cosine mask used for space filtering.
+ mask = make_cosine_mask();
+
+ // Create the cosine mask used for the scale filtering.
+ scale_cos_mask.resize(get_num_scale_levels());
+ const long max_level = get_num_scale_levels()/2;
+ for (unsigned long k = 0; k < get_num_scale_levels(); ++k)
+ {
+ double dist = std::abs((double)k-max_level)/max_level*pi/2;
+ dist = std::min(dist, pi/2);
+ scale_cos_mask[k] = std::cos(dist);
+ }
+ }
+
+ template <typename image_type>
+ void start_track (
+ const image_type& img,
+ const drectangle& p
+ )
+ {
+ DLIB_CASSERT(p.is_empty() == false,
+ "\t void correlation_tracker::start_track()"
+ << "\n\t You can't give an empty rectangle."
+ );
+
+ B.set_size(0,0);
+
+ point_transform_affine tform = inv(make_chip(img, p, F));
+ for (unsigned long i = 0; i < F.size(); ++i)
+ fft_inplace(F[i]);
+ make_target_location_image(tform(center(p)), G);
+ A.resize(F.size());
+ for (unsigned long i = 0; i < F.size(); ++i)
+ {
+ A[i] = pointwise_multiply(G, F[i]);
+ B += squared(real(F[i]))+squared(imag(F[i]));
+ }
+
+ position = p;
+
+ // now do the scale space stuff
+ make_scale_space(img, Fs);
+ for (unsigned long i = 0; i < Fs.size(); ++i)
+ fft_inplace(Fs[i]);
+ make_scale_target_location_image(get_num_scale_levels()/2, Gs);
+ Bs.set_size(0);
+ As.resize(Fs.size());
+ for (unsigned long i = 0; i < Fs.size(); ++i)
+ {
+ As[i] = pointwise_multiply(Gs, Fs[i]);
+ Bs += squared(real(Fs[i]))+squared(imag(Fs[i]));
+ }
+ }
+
+
+ unsigned long get_filter_size (
+ ) const { return filter_size; }
+
+ unsigned long get_num_scale_levels(
+ ) const { return num_scale_levels; }
+
+ unsigned long get_scale_window_size (
+ ) const { return scale_window_size; }
+
+ double get_regularizer_space (
+ ) const { return regularizer_space; }
+ inline double get_nu_space (
+ ) const { return nu_space;}
+
+ double get_regularizer_scale (
+ ) const { return regularizer_scale; }
+ double get_nu_scale (
+ ) const { return nu_scale;}
+
+ drectangle get_position (
+ ) const
+ {
+ return position;
+ }
+
+ double get_scale_pyramid_alpha (
+ ) const { return scale_pyramid_alpha; }
+
+
+ template <typename image_type>
+ double update_noscale(
+ const image_type& img,
+ const drectangle& guess
+ )
+ {
+ DLIB_CASSERT(get_position().is_empty() == false,
+ "\t double correlation_tracker::update()"
+ << "\n\t You must call start_track() first before calling update()."
+ );
+
+
+ const point_transform_affine tform = make_chip(img, guess, F);
+ for (unsigned long i = 0; i < F.size(); ++i)
+ fft_inplace(F[i]);
+
+ // use the current filter to predict the object's location
+ G = 0;
+ for (unsigned long i = 0; i < F.size(); ++i)
+ G += pointwise_multiply(F[i],conj(A[i]));
+ G = pointwise_multiply(G, reciprocal(B+get_regularizer_space()));
+ ifft_inplace(G);
+ const dlib::vector<double,2> pp = max_point_interpolated(real(G));
+
+
+ // Compute the peak to side lobe ratio.
+ const point p = pp;
+ running_stats<double> rs;
+ const rectangle peak = centered_rect(p, 8,8);
+ for (long r = 0; r < G.nr(); ++r)
+ {
+ for (long c = 0; c < G.nc(); ++c)
+ {
+ if (!peak.contains(point(c,r)))
+ rs.add(G(r,c).real());
+ }
+ }
+ const double psr = (G(p.y(),p.x()).real()-rs.mean())/rs.stddev();
+
+ // update the position of the object
+ position = translate_rect(guess, tform(pp)-center(guess));
+
+ // now update the position filters
+ make_target_location_image(pp, G);
+ B *= (1-get_nu_space());
+ for (unsigned long i = 0; i < F.size(); ++i)
+ {
+ A[i] = get_nu_space()*pointwise_multiply(G, F[i]) + (1-get_nu_space())*A[i];
+ B += get_nu_space()*(squared(real(F[i]))+squared(imag(F[i])));
+ }
+
+ return psr;
+ }
+
+ template <typename image_type>
+ double update (
+ const image_type& img,
+ const drectangle& guess
+ )
+ {
+ double psr = update_noscale(img, guess);
+
+ // Now predict the scale change
+ make_scale_space(img, Fs);
+ for (unsigned long i = 0; i < Fs.size(); ++i)
+ fft_inplace(Fs[i]);
+ Gs = 0;
+ for (unsigned long i = 0; i < Fs.size(); ++i)
+ Gs += pointwise_multiply(Fs[i],conj(As[i]));
+ Gs = pointwise_multiply(Gs, reciprocal(Bs+get_regularizer_scale()));
+ ifft_inplace(Gs);
+ const double pos = max_point_interpolated(real(Gs)).y();
+
+ // update the rectangle's scale
+ position *= std::pow(get_scale_pyramid_alpha(), pos-(double)get_num_scale_levels()/2);
+
+
+
+ // Now update the scale filters
+ make_scale_target_location_image(pos, Gs);
+ Bs *= (1-get_nu_scale());
+ for (unsigned long i = 0; i < Fs.size(); ++i)
+ {
+ As[i] = get_nu_scale()*pointwise_multiply(Gs, Fs[i]) + (1-get_nu_scale())*As[i];
+ Bs += get_nu_scale()*(squared(real(Fs[i]))+squared(imag(Fs[i])));
+ }
+
+
+ return psr;
+ }
+
+ template <typename image_type>
+ double update_noscale (
+ const image_type& img
+ )
+ {
+ return update_noscale(img, get_position());
+ }
+
+ template <typename image_type>
+ double update(
+ const image_type& img
+ )
+ {
+ return update(img, get_position());
+ }
+
+ private:
+
+ template <typename image_type>
+ void make_scale_space(
+ const image_type& img,
+ std::vector<matrix<std::complex<double>,0,1> >& Fs
+ ) const
+ {
+ typedef typename image_traits<image_type>::pixel_type pixel_type;
+
+ // Make an image pyramid and put it into the chips array.
+ const long chip_size = get_scale_window_size();
+ drectangle ppp = position*std::pow(get_scale_pyramid_alpha(), -(double)get_num_scale_levels()/2);
+ dlib::array<array2d<pixel_type> > chips;
+ std::vector<dlib::vector<double,2> > from_points, to_points;
+ from_points.push_back(point(0,0));
+ from_points.push_back(point(chip_size-1,0));
+ from_points.push_back(point(chip_size-1,chip_size-1));
+ for (unsigned long i = 0; i < get_num_scale_levels(); ++i)
+ {
+ array2d<pixel_type> chip(chip_size,chip_size);
+
+ // pull box into chip
+ to_points.clear();
+ to_points.push_back(ppp.tl_corner());
+ to_points.push_back(ppp.tr_corner());
+ to_points.push_back(ppp.br_corner());
+ transform_image(img,chip,interpolate_bilinear(),find_affine_transform(from_points, to_points));
+
+ chips.push_back(chip);
+ ppp *= get_scale_pyramid_alpha();
+ }
+
+
+ // extract HOG for each chip
+ dlib::array<dlib::array<array2d<float> > > hogs(chips.size());
+ for (unsigned long i = 0; i < chips.size(); ++i)
+ {
+ extract_fhog_features(chips[i], hogs[i], 4);
+ hogs[i].resize(32);
+ assign_image(hogs[i][31], chips[i]);
+ assign_image(hogs[i][31], mat(hogs[i][31])/255.0);
+ }
+
+ // Now copy the hog features into the Fs outputs and also apply the cosine
+ // windowing.
+ Fs.resize(hogs[0].size()*hogs[0][0].size());
+ unsigned long i = 0;
+ for (long r = 0; r < hogs[0][0].nr(); ++r)
+ {
+ for (long c = 0; c < hogs[0][0].nc(); ++c)
+ {
+ for (unsigned long j = 0; j < hogs[0].size(); ++j)
+ {
+ Fs[i].set_size(hogs.size());
+ for (unsigned long k = 0; k < hogs.size(); ++k)
+ {
+ Fs[i](k) = hogs[k][j][r][c]*scale_cos_mask[k];
+ }
+ ++i;
+ }
+ }
+ }
+ }
+
+ template <typename image_type>
+ point_transform_affine make_chip (
+ const image_type& img,
+ drectangle p,
+ std::vector<matrix<std::complex<double> > >& chip
+ ) const
+ {
+ typedef typename image_traits<image_type>::pixel_type pixel_type;
+ array2d<pixel_type> temp;
+ const double padding = 1.4;
+ const chip_details details(p*padding, chip_dims(get_filter_size(), get_filter_size()));
+ extract_image_chip(img, details, temp);
+
+
+ chip.resize(32);
+ dlib::array<array2d<float> > hog;
+ extract_fhog_features(temp, hog, 1, 3,3 );
+ for (unsigned long i = 0; i < hog.size(); ++i)
+ assign_image(chip[i], pointwise_multiply(matrix_cast<double>(mat(hog[i])), mask));
+
+ assign_image(chip[31], temp);
+ assign_image(chip[31], pointwise_multiply(mat(chip[31]), mask)/255.0);
+
+ return inv(get_mapping_to_chip(details));
+ }
+
+ void make_target_location_image (
+ const dlib::vector<double,2>& p,
+ matrix<std::complex<double> >& g
+ ) const
+ {
+ g.set_size(get_filter_size(), get_filter_size());
+ g = 0;
+ rectangle area = centered_rect(p, 21,21).intersect(get_rect(g));
+ for (long r = area.top(); r <= area.bottom(); ++r)
+ {
+ for (long c = area.left(); c <= area.right(); ++c)
+ {
+ double dist = length(point(c,r)-p);
+ g(r,c) = std::exp(-dist/3.0);
+ }
+ }
+ fft_inplace(g);
+ g = conj(g);
+ }
+
+
+ void make_scale_target_location_image (
+ const double scale,
+ matrix<std::complex<double>,0,1>& g
+ ) const
+ {
+ g.set_size(get_num_scale_levels());
+ for (long i = 0; i < g.size(); ++i)
+ {
+ double dist = std::pow((i-scale),2.0);
+ g(i) = std::exp(-dist/1.000);
+ }
+ fft_inplace(g);
+ g = conj(g);
+ }
+
+ matrix<double> make_cosine_mask (
+ ) const
+ {
+ const long size = get_filter_size();
+ matrix<double> temp(size,size);
+ point cent = center(get_rect(temp));
+ for (long r = 0; r < temp.nr(); ++r)
+ {
+ for (long c = 0; c < temp.nc(); ++c)
+ {
+ point delta = point(c,r)-cent;
+ double dist = length(delta)/(size/2.0)*(pi/2);
+ dist = std::min(dist*1.0, pi/2);
+
+ temp(r,c) = std::cos(dist);
+ }
+ }
+ return temp;
+ }
+
+
+ std::vector<matrix<std::complex<double> > > A, F;
+ matrix<double> B;
+
+ std::vector<matrix<std::complex<double>,0,1> > As, Fs;
+ matrix<double,0,1> Bs;
+ drectangle position;
+
+ matrix<double> mask;
+ std::vector<double> scale_cos_mask;
+
+ // G and Gs do not logically contribute to the state of this object. They are
+ // here just so we can void reallocating them over and over.
+ matrix<std::complex<double> > G;
+ matrix<std::complex<double>,0,1> Gs;
+
+ unsigned long filter_size;
+ unsigned long num_scale_levels;
+ unsigned long scale_window_size;
+ double regularizer_space;
+ double nu_space;
+ double regularizer_scale;
+ double nu_scale;
+ double scale_pyramid_alpha;
+ };
+}
+
+#endif // DLIB_CORRELATION_TrACKER_H_
+