summary | refs | log | tree | commit | diff | stats
path: root/ml/dlib/dlib/image_transforms
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/image_transforms')
-rw-r--r-- ml/dlib/dlib/image_transforms/assign_image.h 385
-rw-r--r-- ml/dlib/dlib/image_transforms/assign_image_abstract.h 196
-rw-r--r-- ml/dlib/dlib/image_transforms/colormaps.h 269
-rw-r--r-- ml/dlib/dlib/image_transforms/colormaps_abstract.h 152
-rw-r--r-- ml/dlib/dlib/image_transforms/draw.h 396
-rw-r--r-- ml/dlib/dlib/image_transforms/draw_abstract.h 150
-rw-r--r-- ml/dlib/dlib/image_transforms/edge_detector.h 302
-rw-r--r-- ml/dlib/dlib/image_transforms/edge_detector_abstract.h 112
-rw-r--r-- ml/dlib/dlib/image_transforms/equalize_histogram.h 143
-rw-r--r-- ml/dlib/dlib/image_transforms/equalize_histogram_abstract.h 91
-rw-r--r-- ml/dlib/dlib/image_transforms/fhog.h 1404
-rw-r--r-- ml/dlib/dlib/image_transforms/fhog_abstract.h 346
-rw-r--r-- ml/dlib/dlib/image_transforms/hough_transform.h 358
-rw-r--r-- ml/dlib/dlib/image_transforms/hough_transform_abstract.h 145
-rw-r--r-- ml/dlib/dlib/image_transforms/image_pyramid.h 1238
-rw-r--r-- ml/dlib/dlib/image_transforms/image_pyramid_abstract.h 384
-rw-r--r-- ml/dlib/dlib/image_transforms/integral_image.h 190
-rw-r--r-- ml/dlib/dlib/image_transforms/integral_image_abstract.h 169
-rw-r--r-- ml/dlib/dlib/image_transforms/interpolation.h 2193
-rw-r--r-- ml/dlib/dlib/image_transforms/interpolation_abstract.h 1480
-rw-r--r-- ml/dlib/dlib/image_transforms/label_connected_blobs.h 188
-rw-r--r-- ml/dlib/dlib/image_transforms/label_connected_blobs_abstract.h 199
-rw-r--r-- ml/dlib/dlib/image_transforms/lbp.h 307
-rw-r--r-- ml/dlib/dlib/image_transforms/lbp_abstract.h 139
-rw-r--r-- ml/dlib/dlib/image_transforms/morphological_operations.h 846
-rw-r--r-- ml/dlib/dlib/image_transforms/morphological_operations_abstract.h 316
-rw-r--r-- ml/dlib/dlib/image_transforms/random_color_transform.h 157
-rw-r--r-- ml/dlib/dlib/image_transforms/random_color_transform_abstract.h 94
-rw-r--r-- ml/dlib/dlib/image_transforms/random_cropper.h 361
-rw-r--r-- ml/dlib/dlib/image_transforms/random_cropper_abstract.h 346
-rw-r--r-- ml/dlib/dlib/image_transforms/segment_image.h 730
-rw-r--r-- ml/dlib/dlib/image_transforms/segment_image_abstract.h 126
-rw-r--r-- ml/dlib/dlib/image_transforms/spatial_filtering.h 1580
-rw-r--r-- ml/dlib/dlib/image_transforms/spatial_filtering_abstract.h 487
-rw-r--r-- ml/dlib/dlib/image_transforms/thresholding.h 340
-rw-r--r-- ml/dlib/dlib/image_transforms/thresholding_abstract.h 139
36 files changed, 16458 insertions, 0 deletions
diff --git a/ml/dlib/dlib/image_transforms/assign_image.h b/ml/dlib/dlib/image_transforms/assign_image.h
new file mode 100644
index 000000000..c69878efa
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/assign_image.h
@@ -0,0 +1,385 @@
+// Copyright (C) 2007 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_ASSIGN_IMAGe_
+#define DLIB_ASSIGN_IMAGe_
+
+#include "../pixel.h"
+#include "assign_image_abstract.h"
+#include "../statistics.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename dest_image_type,
+ typename src_image_type
+ >
+ void impl_assign_image (
+ image_view<dest_image_type>& dest,
+ const src_image_type& src
+ )
+ {
+ dest.set_size(src.nr(),src.nc());
+ for (long r = 0; r < src.nr(); ++r)
+ {
+ for (long c = 0; c < src.nc(); ++c)
+ {
+ assign_pixel(dest[r][c], src(r,c));
+ }
+ }
+ }
+
+ template <
+ typename dest_image_type,
+ typename src_image_type
+ >
+ void impl_assign_image (
+ dest_image_type& dest_,
+ const src_image_type& src
+ )
+ {
+ image_view<dest_image_type> dest(dest_);
+ impl_assign_image(dest, src);
+ }
+
+ template <
+ typename dest_image_type,
+ typename src_image_type
+ >
+ void assign_image (
+ dest_image_type& dest,
+ const src_image_type& src
+ )
+ {
+ // check for the case where dest is the same object as src
+ if (is_same_object(dest,src))
+ return;
+
+ impl_assign_image(dest, mat(src));
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename dest_image_type,
+ typename src_image_type
+ >
+ void impl_assign_image_scaled (
+ image_view<dest_image_type>& dest,
+ const src_image_type& src,
+ const double thresh
+ )
+ {
+ DLIB_ASSERT( thresh > 0,
+ "\tvoid assign_image_scaled()"
+ << "\n\t You have given an threshold value"
+ << "\n\t thresh: " << thresh
+ );
+
+
+ typedef typename image_traits<dest_image_type>::pixel_type dest_pixel;
+
+ // If the destination has a dynamic range big enough to contain the source image data then just do a
+ // regular assign_image()
+ if (pixel_traits<dest_pixel>::max() >= pixel_traits<typename src_image_type::type>::max() &&
+ pixel_traits<dest_pixel>::min() <= pixel_traits<typename src_image_type::type>::min() )
+ {
+ impl_assign_image(dest, src);
+ return;
+ }
+
+ dest.set_size(src.nr(),src.nc());
+
+ if (src.size() == 0)
+ return;
+
+ if (src.size() == 1)
+ {
+ impl_assign_image(dest, src);
+ return;
+ }
+
+ // gather image statistics
+ running_stats<double> rs;
+ for (long r = 0; r < src.nr(); ++r)
+ {
+ for (long c = 0; c < src.nc(); ++c)
+ {
+ rs.add(get_pixel_intensity(src(r,c)));
+ }
+ }
+ typedef typename pixel_traits<typename src_image_type::type>::basic_pixel_type spix_type;
+
+ if (std::numeric_limits<spix_type>::is_integer)
+ {
+ // If the destination has a dynamic range big enough to contain the source image data then just do a
+ // regular assign_image()
+ if (pixel_traits<dest_pixel>::max() >= rs.max() &&
+ pixel_traits<dest_pixel>::min() <= rs.min() )
+ {
+ impl_assign_image(dest, src);
+ return;
+ }
+ }
+
+ // Figure out the range of pixel values based on image statistics. There might be some huge
+ // outliers so don't just pick the min and max values.
+ const double upper = std::min(rs.mean() + thresh*rs.stddev(), rs.max());
+ const double lower = std::max(rs.mean() - thresh*rs.stddev(), rs.min());
+
+
+ const double dest_min = pixel_traits<dest_pixel>::min();
+ const double dest_max = pixel_traits<dest_pixel>::max();
+
+ const double scale = (upper!=lower)? ((dest_max - dest_min) / (upper - lower)) : 0;
+
+ for (long r = 0; r < src.nr(); ++r)
+ {
+ for (long c = 0; c < src.nc(); ++c)
+ {
+ const double val = get_pixel_intensity(src(r,c)) - lower;
+
+ assign_pixel(dest[r][c], scale*val + dest_min);
+ }
+ }
+ }
+
+ template <
+ typename dest_image_type,
+ typename src_image_type
+ >
+ void impl_assign_image_scaled (
+ dest_image_type& dest_,
+ const src_image_type& src,
+ const double thresh
+ )
+ {
+ image_view<dest_image_type> dest(dest_);
+ impl_assign_image_scaled(dest, src, thresh);
+ }
+
+ template <
+ typename dest_image_type,
+ typename src_image_type
+ >
+ void assign_image_scaled (
+ dest_image_type& dest,
+ const src_image_type& src,
+ const double thresh = 4
+ )
+ {
+ // check for the case where dest is the same object as src
+ if (is_same_object(dest,src))
+ return;
+
+ impl_assign_image_scaled(dest, mat(src),thresh);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename dest_image_type,
+ typename src_pixel_type
+ >
+ void assign_all_pixels (
+ image_view<dest_image_type>& dest_img,
+ const src_pixel_type& src_pixel
+ )
+ {
+ for (long r = 0; r < dest_img.nr(); ++r)
+ {
+ for (long c = 0; c < dest_img.nc(); ++c)
+ {
+ assign_pixel(dest_img[r][c], src_pixel);
+ }
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename dest_image_type,
+ typename src_pixel_type
+ >
+ void assign_all_pixels (
+ dest_image_type& dest_img_,
+ const src_pixel_type& src_pixel
+ )
+ {
+ image_view<dest_image_type> dest_img(dest_img_);
+ assign_all_pixels(dest_img, src_pixel);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ void assign_border_pixels (
+ image_view<image_type>& img,
+ long x_border_size,
+ long y_border_size,
+ const typename image_traits<image_type>::pixel_type& p
+ )
+ {
+ DLIB_ASSERT( x_border_size >= 0 && y_border_size >= 0,
+ "\tvoid assign_border_pixels(img, p, border_size)"
+ << "\n\tYou have given an invalid border_size"
+ << "\n\tx_border_size: " << x_border_size
+ << "\n\ty_border_size: " << y_border_size
+ );
+
+ y_border_size = std::min(y_border_size, img.nr()/2+1);
+ x_border_size = std::min(x_border_size, img.nc()/2+1);
+
+ // assign the top border
+ for (long r = 0; r < y_border_size; ++r)
+ {
+ for (long c = 0; c < img.nc(); ++c)
+ {
+ img[r][c] = p;
+ }
+ }
+
+ // assign the bottom border
+ for (long r = img.nr()-y_border_size; r < img.nr(); ++r)
+ {
+ for (long c = 0; c < img.nc(); ++c)
+ {
+ img[r][c] = p;
+ }
+ }
+
+ // now assign the two sides
+ for (long r = y_border_size; r < img.nr()-y_border_size; ++r)
+ {
+ // left border
+ for (long c = 0; c < x_border_size; ++c)
+ img[r][c] = p;
+
+ // right border
+ for (long c = img.nc()-x_border_size; c < img.nc(); ++c)
+ img[r][c] = p;
+ }
+ }
+
+ template <
+ typename image_type
+ >
+ void assign_border_pixels (
+ image_type& img_,
+ long x_border_size,
+ long y_border_size,
+ const typename image_traits<image_type>::pixel_type& p
+ )
+ {
+ image_view<image_type> img(img_);
+ assign_border_pixels(img, x_border_size, y_border_size, p);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ void zero_border_pixels (
+ image_type& img,
+ long x_border_size,
+ long y_border_size
+ )
+ {
+ DLIB_ASSERT( x_border_size >= 0 && y_border_size >= 0,
+ "\tvoid zero_border_pixels(img, p, border_size)"
+ << "\n\tYou have given an invalid border_size"
+ << "\n\tx_border_size: " << x_border_size
+ << "\n\ty_border_size: " << y_border_size
+ );
+
+ typename image_traits<image_type>::pixel_type zero_pixel;
+ assign_pixel_intensity(zero_pixel, 0);
+ assign_border_pixels(img, x_border_size, y_border_size, zero_pixel);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ void zero_border_pixels (
+ image_view<image_type>& img,
+ long x_border_size,
+ long y_border_size
+ )
+ {
+ DLIB_ASSERT( x_border_size >= 0 && y_border_size >= 0,
+ "\tvoid zero_border_pixels(img, p, border_size)"
+ << "\n\tYou have given an invalid border_size"
+ << "\n\tx_border_size: " << x_border_size
+ << "\n\ty_border_size: " << y_border_size
+ );
+
+ typename image_traits<image_type>::pixel_type zero_pixel;
+ assign_pixel_intensity(zero_pixel, 0);
+ assign_border_pixels(img, x_border_size, y_border_size, zero_pixel);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ void zero_border_pixels (
+ image_view<image_type>& img,
+ rectangle inside
+ )
+ {
+ inside = inside.intersect(get_rect(img));
+ if (inside.is_empty())
+ {
+ assign_all_pixels(img, 0);
+ return;
+ }
+
+ for (long r = 0; r < inside.top(); ++r)
+ {
+ for (long c = 0; c < img.nc(); ++c)
+ assign_pixel(img[r][c], 0);
+ }
+ for (long r = inside.top(); r <= inside.bottom(); ++r)
+ {
+ for (long c = 0; c < inside.left(); ++c)
+ assign_pixel(img[r][c], 0);
+ for (long c = inside.right()+1; c < img.nc(); ++c)
+ assign_pixel(img[r][c], 0);
+ }
+ for (long r = inside.bottom()+1; r < img.nr(); ++r)
+ {
+ for (long c = 0; c < img.nc(); ++c)
+ assign_pixel(img[r][c], 0);
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ void zero_border_pixels (
+ image_type& img_,
+ const rectangle& inside
+ )
+ {
+ image_view<image_type> img(img_);
+ zero_border_pixels(img, inside);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_ASSIGN_IMAGe_
+
+
+
diff --git a/ml/dlib/dlib/image_transforms/assign_image_abstract.h b/ml/dlib/dlib/image_transforms/assign_image_abstract.h
new file mode 100644
index 000000000..5ba262ba5
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/assign_image_abstract.h
@@ -0,0 +1,196 @@
+// Copyright (C) 2007 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_ASSIGN_IMAGe_ABSTRACT
+#ifdef DLIB_ASSIGN_IMAGe_ABSTRACT
+
+#include "../pixel.h"
+#include "../image_processing/generic_image.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename dest_image_type,
+ typename src_image_type
+ >
+ void assign_image (
+ dest_image_type& dest_img,
+ const src_image_type& src_img
+ );
+ /*!
+ requires
+ - src_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h or any object convertible to a matrix
+ via mat().
+ - dest_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h or an image_view.
+ ensures
+ - #dest_img.nc() == src_img.nc()
+ - #dest_img.nr() == src_img.nr()
+ - for all valid r and c:
+ - performs assign_pixel(#dest_img[r][c],src_img[r][c])
+ (i.e. copies the src image to dest image)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename dest_image_type,
+ typename src_image_type
+ >
+ void assign_image_scaled (
+ dest_image_type& dest_img,
+ const src_image_type& src_img,
+ const double thresh = 4
+ );
+ /*!
+ requires
+ - src_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h or any object convertible to a matrix
+ via mat().
+ - dest_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h or an image_view.
+ - thresh > 0
+ ensures
+ - #dest_img.nc() == src_img.nc()
+ - #dest_img.nr() == src_img.nr()
+ - if (dest_img's pixels have a wide enough dynamic range to contain all the
+ pixels in src_img. (Note that dynamic range is determined by the min() and
+ max() pixel_traits properties)) then
+ - performs: assign_image(dest_img, src_img)
+ (i.e. in this case, no scaling is performed. Just a normal color space
+ conversion and copy )
+ - else
+ - #dest_img will be converted to a grayscale image
+ - scales the contents of src_img into the dynamic range of dest_img and then
+ assigns the result into dest_img. The thresh parameter is used to filter
+ source pixel values which are outliers. These outliers will saturate
+ at the edge of the destination image's dynamic range.
+ - Specifically, for all valid r and c:
+ - scales get_pixel_intensity(src_img[r][c]) into the dynamic range
+ of the dest_img. This is done by computing the mean and standard
+ deviation of src_img. Call the mean M and the standard deviation
+ D. Then the scaling from src_img to dest_img is performed using
+ the following mapping:
+ let SRC_UPPER = min(M + thresh*D, max(mat(src_img)))
+ let SRC_LOWER = max(M - thresh*D, min(mat(src_img)))
+ let DEST_UPPER = pixel_traits<image_traits<dest_image_type>::pixel_type>::max()
+ let DEST_LOWER = pixel_traits<image_traits<dest_image_type>::pixel_type>::min()
+
+ MAPPING: [SRC_LOWER, SRC_UPPER] -> [DEST_LOWER, DEST_UPPER]
+
+ Where this mapping is a linear mapping of values from the left range
+ into the right range of values. Source pixel values outside the left
+ range are modified to be at the appropriate end of the range.
+
+ The scaled pixel is then stored in dest_img[r][c].
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename dest_image_type,
+ typename src_pixel_type
+ >
+ void assign_all_pixels (
+ dest_image_type& dest_img,
+ const src_pixel_type& src_pixel
+ );
+ /*!
+ requires
+ - dest_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h or an image_view.
+ - pixel_traits<src_pixel_type> is defined
+ ensures
+ - #dest_img.nc() == dest_img.nc()
+ - #dest_img.nr() == dest_img.nr()
+ (i.e. the size of dest_img isn't changed by this function)
+ - for all valid r and c:
+ - performs assign_pixel(#dest_img[r][c],src_pixel)
+ (i.e. assigns the src pixel to every pixel in the dest image)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ void assign_border_pixels (
+ image_type& img,
+ long x_border_size,
+ long y_border_size,
+ const typename image_traits<image_type>::pixel_type& p
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h or an image_view
+ - x_border_size >= 0
+ - y_border_size >= 0
+ ensures
+ - #img.nc() == img.nc()
+ - #img.nr() == img.nr()
+ (i.e. the size of img isn't changed by this function)
+ - for all valid r such that r+y_border_size or r-y_border_size gives an invalid row
+ - for all valid c such that c+x_border_size or c-x_border_size gives an invalid column
+ - performs assign_pixel(#img[r][c],p)
+ (i.e. assigns the given pixel to every pixel in the border of img)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ void zero_border_pixels (
+ image_type& img,
+ long x_border_size,
+ long y_border_size
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h or an image_view
+ - x_border_size >= 0
+ - y_border_size >= 0
+ ensures
+ - #img.nc() == img.nc()
+ - #img.nr() == img.nr()
+ (i.e. the size of img isn't changed by this function)
+ - for all valid r such that r+y_border_size or r-y_border_size gives an invalid row
+ - for all valid c such that c+x_border_size or c-x_border_size gives an invalid column
+ - performs assign_pixel(#img[r][c], 0 )
+ (i.e. assigns 0 to every pixel in the border of img)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ void zero_border_pixels (
+ image_type& img,
+ rectangle inside
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h or an image_view
+ ensures
+ - #img.nc() == img.nc()
+ - #img.nr() == img.nr()
+ (i.e. the size of img isn't changed by this function)
+ - All the pixels in img that are not contained inside the inside rectangle
+ given to this function are set to 0. That is, anything not "inside" is on
+ the border and set to 0.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_ASSIGN_IMAGe_ABSTRACT
+
+
diff --git a/ml/dlib/dlib/image_transforms/colormaps.h b/ml/dlib/dlib/image_transforms/colormaps.h
new file mode 100644
index 000000000..813d1ff75
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/colormaps.h
@@ -0,0 +1,269 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_RANDOMLY_COlOR_IMAGE_Hh_
+#define DLIB_RANDOMLY_COlOR_IMAGE_Hh_
+
+#include "colormaps_abstract.h"
+#include "../hash.h"
+#include "../pixel.h"
+#include "../matrix.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename T>
+ struct op_randomly_color_image : does_not_alias
+ {
+ op_randomly_color_image( const T& img_) : img(img_){}
+
+ const T& img;
+
+ const static long cost = 7;
+ const static long NR = 0;
+ const static long NC = 0;
+ typedef rgb_pixel type;
+ typedef const rgb_pixel const_ret_type;
+ typedef default_memory_manager mem_manager_type;
+ typedef row_major_layout layout_type;
+
+ const_ret_type apply (long r, long c ) const
+ {
+ const unsigned long gray = get_pixel_intensity(mat(img)(r,c));
+ if (gray != 0)
+ {
+ const uint32 h = murmur_hash3_2(gray,0);
+ rgb_pixel pix;
+ pix.red = static_cast<unsigned char>(h)%200 + 55;
+ pix.green = static_cast<unsigned char>(h>>8)%200 + 55;
+ pix.blue = static_cast<unsigned char>(h>>16)%200 + 55;
+ return pix;
+ }
+ else
+ {
+ // keep black pixels black
+ return rgb_pixel(0,0,0);
+ }
+ }
+
+ long nr () const { return num_rows(img); }
+ long nc () const { return num_columns(img); }
+ };
+
+ template <
+ typename image_type
+ >
+ const matrix_op<op_randomly_color_image<image_type> >
+ randomly_color_image (
+ const image_type& img
+ )
+ {
+ typedef op_randomly_color_image<image_type> op;
+ return matrix_op<op>(op(img));
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ inline rgb_pixel colormap_heat (
+ double value,
+ double min_val,
+ double max_val
+ )
+ {
+ // scale the gray value into the range [0, 1]
+ const double gray = put_in_range(0, 1, (value - min_val)/(max_val-min_val));
+ rgb_pixel pix(0,0,0);
+
+ pix.red = static_cast<unsigned char>(std::min(gray/0.4,1.0)*255 + 0.5);
+
+ if (gray > 0.4)
+ {
+ pix.green = static_cast<unsigned char>(std::min((gray-0.4)/0.4,1.0)*255 + 0.5);
+ }
+ if (gray > 0.8)
+ {
+ pix.blue = static_cast<unsigned char>(std::min((gray-0.8)/0.2,1.0)*255 + 0.5);
+ }
+
+ return pix;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename T>
+ struct op_heatmap : does_not_alias
+ {
+ op_heatmap(
+ const T& img_,
+ const double max_val_,
+ const double min_val_
+ ) : img(img_), max_val(max_val_), min_val(min_val_){}
+
+ const T& img;
+
+ const double max_val;
+ const double min_val;
+
+ const static long cost = 7;
+ const static long NR = 0;
+ const static long NC = 0;
+ typedef rgb_pixel type;
+ typedef const rgb_pixel const_ret_type;
+ typedef default_memory_manager mem_manager_type;
+ typedef row_major_layout layout_type;
+
+ const_ret_type apply (long r, long c ) const
+ {
+ return colormap_heat(get_pixel_intensity(mat(img)(r,c)), min_val, max_val);
+ }
+
+ long nr () const { return num_rows(img); }
+ long nc () const { return num_columns(img); }
+ };
+
+ template <
+ typename image_type
+ >
+ const matrix_op<op_heatmap<image_type> >
+ heatmap (
+ const image_type& img,
+ double max_val,
+ double min_val = 0
+ )
+ {
+ typedef op_heatmap<image_type> op;
+ return matrix_op<op>(op(img,max_val,min_val));
+ }
+
+ template <
+ typename image_type
+ >
+ const matrix_op<op_heatmap<image_type> >
+ heatmap (
+ const image_type& img
+ )
+ {
+ typedef op_heatmap<image_type> op;
+ if (num_columns(img) * num_rows(img) != 0)
+ return matrix_op<op>(op(img,max(mat(img)),min(mat(img))));
+ else
+ return matrix_op<op>(op(img,0,0));
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ inline rgb_pixel colormap_jet (
+ double value,
+ double min_val,
+ double max_val
+ )
+ {
+ // scale the gray value into the range [0, 8]
+ const double gray = 8*put_in_range(0, 1, (value - min_val)/(max_val-min_val));
+ rgb_pixel pix;
+ // s is the slope of color change
+ const double s = 1.0/2.0;
+
+ if (gray <= 1)
+ {
+ pix.red = 0;
+ pix.green = 0;
+ pix.blue = static_cast<unsigned char>((gray+1)*s*255 + 0.5);
+ }
+ else if (gray <= 3)
+ {
+ pix.red = 0;
+ pix.green = static_cast<unsigned char>((gray-1)*s*255 + 0.5);
+ pix.blue = 255;
+ }
+ else if (gray <= 5)
+ {
+ pix.red = static_cast<unsigned char>((gray-3)*s*255 + 0.5);
+ pix.green = 255;
+ pix.blue = static_cast<unsigned char>((5-gray)*s*255 + 0.5);
+ }
+ else if (gray <= 7)
+ {
+ pix.red = 255;
+ pix.green = static_cast<unsigned char>((7-gray)*s*255 + 0.5);
+ pix.blue = 0;
+ }
+ else
+ {
+ pix.red = static_cast<unsigned char>((9-gray)*s*255 + 0.5);
+ pix.green = 0;
+ pix.blue = 0;
+ }
+
+ return pix;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename T>
+ struct op_jet : does_not_alias
+ {
+ op_jet(
+ const T& img_,
+ const double max_val_,
+ const double min_val_
+ ) : img(img_), max_val(max_val_), min_val(min_val_){}
+
+ const T& img;
+
+ const double max_val;
+ const double min_val;
+
+ const static long cost = 7;
+ const static long NR = 0;
+ const static long NC = 0;
+ typedef rgb_pixel type;
+ typedef const rgb_pixel const_ret_type;
+ typedef default_memory_manager mem_manager_type;
+ typedef row_major_layout layout_type;
+
+ const_ret_type apply (long r, long c ) const
+ {
+ return colormap_jet(get_pixel_intensity(mat(img)(r,c)), min_val, max_val);
+ }
+
+ long nr () const { return num_rows(img); }
+ long nc () const { return num_columns(img); }
+ };
+
+ template <
+ typename image_type
+ >
+ const matrix_op<op_jet<image_type> >
+ jet (
+ const image_type& img,
+ double max_val,
+ double min_val = 0
+ )
+ {
+ typedef op_jet<image_type> op;
+ return matrix_op<op>(op(img,max_val,min_val));
+ }
+
+ template <
+ typename image_type
+ >
+ const matrix_op<op_jet<image_type> >
+ jet (
+ const image_type& img
+ )
+ {
+ typedef op_jet<image_type> op;
+ if (num_columns(img) * num_rows(img) != 0)
+ return matrix_op<op>(op(img,max(mat(img)),min(mat(img))));
+ else
+ return matrix_op<op>(op(img,0,0));
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_RANDOMLY_COlOR_IMAGE_Hh_
+
diff --git a/ml/dlib/dlib/image_transforms/colormaps_abstract.h b/ml/dlib/dlib/image_transforms/colormaps_abstract.h
new file mode 100644
index 000000000..41a7784ba
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/colormaps_abstract.h
@@ -0,0 +1,152 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_RANDOMLY_COlOR_IMAGE_ABSTRACT_Hh_
+#ifdef DLIB_RANDOMLY_COlOR_IMAGE_ABSTRACT_Hh_
+
+#include "../hash.h"
+#include "../pixel.h"
+#include "../matrix.h"
+#include "../image_processing/generic_image.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ const matrix_exp randomly_color_image (
+ const image_type& img
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h, or something convertible to a matrix
+ via mat().
+ ensures
+ - randomly generates a mapping from gray level pixel values
+ to the RGB pixel space and then uses this mapping to create
+ a colored version of img. Returns a matrix which represents
+ this colored version of img.
+ - black pixels in img will remain black in the output image.
+ - The returned matrix will have the same dimensions as img.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ rgb_pixel colormap_heat (
+ double value,
+ double min_val,
+ double max_val
+ );
+ /*!
+ requires
+ - min_val <= max_val
+ ensures
+ - Maps value to a color. In particular, we use a heatmap color scheme where
+ values <= min_val are black and larger values become more red, then yellow,
+ and then white as they approach max_val.
+ !*/
+
+ template <
+ typename image_type
+ >
+ const matrix_exp heatmap (
+ const image_type& img,
+ double max_val,
+ double min_val = 0
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h, or something convertible to a matrix
+ via mat().
+ ensures
+ - Interprets img as a grayscale image and returns a new matrix which represents
+ a colored version of img. In particular, the colormap is defined by
+ out_color = colormap_heat(grayscale_pixel_value, min_val, max_val).
+ - The returned matrix will have the same dimensions as img.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ const matrix_exp heatmap (
+ const image_type& img
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h, or something convertible to a matrix
+ via mat().
+ ensures
+ - returns heatmap(img, max(mat(img)), min(mat(img)))
+ !*/
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ rgb_pixel colormap_jet (
+ double value,
+ double min_val,
+ double max_val
+ );
+ /*!
+ requires
+ - min_val <= max_val
+ ensures
+ - Maps value to a color. In particular, we use a jet color scheme where
+ values <= min_val are dark blue and larger values become light blue, then
+ yellow, and then finally red as they approach max_val.
+ !*/
+
+ template <
+ typename image_type
+ >
+ const matrix_exp jet (
+ const image_type& img,
+ double max_val,
+ double min_val = 0
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h, or something convertible to a matrix
+ via mat().
+ ensures
+ - Interprets img as a grayscale image and returns a new matrix which represents
+ a colored version of img. In particular, the colormap is defined by
+ out_color = colormap_jet(grayscale_pixel_value, min_val, max_val).
+ - The returned matrix will have the same dimensions as img.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ const matrix_exp jet (
+ const image_type& img
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h, or something convertible to a matrix
+ via mat().
+ ensures
+ - returns jet(img, max(mat(img)), min(mat(img)))
+ !*/
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_RANDOMLY_COlOR_IMAGE_ABSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/image_transforms/draw.h b/ml/dlib/dlib/image_transforms/draw.h
new file mode 100644
index 000000000..66737b215
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/draw.h
@@ -0,0 +1,396 @@
+// Copyright (C) 2008 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_DRAW_IMAGe_
+#define DLIB_DRAW_IMAGe_
+
+#include "draw_abstract.h"
+#include "../algs.h"
+#include "../pixel.h"
+#include "../matrix.h"
+#include <cmath>
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
    // Draws the line segment from (x1,y1) to (x2,y2) onto the image c_ with the
    // color val.  Axis-aligned lines are drawn exactly; diagonal lines are
    // anti-aliased by splitting each sample's alpha between the two pixels that
    // straddle the line's true position (in the style of Wu's algorithm).
    // Pixels falling outside the image are silently skipped, so the endpoints
    // may lie outside the image bounds.
    template <
        typename image_type,
        typename pixel_type
        >
    void draw_line (
        long x1,
        long y1,
        long x2,
        long y2,
        image_type& c_,
        const pixel_type& val
    )
    {
        image_view<image_type> c(c_);
        if (x1 == x2)
        {
            // make sure y1 comes before y2
            if (y1 > y2)
                swap(y1,y2);

            // the whole line is off the image horizontally; nothing to draw
            if (x1 < 0 || x1 >= c.nc())
                return;


            // this is a vertical line
            for (long y = y1; y <= y2; ++y)
            {
                if (y < 0 || y >= c.nr())
                    continue;

                assign_pixel(c[y][x1], val);
            }
        }
        else if (y1 == y2)
        {

            // make sure x1 comes before x2
            if (x1 > x2)
                swap(x1,x2);

            // the whole line is off the image vertically; nothing to draw
            if (y1 < 0 || y1 >= c.nr())
                return;

            // this is a horizontal line
            for (long x = x1; x <= x2; ++x)
            {
                if (x < 0 || x >= c.nc())
                    continue;

                assign_pixel(c[y1][x] , val);
            }
        }
        else
        {
            // This part is a little more complicated because we are going to perform alpha
            // blending so the diagonal lines look nice.
            const rectangle valid_area = get_rect(c);
            rgb_alpha_pixel alpha_pixel;
            assign_pixel(alpha_pixel, val);
            // max_alpha is the caller's requested opacity; the per-pixel alphas
            // computed below are fractions of it.
            const unsigned char max_alpha = alpha_pixel.alpha;

            const long rise = (((long)y2) - ((long)y1));
            const long run = (((long)x2) - ((long)x1));
            if (std::abs(rise) < std::abs(run))
            {
                // The line is closer to horizontal: step along x, computing the
                // exact (double) y for each column.
                const double slope = ((double)rise)/run;


                double first, last;


                // clip the x range we iterate over to the image's columns
                if (x1 > x2)
                {
                    first = std::max(x2,valid_area.left());
                    last = std::min(x1,valid_area.right());
                }
                else
                {
                    first = std::max(x1,valid_area.left());
                    last = std::min(x2,valid_area.right());
                }

                long y;
                long x;
                const double x1f = x1;
                const double y1f = y1;
                for (double i = first; i <= last; ++i)
                {
                    const double dy = slope*(i-x1f) + y1f;
                    const double dx = i;

                    y = static_cast<long>(dy);
                    x = static_cast<long>(dx);


                    // blend into the pixel above the true line position in
                    // proportion to how close the line passes to it...
                    if (y >= valid_area.top() && y <= valid_area.bottom())
                    {
                        alpha_pixel.alpha = static_cast<unsigned char>((1.0-(dy-y))*max_alpha);
                        assign_pixel(c[y][x], alpha_pixel);
                    }
                    // ...and the remaining fraction into the pixel below.
                    if (y+1 >= valid_area.top() && y+1 <= valid_area.bottom())
                    {
                        alpha_pixel.alpha = static_cast<unsigned char>((dy-y)*max_alpha);
                        assign_pixel(c[y+1][x], alpha_pixel);
                    }
                }
            }
            else
            {
                // The line is closer to vertical: step along y, computing the
                // exact (double) x for each row.
                const double slope = ((double)run)/rise;


                double first, last;


                // clip the y range we iterate over to the image's rows
                if (y1 > y2)
                {
                    first = std::max(y2,valid_area.top());
                    last = std::min(y1,valid_area.bottom());
                }
                else
                {
                    first = std::max(y1,valid_area.top());
                    last = std::min(y2,valid_area.bottom());
                }

                long x;
                long y;
                const double x1f = x1;
                const double y1f = y1;
                for (double i = first; i <= last; ++i)
                {
                    const double dx = slope*(i-y1f) + x1f;
                    const double dy = i;

                    y = static_cast<long>(dy);
                    x = static_cast<long>(dx);

                    // split the alpha between the two pixels that straddle the
                    // line's true x position on this row
                    if (x >= valid_area.left() && x <= valid_area.right())
                    {
                        alpha_pixel.alpha = static_cast<unsigned char>((1.0-(dx-x))*max_alpha);
                        assign_pixel(c[y][x], alpha_pixel);
                    }
                    if (x+1 >= valid_area.left() && x+1 <= valid_area.right())
                    {
                        alpha_pixel.alpha = static_cast<unsigned char>((dx-x)*max_alpha);
                        assign_pixel(c[y][x+1], alpha_pixel);
                    }
                }
            }
        }

    }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename pixel_type
+ >
+ void draw_line (
+ image_type& c,
+ const point& p1,
+ const point& p2,
+ const pixel_type& val
+ )
+ {
+ draw_line(p1.x(),p1.y(),p2.x(),p2.y(),c,val);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename pixel_type
+ >
+ void draw_rectangle (
+ image_type& c,
+ const rectangle& rect,
+ const pixel_type& val
+ )
+ {
+ draw_line(c, rect.tl_corner(), rect.tr_corner(), val);
+ draw_line(c, rect.bl_corner(), rect.br_corner(), val);
+ draw_line(c, rect.tl_corner(), rect.bl_corner(), val);
+ draw_line(c, rect.tr_corner(), rect.br_corner(), val);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename pixel_type
+ >
+ void draw_rectangle (
+ image_type& c,
+ const rectangle& rect,
+ const pixel_type& val,
+ unsigned int thickness
+ )
+ {
+ for (unsigned int i = 0; i < thickness; ++i)
+ {
+ if ((i%2)==0)
+ draw_rectangle(c,shrink_rect(rect,(i+1)/2),val);
+ else
+ draw_rectangle(c,grow_rect(rect,(i+1)/2),val);
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename pixel_type
+ >
+ void fill_rect (
+ image_type& img_,
+ const rectangle& rect,
+ const pixel_type& pixel
+ )
+ {
+ image_view<image_type> img(img_);
+ rectangle area = rect.intersect(get_rect(img));
+
+ for (long r = area.top(); r <= area.bottom(); ++r)
+ {
+ for (long c = area.left(); c <= area.right(); ++c)
+ {
+ assign_pixel(img[r][c], pixel);
+ }
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_array_type
+ >
+ matrix<typename image_traits<typename image_array_type::value_type>::pixel_type> tile_images (
+ const image_array_type& images
+ )
+ {
+ typedef typename image_traits<typename image_array_type::value_type>::pixel_type T;
+
+ if (images.size() == 0)
+ return matrix<T>();
+
+ const unsigned long size_nc = square_root(images.size());
+ const unsigned long size_nr = (size_nc*(size_nc-1)>=images.size())? size_nc-1 : size_nc;
+ // Figure out the size we have to use for each chip in the big main image. We will
+ // use the largest dimensions seen across all the chips.
+ long nr = 0;
+ long nc = 0;
+ for (unsigned long i = 0; i < images.size(); ++i)
+ {
+ nr = std::max(num_rows(images[i]), nr);
+ nc = std::max(num_columns(images[i]), nc);
+ }
+
+ matrix<T> temp(size_nr*nr, size_nc*nc);
+ T background_color;
+ assign_pixel(background_color, 0);
+ temp = background_color;
+ unsigned long idx = 0;
+ for (unsigned long r = 0; r < size_nr; ++r)
+ {
+ for (unsigned long c = 0; c < size_nc; ++c)
+ {
+ if (idx < images.size())
+ {
+ set_subm(temp, r*nr, c*nc, nr, nc) = mat(images[idx]);
+ }
+ ++idx;
+ }
+ }
+ return temp;
+ }
+
+// ----------------------------------------------------------------------------------------
+
    // Draws a filled circle of the given radius centered at center_point onto
    // img_ using the given pixel color.  The circle is rasterized column by
    // column: for each x in range, the top of the arc is found and a vertical
    // line fills the column between the arc and its mirror image below the
    // center.  Circles with radius <= 1 are rendered as a single alpha-blended
    // pixel whose opacity is proportional to the radius.
    template <
        typename image_type,
        typename pixel_type
        >
    void draw_solid_circle (
        image_type& img_,
        const dpoint& center_point,
        double radius,
        const pixel_type& pixel
    )
    {
        image_view<image_type> img(img_);
        using std::sqrt;
        const rectangle valid_area(get_rect(img));
        const double x = center_point.x();
        const double y = center_point.y();
        const point cp(center_point);
        if (radius > 1)
        {
            long first_x = static_cast<long>(x - radius + 0.5);
            long last_x = static_cast<long>(x + radius + 0.5);
            const double rs = radius*radius;

            // ensure that we only loop over the part of the x dimension that this
            // image contains.
            if (first_x < valid_area.left())
                first_x = valid_area.left();
            if (last_x > valid_area.right())
                last_x = valid_area.right();

            long top, bottom;

            // top of the arc at the first column (sampled at the column's outer
            // edge, hence the -0.5)
            top = static_cast<long>(sqrt(std::max(rs - (first_x-x-0.5)*(first_x-x-0.5),0.0))+0.5);
            top += y;
            long last = top;

            // draw the left half of the circle
            long middle = std::min(cp.x()-1,last_x);
            for (long i = first_x; i <= middle; ++i)
            {
                double a = i - x + 0.5;
                // find the top of the arc
                top = static_cast<long>(sqrt(std::max(rs - a*a,0.0))+0.5);
                top += y;
                long temp = top;

                // fill from the arc top down to the previous column's top so no
                // gaps appear where the arc rises steeply, mirroring each row
                // across the center line (bottom = 2*y - top).
                while(top >= last)
                {
                    bottom = y - top + y;
                    draw_line(img_, point(i,top),point(i,bottom),pixel);
                    --top;
                }

                last = temp;
            }

            middle = std::max(cp.x(),first_x);
            top = static_cast<long>(sqrt(std::max(rs - (last_x-x+0.5)*(last_x-x+0.5),0.0))+0.5);
            top += y;
            last = top;
            // draw the right half of the circle
            for (long i = last_x; i >= middle; --i)
            {
                double a = i - x - 0.5;
                // find the top of the arc
                top = static_cast<long>(sqrt(std::max(rs - a*a,0.0))+0.5);
                top += y;
                long temp = top;

                while(top >= last)
                {
                    bottom = y - top + y;
                    draw_line(img_, point(i,top),point(i,bottom),pixel);
                    --top;
                }

                last = temp;
            }
        }
        else if (valid_area.contains(cp))
        {
            // For circles smaller than a pixel we will just alpha blend them in proportion
            // to how small they are.
            rgb_alpha_pixel temp;
            assign_pixel(temp, pixel);
            temp.alpha = static_cast<unsigned char>(255*radius + 0.5);
            assign_pixel(img[cp.y()][cp.x()], temp);
        }
    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_DRAW_IMAGe_
+
+
+
+
diff --git a/ml/dlib/dlib/image_transforms/draw_abstract.h b/ml/dlib/dlib/image_transforms/draw_abstract.h
new file mode 100644
index 000000000..6631f8d8f
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/draw_abstract.h
@@ -0,0 +1,150 @@
+// Copyright (C) 2008 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_DRAW_IMAGe_ABSTRACT
+#ifdef DLIB_DRAW_IMAGe_ABSTRACT
+
+#include "../matrix.h"
+#include "../image_processing/generic_image.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename pixel_type
+ >
+ void draw_line (
+ image_type& img,
+ const point& p1,
+ const point& p2,
+ const pixel_type& val
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ ensures
+ - #img.nr() == img.nr() && #img.nc() == img.nc()
+ (i.e. the dimensions of the input image are not changed)
+ - for all valid r and c that are on the line between point p1 and p2:
+ - performs assign_pixel(img[r][c], val)
+ (i.e. it draws the line from p1 to p2 onto the image)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename pixel_type
+ >
+ void draw_line (
+ long x1,
+ long y1,
+ long x2,
+ long y2,
+ image_type& img,
+ const pixel_type& val
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ ensures
+ - performs draw_line(img, point(x1,y1), point(x2,y2), val)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename pixel_type
+ >
+ void draw_rectangle (
+ image_type& img,
+ const rectangle& rect,
+ const pixel_type& val,
+ unsigned int thickness = 1
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - pixel_traits<pixel_type> is defined
+ ensures
+ - Draws the given rectangle onto the image img. It does this by calling
+ draw_line() four times to draw the four sides of the rectangle.
+ - The rectangle is drawn with the color given by val.
+ - The drawn rectangle will have edges that are thickness pixels wide.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename pixel_type
+ >
+ void draw_solid_circle (
+ image_type& img,
+ const dpoint& center_point,
+ double radius,
+ const pixel_type& pixel
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - pixel_traits<pixel_type> is defined
+ ensures
+ - Draws a fully filled in circle onto image that is centered at center_point
+ and has the given radius. The circle will be filled by assigning the given
+ pixel value to each element of the circle.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename pixel_type
+ >
+ void fill_rect (
+ image_type& img,
+ const rectangle& rect,
+ const pixel_type& pixel
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - pixel_traits<pixel_type> is defined
+ ensures
+ - fills the area defined by rect in the given image with the given pixel value.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_array_type
+ >
+ matrix<typename image_traits<typename image_array_type::value_type>::pixel_type> tile_images (
+ const image_array_type& images
+ );
+ /*!
+ requires
+ - image_array_type is a dlib::array of image objects where each image object
+ implements the interface defined in dlib/image_processing/generic_image.h
+ ensures
+ - This function takes the given images and tiles them into a single large
+ square image and returns this new big tiled image. Therefore, it is a useful
+ method to visualize many small images at once.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_DRAW_IMAGe_ABSTRACT
+
+
+
diff --git a/ml/dlib/dlib/image_transforms/edge_detector.h b/ml/dlib/dlib/image_transforms/edge_detector.h
new file mode 100644
index 000000000..2fa898fed
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/edge_detector.h
@@ -0,0 +1,302 @@
+// Copyright (C) 2008 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_EDGE_DETECTOr_
+#define DLIB_EDGE_DETECTOr_
+
+#include "edge_detector_abstract.h"
+#include "../pixel.h"
+#include "../array2d.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename T
+ >
+ inline char edge_orientation (
+ const T& x_,
+ const T& y_
+ )
+ {
+
+ // if this is a perfectly horizontal gradient then return right away
+ if (x_ == 0)
+ {
+ return '|';
+ }
+ else if (y_ == 0) // if this is a perfectly vertical gradient then return right away
+ {
+ return '-';
+ }
+
+ // Promote x so that when we multiply by 128 later we know overflow won't happen.
+ typedef typename promote<T>::type type;
+ type x = x_;
+ type y = y_;
+
+ if (x < 0)
+ {
+ x = -x;
+ if (y < 0)
+ {
+ y = -y;
+ x *= 128;
+ const type temp = x/y;
+ if (temp > 309)
+ return '-';
+ else if (temp > 53)
+ return '/';
+ else
+ return '|';
+ }
+ else
+ {
+ x *= 128;
+ const type temp = x/y;
+ if (temp > 309)
+ return '-';
+ else if (temp > 53)
+ return '\\';
+ else
+ return '|';
+ }
+ }
+ else
+ {
+ if (y < 0)
+ {
+ y = -y;
+ x *= 128;
+
+ const type temp = x/y;
+ if (temp > 309)
+ return '-';
+ else if (temp > 53)
+ return '\\';
+ else
+ return '|';
+ }
+ else
+ {
+ x *= 128;
+
+ const type temp = x/y;
+ if (temp > 309)
+ return '-';
+ else if (temp > 53)
+ return '/';
+ else
+ return '|';
+ }
+ }
+
+ }
+
+// ----------------------------------------------------------------------------------------
+
    // Applies the 3x3 Sobel operator to in_img_ and writes the horizontal
    // gradient response to horz_ and the vertical gradient response to vert_.
    // Both outputs are resized to match the input and their one-pixel border
    // (where the filter does not fit) is set to 0.  The output pixel type must
    // be signed since gradients can be negative.  None of the three images may
    // alias each other.
    template <
        typename in_image_type,
        typename out_image_type
        >
    void sobel_edge_detector (
        const in_image_type& in_img_,
        out_image_type& horz_,
        out_image_type& vert_
    )
    {
        typedef typename image_traits<out_image_type>::pixel_type pixel_type;
        COMPILE_TIME_ASSERT(pixel_traits<pixel_type>::is_unsigned == false);
        DLIB_ASSERT( !is_same_object(in_img_,horz_) && !is_same_object(in_img_,vert_) &&
            !is_same_object(horz_,vert_),
            "\tvoid sobel_edge_detector(in_img_, horz_, vert_)"
            << "\n\t You can't give the same image as more than one argument"
            << "\n\t is_same_object(in_img_,horz_): " << is_same_object(in_img_,horz_)
            << "\n\t is_same_object(in_img_,vert_): " << is_same_object(in_img_,vert_)
            << "\n\t is_same_object(horz_,vert_): " << is_same_object(horz_,vert_)
        );


        // the standard Sobel kernels
        const int vert_filter[3][3] = {{-1,-2,-1},
        {0,0,0},
        {1,2,1}};
        const int horz_filter[3][3] = { {-1,0,1},
        {-2,0,2},
        {-1,0,1}};

        const long M = 3;
        const long N = 3;


        const_image_view<in_image_type> in_img(in_img_);
        image_view<out_image_type> horz(horz_);
        image_view<out_image_type> vert(vert_);

        horz.set_size(in_img.nr(),in_img.nc());
        vert.set_size(in_img.nr(),in_img.nc());

        // zero the border rows/columns the filter loop below never reaches
        assign_border_pixels(horz,1,1,0);
        assign_border_pixels(vert,1,1,0);

        // figure out the range that we should apply the filter to
        const long first_row = M/2;
        const long first_col = N/2;
        const long last_row = in_img.nr() - M/2;
        const long last_col = in_img.nc() - N/2;


        // apply the filter to the image
        for (long r = first_row; r < last_row; ++r)
        {
            for (long c = first_col; c < last_col; ++c)
            {
                typedef typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type bp_type;

                // accumulate in a promoted type so the 3x3 weighted sum cannot
                // overflow the input's pixel type
                typename promote<bp_type>::type p, horz_temp, vert_temp;
                horz_temp = 0;
                vert_temp = 0;
                for (long m = 0; m < M; ++m)
                {
                    for (long n = 0; n < N; ++n)
                    {
                        // pull out the current pixel and put it into p
                        p = get_pixel_intensity(in_img[r-M/2+m][c-N/2+n]);

                        horz_temp += p*horz_filter[m][n];
                        vert_temp += p*vert_filter[m][n];
                    }
                }

                assign_pixel(horz[r][c] , horz_temp);
                assign_pixel(vert[r][c] , vert_temp);

            }
        }
    }
+
+// ----------------------------------------------------------------------------------------
+
+ namespace impl
+ {
+ template <typename T>
+ typename promote<T>::type square (const T& a)
+ {
+ return static_cast<T>(a)*static_cast<T>(a);
+ }
+ }
+
    // Performs non-maximum suppression on a gradient image pair: for each
    // pixel the squared edge strength horz^2 + vert^2 is compared against its
    // two neighbors along the direction reported by edge_orientation(); the
    // pixel's edge strength (the Euclidean norm) is kept only if it is a local
    // maximum, otherwise 0 is written.  out_img is resized to match and its
    // one-pixel border is zeroed.  out_img may not alias either input.
    template <
        typename in_image_type,
        typename out_image_type
        >
    void suppress_non_maximum_edges (
        const in_image_type& horz_,
        const in_image_type& vert_,
        out_image_type& out_img_
    )
    {
        const_image_view<in_image_type> horz(horz_);
        const_image_view<in_image_type> vert(vert_);
        image_view<out_image_type> out_img(out_img_);

        COMPILE_TIME_ASSERT(is_signed_type<typename image_traits<in_image_type>::pixel_type>::value);
        DLIB_ASSERT( horz.nr() == vert.nr() && horz.nc() == vert.nc(),
            "\tvoid suppress_non_maximum_edges(horz, vert, out_img)"
            << "\n\tYou have to give horz and vert gradient images that are the same size"
            << "\n\thorz.nr(): " << horz.nr()
            << "\n\thorz.nc(): " << horz.nc()
            << "\n\tvert.nr(): " << vert.nr()
            << "\n\tvert.nc(): " << vert.nc()
        );
        DLIB_ASSERT( !is_same_object(out_img_,horz_) && !is_same_object(out_img_,vert_),
            "\tvoid suppress_non_maximum_edges(horz_, vert_, out_img_)"
            << "\n\t out_img can't be the same as one of the input images."
            << "\n\t is_same_object(out_img_,horz_): " << is_same_object(out_img_,horz_)
            << "\n\t is_same_object(out_img_,vert_): " << is_same_object(out_img_,vert_)
        );

        using std::min;
        using std::abs;


        // if there isn't any input image then don't do anything
        if (horz.size() == 0)
        {
            out_img.clear();
            return;
        }

        out_img.set_size(horz.nr(),horz.nc());

        // the loop below never writes the border pixels, so zero them here
        zero_border_pixels(out_img,1,1);

        // now do non maximum suppression while we copy the
        const long M = 3;
        const long N = 3;

        // figure out the range that we should apply the filter to
        const long first_row = M/2;
        const long first_col = N/2;
        const long last_row = horz.nr() - M/2;
        const long last_col = horz.nc() - N/2;


        // apply the filter to the image
        for (long r = first_row; r < last_row; ++r)
        {
            for (long c = first_col; c < last_col; ++c)
            {
                typedef typename promote<typename image_traits<in_image_type>::pixel_type>::type T;
                const T y = horz[r][c];
                const T x = vert[r][c];

                using impl::square;

                // squared gradient magnitude at this pixel
                const T val = square(horz[r][c]) + square(vert[r][c]);

                const char ori = edge_orientation(x,y);
                const unsigned char zero = 0;
                // Compare against the two neighbors perpendicular to the edge,
                // i.e. along the gradient direction indicated by ori.
                switch (ori)
                {
                    case '-':
                        if (square(horz[r-1][c])+square(vert[r-1][c]) > val || square(horz[r+1][c]) + square(vert[r+1][c]) > val)
                            assign_pixel(out_img[r][c] , zero);
                        else
                            assign_pixel(out_img[r][c] , std::sqrt((double)val));
                        break;

                    case '|':
                        if (square(horz[r][c-1]) + square(vert[r][c-1]) > val || square(horz[r][c+1]) + square(vert[r][c+1]) > val)
                            assign_pixel(out_img[r][c] , zero);
                        else
                            assign_pixel(out_img[r][c] , std::sqrt((double)val));
                        break;

                    case '/':
                        if (square(horz[r-1][c-1]) + square(vert[r-1][c-1]) > val || square(horz[r+1][c+1]) + square(vert[r+1][c+1]) > val)
                            assign_pixel(out_img[r][c] , zero);
                        else
                            assign_pixel(out_img[r][c] , std::sqrt((double)val));
                        break;

                    case '\\':
                        if (square(horz[r+1][c-1]) + square(vert[r+1][c-1]) > val || square(horz[r-1][c+1]) + square(vert[r-1][c+1]) > val)
                            assign_pixel(out_img[r][c] , zero);
                        else
                            assign_pixel(out_img[r][c] , std::sqrt((double)val));
                        break;

                }
            }
        }
    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_EDGE_DETECTOr_
+
+
+
diff --git a/ml/dlib/dlib/image_transforms/edge_detector_abstract.h b/ml/dlib/dlib/image_transforms/edge_detector_abstract.h
new file mode 100644
index 000000000..42c991665
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/edge_detector_abstract.h
@@ -0,0 +1,112 @@
+// Copyright (C) 2008 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_EDGE_DETECTOr_ABSTRACT_
+#ifdef DLIB_EDGE_DETECTOr_ABSTRACT_
+
+#include "../pixel.h"
+#include "../image_processing/generic_image.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename T
+ >
+ inline char edge_orientation (
+ const T& x,
+ const T& y
+ );
+ /*!
+ ensures
+ - returns the orientation of the line drawn from the origin to the point (x,y).
+ The orientation is represented pictorially using the four ascii
+ characters /,|,\, and -.
+ - if (the line is horizontal) then
+ returns '-'
+ - if (the line is vertical) then
+ returns '|'
+ - if (the line is diagonal with a positive slope) then
+ returns '/'
+ - if (the line is diagonal with a negative slope) then
+ returns '\\'
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type
+ >
+ void sobel_edge_detector (
+ const in_image_type& in_img,
+ out_image_type& horz,
+ out_image_type& vert
+ );
+ /*!
+ requires
+ - in_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - out_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - out_image_type must use signed grayscale pixels
+ - is_same_object(in_img,horz) == false
+ - is_same_object(in_img,vert) == false
+ - is_same_object(horz,vert) == false
+ ensures
+ - Applies the sobel edge detector to the given input image and stores the resulting
+ edge detections in the horz and vert images
+ - #horz.nr() == in_img.nr()
+ - #horz.nc() == in_img.nc()
+ - #vert.nr() == in_img.nr()
+ - #vert.nc() == in_img.nc()
+ - for all valid r and c:
+ - #horz[r][c] == the magnitude of the horizontal gradient at the point in_img[r][c]
+ - #vert[r][c] == the magnitude of the vertical gradient at the point in_img[r][c]
+ - edge_orientation(#vert[r][c], #horz[r][c]) == the edge direction at this point in
+ the image
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type
+ >
+ void suppress_non_maximum_edges (
+ const in_image_type& horz,
+ const in_image_type& vert,
+ out_image_type& out_img
+ );
+ /*!
+ requires
+ - in_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - out_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - horz.nr() == vert.nr()
+ - horz.nc() == vert.nc()
+ - is_same_object(out_img, horz) == false
+ - is_same_object(out_img, vert) == false
+ - image_traits<in_image_type>::pixel_type == A signed scalar type (e.g. int, double, etc.)
+ ensures
+ - #out_img.nr() = horz.nr()
+ - #out_img.nc() = horz.nc()
+ - let edge_strength(r,c) == sqrt(pow(horz[r][c],2) + pow(vert[r][c],2))
+ (i.e. The Euclidean norm of the gradient)
+ - for all valid r and c:
+ - if (edge_strength(r,c) is at a maximum with respect to its 2 neighboring
+ pixels along the line given by edge_orientation(vert[r][c],horz[r][c])) then
+ - performs assign_pixel(#out_img[r][c], edge_strength(r,c))
+ - else
+ - performs assign_pixel(#out_img[r][c], 0)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_EDGE_DETECTOr_ABSTRACT_
+
+
diff --git a/ml/dlib/dlib/image_transforms/equalize_histogram.h b/ml/dlib/dlib/image_transforms/equalize_histogram.h
new file mode 100644
index 000000000..dd048759a
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/equalize_histogram.h
@@ -0,0 +1,143 @@
+// Copyright (C) 2006 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_EQUALIZE_HISTOGRAm_
+#define DLIB_EQUALIZE_HISTOGRAm_
+
+#include "../pixel.h"
+#include "equalize_histogram_abstract.h"
+#include <vector>
+#include "../enable_if.h"
+#include "../matrix.h"
+
+namespace dlib
+{
+
+// ---------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ long R,
+ long C,
+ typename MM
+ >
+ void get_histogram (
+ const in_image_type& in_img_,
+ matrix<unsigned long,R,C,MM>& hist
+ )
+ {
+ typedef typename image_traits<in_image_type>::pixel_type pixel_type;
+ COMPILE_TIME_ASSERT( pixel_traits<pixel_type>::is_unsigned == true );
+
+ typedef typename pixel_traits<pixel_type>::basic_pixel_type in_image_basic_pixel_type;
+ COMPILE_TIME_ASSERT( sizeof(in_image_basic_pixel_type) <= 2);
+
+ // make sure hist is the right size
+ if (R == 1)
+ hist.set_size(1,pixel_traits<pixel_type>::max()+1);
+ else
+ hist.set_size(pixel_traits<pixel_type>::max()+1,1);
+
+
+ set_all_elements(hist,0);
+
+ const_image_view<in_image_type> in_img(in_img_);
+ // compute the histogram
+ for (long r = 0; r < in_img.nr(); ++r)
+ {
+ for (long c = 0; c < in_img.nc(); ++c)
+ {
+ unsigned long p = get_pixel_intensity(in_img[r][c]);
+ ++hist(p);
+ }
+ }
+ }
+
+// ---------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type
+ >
+ void equalize_histogram (
+ const in_image_type& in_img_,
+ out_image_type& out_img_
+ )
+ {
+ const_image_view<in_image_type> in_img(in_img_);
+ image_view<out_image_type> out_img(out_img_);
+
+ typedef typename image_traits<in_image_type>::pixel_type in_pixel_type;
+ typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
+
+ COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false );
+ COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false );
+
+ COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::is_unsigned == true );
+ COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::is_unsigned == true );
+
+ typedef typename pixel_traits<in_pixel_type>::basic_pixel_type in_image_basic_pixel_type;
+ COMPILE_TIME_ASSERT( sizeof(in_image_basic_pixel_type) <= 2);
+
+
+ // if there isn't any input image then don't do anything
+ if (in_img.size() == 0)
+ {
+ out_img.clear();
+ return;
+ }
+
+ out_img.set_size(in_img.nr(),in_img.nc());
+
+ unsigned long p;
+
+ matrix<unsigned long,1,0> histogram;
+ get_histogram(in_img_, histogram);
+ in_img = in_img_;
+
+ double scale = pixel_traits<out_pixel_type>::max();
+ if (in_img.size() > histogram(0))
+ scale /= in_img.size()-histogram(0);
+ else
+ scale = 0;
+
+ // make the black pixels remain black in the output image
+ histogram(0) = 0;
+
+ // compute the transform function
+ for (long i = 1; i < histogram.size(); ++i)
+ histogram(i) += histogram(i-1);
+ // scale so that it is in the range [0,pixel_traits<out_pixel_type>::max()]
+ for (long i = 0; i < histogram.size(); ++i)
+ histogram(i) = static_cast<unsigned long>(histogram(i)*scale);
+
+ // now do the transform
+ for (long row = 0; row < in_img.nr(); ++row)
+ {
+ for (long col = 0; col < in_img.nc(); ++col)
+ {
+ p = histogram(get_pixel_intensity(in_img[row][col]));
+ assign_pixel(out_img[row][col], in_img[row][col]);
+ assign_pixel_intensity(out_img[row][col],p);
+ }
+ }
+
+ }
+
+ template <
+ typename image_type
+ >
+ void equalize_histogram (
+ image_type& img
+ )
+ {
+ equalize_histogram(img,img);
+ }
+
+// ---------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_EQUALIZE_HISTOGRAm_
+
+
+
diff --git a/ml/dlib/dlib/image_transforms/equalize_histogram_abstract.h b/ml/dlib/dlib/image_transforms/equalize_histogram_abstract.h
new file mode 100644
index 000000000..2592aef1a
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/equalize_histogram_abstract.h
@@ -0,0 +1,91 @@
+// Copyright (C) 2006 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_EQUALIZE_HISTOGRAm_ABSTRACT_
+#ifdef DLIB_EQUALIZE_HISTOGRAm_ABSTRACT_
+
+#include "../pixel.h"
+#include "../matrix.h"
+#include "../image_processing/generic_image.h"
+
+namespace dlib
+{
+
+// ---------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type
+ >
+ void equalize_histogram (
+ const in_image_type& in_img,
+ out_image_type& out_img
+ );
+ /*!
+ requires
+ - in_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - out_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - Let pixel_type be the type of pixel in either input or output images, then we
+ must have:
+ - pixel_traits<pixel_type>::has_alpha == false
+ - pixel_traits<pixel_type>::is_unsigned == true
+ - For the input image pixel type, we have the additional requirement that:
+ - pixel_traits<pixel_type>::max() <= 65535
+ ensures
+ - #out_img == the histogram equalized version of in_img
+ - #out_img.nc() == in_img.nc()
+ - #out_img.nr() == in_img.nr()
+ !*/
+
+ template <
+ typename image_type
+ >
+ void equalize_histogram (
+ image_type& img
+ );
+ /*!
+ requires
+ - it is valid to call equalize_histogram(img,img)
+ ensures
+ - calls equalize_histogram(img,img);
+ !*/
+
+// ---------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ long R,
+ long C,
+ typename MM
+ >
+ void get_histogram (
+ const in_image_type& in_img,
+ matrix<unsigned long,R,C,MM>& hist
+ );
+ /*!
+ requires
+ - in_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - Let pixel_type denote the type of pixel in in_img, then we must have:
+ - pixel_traits<pixel_type>::is_unsigned == true
+ - pixel_traits<pixel_type>::max() <= 65535
+ - hist must be capable of representing a column vector of length
+ pixel_traits<typename in_image_type>::max(). I.e. if R and C are nonzero
+ then they must be values that don't conflict with the previous sentence.
+ ensures
+ - #hist.size() == pixel_traits<typename in_image_type>::max()
+ - #hist.nc() == 1 || #hist.nr() == 1 (i.e. hist is either a row or column vector)
+ - #hist == the histogram for in_img. I.e. it is the case that for all
+ valid i:
+ - hist(i) == the number of times a pixel with intensity i appears
+ in in_img
+ !*/
+
+// ---------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_EQUALIZE_HISTOGRAm_ABSTRACT_
+
+
diff --git a/ml/dlib/dlib/image_transforms/fhog.h b/ml/dlib/dlib/image_transforms/fhog.h
new file mode 100644
index 000000000..d99973adf
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/fhog.h
@@ -0,0 +1,1404 @@
+// Copyright (C) 2013 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_fHOG_Hh_
+#define DLIB_fHOG_Hh_
+
+#include "fhog_abstract.h"
+#include "../matrix.h"
+#include "../array2d.h"
+#include "../array.h"
+#include "../geometry.h"
+#include "assign_image.h"
+#include "draw.h"
+#include "interpolation.h"
+#include "../simd.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ namespace impl_fhog
+ {
+ template <typename image_type, typename T>
+ inline typename dlib::enable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient (
+ const int r,
+ const int c,
+ const image_type& img,
+ matrix<T,2,1>& grad,
+ T& len
+ )
+ {
+ matrix<T, 2, 1> grad2, grad3;
+ // get the red gradient
+ grad(0) = (int)img[r][c+1].red-(int)img[r][c-1].red;
+ grad(1) = (int)img[r+1][c].red-(int)img[r-1][c].red;
+ len = length_squared(grad);
+
+ // get the green gradient
+ grad2(0) = (int)img[r][c+1].green-(int)img[r][c-1].green;
+ grad2(1) = (int)img[r+1][c].green-(int)img[r-1][c].green;
+ T v2 = length_squared(grad2);
+
+ // get the blue gradient
+ grad3(0) = (int)img[r][c+1].blue-(int)img[r][c-1].blue;
+ grad3(1) = (int)img[r+1][c].blue-(int)img[r-1][c].blue;
+ T v3 = length_squared(grad3);
+
+ // pick color with strongest gradient
+ if (v2 > len)
+ {
+ len = v2;
+ grad = grad2;
+ }
+ if (v3 > len)
+ {
+ len = v3;
+ grad = grad3;
+ }
+ }
+
+ template <typename image_type>
+ inline typename dlib::enable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient (
+ const int r,
+ const int c,
+ const image_type& img,
+ simd4f& grad_x,
+ simd4f& grad_y,
+ simd4f& len
+ )
+ {
+ simd4i rleft((int)img[r][c-1].red,
+ (int)img[r][c].red,
+ (int)img[r][c+1].red,
+ (int)img[r][c+2].red);
+ simd4i rright((int)img[r][c+1].red,
+ (int)img[r][c+2].red,
+ (int)img[r][c+3].red,
+ (int)img[r][c+4].red);
+ simd4i rtop((int)img[r-1][c].red,
+ (int)img[r-1][c+1].red,
+ (int)img[r-1][c+2].red,
+ (int)img[r-1][c+3].red);
+ simd4i rbottom((int)img[r+1][c].red,
+ (int)img[r+1][c+1].red,
+ (int)img[r+1][c+2].red,
+ (int)img[r+1][c+3].red);
+
+ simd4i gleft((int)img[r][c-1].green,
+ (int)img[r][c].green,
+ (int)img[r][c+1].green,
+ (int)img[r][c+2].green);
+ simd4i gright((int)img[r][c+1].green,
+ (int)img[r][c+2].green,
+ (int)img[r][c+3].green,
+ (int)img[r][c+4].green);
+ simd4i gtop((int)img[r-1][c].green,
+ (int)img[r-1][c+1].green,
+ (int)img[r-1][c+2].green,
+ (int)img[r-1][c+3].green);
+ simd4i gbottom((int)img[r+1][c].green,
+ (int)img[r+1][c+1].green,
+ (int)img[r+1][c+2].green,
+ (int)img[r+1][c+3].green);
+
+ simd4i bleft((int)img[r][c-1].blue,
+ (int)img[r][c].blue,
+ (int)img[r][c+1].blue,
+ (int)img[r][c+2].blue);
+ simd4i bright((int)img[r][c+1].blue,
+ (int)img[r][c+2].blue,
+ (int)img[r][c+3].blue,
+ (int)img[r][c+4].blue);
+ simd4i btop((int)img[r-1][c].blue,
+ (int)img[r-1][c+1].blue,
+ (int)img[r-1][c+2].blue,
+ (int)img[r-1][c+3].blue);
+ simd4i bbottom((int)img[r+1][c].blue,
+ (int)img[r+1][c+1].blue,
+ (int)img[r+1][c+2].blue,
+ (int)img[r+1][c+3].blue);
+
+ simd4i grad_x_red = rright-rleft;
+ simd4i grad_y_red = rbottom-rtop;
+ simd4i grad_x_green = gright-gleft;
+ simd4i grad_y_green = gbottom-gtop;
+ simd4i grad_x_blue = bright-bleft;
+ simd4i grad_y_blue = bbottom-btop;
+
+ simd4i rlen = grad_x_red*grad_x_red + grad_y_red*grad_y_red;
+ simd4i glen = grad_x_green*grad_x_green + grad_y_green*grad_y_green;
+ simd4i blen = grad_x_blue*grad_x_blue + grad_y_blue*grad_y_blue;
+
+ simd4i cmp = rlen>glen;
+ simd4i tgrad_x = select(cmp,grad_x_red,grad_x_green);
+ simd4i tgrad_y = select(cmp,grad_y_red,grad_y_green);
+ simd4i tlen = select(cmp,rlen,glen);
+
+ cmp = tlen>blen;
+ grad_x = select(cmp,tgrad_x,grad_x_blue);
+ grad_y = select(cmp,tgrad_y,grad_y_blue);
+ len = select(cmp,tlen,blen);
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ template <typename image_type>
+ inline typename dlib::enable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient(
+ const int r,
+ const int c,
+ const image_type& img,
+ simd8f& grad_x,
+ simd8f& grad_y,
+ simd8f& len
+ )
+ {
+ simd8i rleft((int)img[r][c - 1].red,
+ (int)img[r][c].red,
+ (int)img[r][c + 1].red,
+ (int)img[r][c + 2].red,
+ (int)img[r][c + 3].red,
+ (int)img[r][c + 4].red,
+ (int)img[r][c + 5].red,
+ (int)img[r][c + 6].red);
+ simd8i rright((int)img[r][c + 1].red,
+ (int)img[r][c + 2].red,
+ (int)img[r][c + 3].red,
+ (int)img[r][c + 4].red,
+ (int)img[r][c + 5].red,
+ (int)img[r][c + 6].red,
+ (int)img[r][c + 7].red,
+ (int)img[r][c + 8].red);
+ simd8i rtop((int)img[r - 1][c].red,
+ (int)img[r - 1][c + 1].red,
+ (int)img[r - 1][c + 2].red,
+ (int)img[r - 1][c + 3].red,
+ (int)img[r - 1][c + 4].red,
+ (int)img[r - 1][c + 5].red,
+ (int)img[r - 1][c + 6].red,
+ (int)img[r - 1][c + 7].red);
+ simd8i rbottom((int)img[r + 1][c].red,
+ (int)img[r + 1][c + 1].red,
+ (int)img[r + 1][c + 2].red,
+ (int)img[r + 1][c + 3].red,
+ (int)img[r + 1][c + 4].red,
+ (int)img[r + 1][c + 5].red,
+ (int)img[r + 1][c + 6].red,
+ (int)img[r + 1][c + 7].red);
+
+ simd8i gleft((int)img[r][c - 1].green,
+ (int)img[r][c].green,
+ (int)img[r][c + 1].green,
+ (int)img[r][c + 2].green,
+ (int)img[r][c + 3].green,
+ (int)img[r][c + 4].green,
+ (int)img[r][c + 5].green,
+ (int)img[r][c + 6].green);
+ simd8i gright((int)img[r][c + 1].green,
+ (int)img[r][c + 2].green,
+ (int)img[r][c + 3].green,
+ (int)img[r][c + 4].green,
+ (int)img[r][c + 5].green,
+ (int)img[r][c + 6].green,
+ (int)img[r][c + 7].green,
+ (int)img[r][c + 8].green);
+ simd8i gtop((int)img[r - 1][c].green,
+ (int)img[r - 1][c + 1].green,
+ (int)img[r - 1][c + 2].green,
+ (int)img[r - 1][c + 3].green,
+ (int)img[r - 1][c + 4].green,
+ (int)img[r - 1][c + 5].green,
+ (int)img[r - 1][c + 6].green,
+ (int)img[r - 1][c + 7].green);
+ simd8i gbottom((int)img[r + 1][c].green,
+ (int)img[r + 1][c + 1].green,
+ (int)img[r + 1][c + 2].green,
+ (int)img[r + 1][c + 3].green,
+ (int)img[r + 1][c + 4].green,
+ (int)img[r + 1][c + 5].green,
+ (int)img[r + 1][c + 6].green,
+ (int)img[r + 1][c + 7].green);
+
+ simd8i bleft((int)img[r][c - 1].blue,
+ (int)img[r][c].blue,
+ (int)img[r][c + 1].blue,
+ (int)img[r][c + 2].blue,
+ (int)img[r][c + 3].blue,
+ (int)img[r][c + 4].blue,
+ (int)img[r][c + 5].blue,
+ (int)img[r][c + 6].blue);
+ simd8i bright((int)img[r][c + 1].blue,
+ (int)img[r][c + 2].blue,
+ (int)img[r][c + 3].blue,
+ (int)img[r][c + 4].blue,
+ (int)img[r][c + 5].blue,
+ (int)img[r][c + 6].blue,
+ (int)img[r][c + 7].blue,
+ (int)img[r][c + 8].blue);
+ simd8i btop((int)img[r - 1][c].blue,
+ (int)img[r - 1][c + 1].blue,
+ (int)img[r - 1][c + 2].blue,
+ (int)img[r - 1][c + 3].blue,
+ (int)img[r - 1][c + 4].blue,
+ (int)img[r - 1][c + 5].blue,
+ (int)img[r - 1][c + 6].blue,
+ (int)img[r - 1][c + 7].blue);
+ simd8i bbottom((int)img[r + 1][c].blue,
+ (int)img[r + 1][c + 1].blue,
+ (int)img[r + 1][c + 2].blue,
+ (int)img[r + 1][c + 3].blue,
+ (int)img[r + 1][c + 4].blue,
+ (int)img[r + 1][c + 5].blue,
+ (int)img[r + 1][c + 6].blue,
+ (int)img[r + 1][c + 7].blue);
+
+ simd8i grad_x_red = rright - rleft;
+ simd8i grad_y_red = rbottom - rtop;
+ simd8i grad_x_green = gright - gleft;
+ simd8i grad_y_green = gbottom - gtop;
+ simd8i grad_x_blue = bright - bleft;
+ simd8i grad_y_blue = bbottom - btop;
+
+ simd8i rlen = grad_x_red*grad_x_red + grad_y_red*grad_y_red;
+ simd8i glen = grad_x_green*grad_x_green + grad_y_green*grad_y_green;
+ simd8i blen = grad_x_blue*grad_x_blue + grad_y_blue*grad_y_blue;
+
+ simd8i cmp = rlen > glen;
+ simd8i tgrad_x = select(cmp, grad_x_red, grad_x_green);
+ simd8i tgrad_y = select(cmp, grad_y_red, grad_y_green);
+ simd8i tlen = select(cmp, rlen, glen);
+
+ cmp = tlen > blen;
+ grad_x = select(cmp, tgrad_x, grad_x_blue);
+ grad_y = select(cmp, tgrad_y, grad_y_blue);
+ len = select(cmp, tlen, blen);
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ template <typename image_type, typename T>
+ inline typename dlib::disable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient (
+ const int r,
+ const int c,
+ const image_type& img,
+ matrix<T, 2, 1>& grad,
+ T& len
+ )
+ {
+ grad(0) = (int)get_pixel_intensity(img[r][c+1])-(int)get_pixel_intensity(img[r][c-1]);
+ grad(1) = (int)get_pixel_intensity(img[r+1][c])-(int)get_pixel_intensity(img[r-1][c]);
+ len = length_squared(grad);
+ }
+
+ template <typename image_type>
+ inline typename dlib::disable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient (
+ int r,
+ int c,
+ const image_type& img,
+ simd4f& grad_x,
+ simd4f& grad_y,
+ simd4f& len
+ )
+ {
+ simd4i left((int)get_pixel_intensity(img[r][c-1]),
+ (int)get_pixel_intensity(img[r][c]),
+ (int)get_pixel_intensity(img[r][c+1]),
+ (int)get_pixel_intensity(img[r][c+2]));
+ simd4i right((int)get_pixel_intensity(img[r][c+1]),
+ (int)get_pixel_intensity(img[r][c+2]),
+ (int)get_pixel_intensity(img[r][c+3]),
+ (int)get_pixel_intensity(img[r][c+4]));
+
+ simd4i top((int)get_pixel_intensity(img[r-1][c]),
+ (int)get_pixel_intensity(img[r-1][c+1]),
+ (int)get_pixel_intensity(img[r-1][c+2]),
+ (int)get_pixel_intensity(img[r-1][c+3]));
+ simd4i bottom((int)get_pixel_intensity(img[r+1][c]),
+ (int)get_pixel_intensity(img[r+1][c+1]),
+ (int)get_pixel_intensity(img[r+1][c+2]),
+ (int)get_pixel_intensity(img[r+1][c+3]));
+
+ grad_x = right-left;
+ grad_y = bottom-top;
+
+ len = (grad_x*grad_x + grad_y*grad_y);
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ template <typename image_type>
+ inline typename dlib::disable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient(
+ int r,
+ int c,
+ const image_type& img,
+ simd8f& grad_x,
+ simd8f& grad_y,
+ simd8f& len
+ )
+ {
+ simd8i left((int)get_pixel_intensity(img[r][c - 1]),
+ (int)get_pixel_intensity(img[r][c]),
+ (int)get_pixel_intensity(img[r][c + 1]),
+ (int)get_pixel_intensity(img[r][c + 2]),
+ (int)get_pixel_intensity(img[r][c + 3]),
+ (int)get_pixel_intensity(img[r][c + 4]),
+ (int)get_pixel_intensity(img[r][c + 5]),
+ (int)get_pixel_intensity(img[r][c + 6]));
+ simd8i right((int)get_pixel_intensity(img[r][c + 1]),
+ (int)get_pixel_intensity(img[r][c + 2]),
+ (int)get_pixel_intensity(img[r][c + 3]),
+ (int)get_pixel_intensity(img[r][c + 4]),
+ (int)get_pixel_intensity(img[r][c + 5]),
+ (int)get_pixel_intensity(img[r][c + 6]),
+ (int)get_pixel_intensity(img[r][c + 7]),
+ (int)get_pixel_intensity(img[r][c + 8]));
+
+ simd8i top((int)get_pixel_intensity(img[r - 1][c]),
+ (int)get_pixel_intensity(img[r - 1][c + 1]),
+ (int)get_pixel_intensity(img[r - 1][c + 2]),
+ (int)get_pixel_intensity(img[r - 1][c + 3]),
+ (int)get_pixel_intensity(img[r - 1][c + 4]),
+ (int)get_pixel_intensity(img[r - 1][c + 5]),
+ (int)get_pixel_intensity(img[r - 1][c + 6]),
+ (int)get_pixel_intensity(img[r - 1][c + 7]));
+ simd8i bottom((int)get_pixel_intensity(img[r + 1][c]),
+ (int)get_pixel_intensity(img[r + 1][c + 1]),
+ (int)get_pixel_intensity(img[r + 1][c + 2]),
+ (int)get_pixel_intensity(img[r + 1][c + 3]),
+ (int)get_pixel_intensity(img[r + 1][c + 4]),
+ (int)get_pixel_intensity(img[r + 1][c + 5]),
+ (int)get_pixel_intensity(img[r + 1][c + 6]),
+ (int)get_pixel_intensity(img[r + 1][c + 7]));
+
+ grad_x = right - left;
+ grad_y = bottom - top;
+
+ len = (grad_x*grad_x + grad_y*grad_y);
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ template <typename T, typename mm1, typename mm2>
+ inline void set_hog (
+ dlib::array<array2d<T,mm1>,mm2>& hog,
+ int o,
+ int x,
+ int y,
+ const float& value
+ )
+ {
+ hog[o][y][x] = value;
+ }
+
+ template <typename T, typename mm1, typename mm2>
+ void init_hog (
+ dlib::array<array2d<T,mm1>,mm2>& hog,
+ int hog_nr,
+ int hog_nc,
+ int filter_rows_padding,
+ int filter_cols_padding
+ )
+ {
+ const int num_hog_bands = 27+4;
+ hog.resize(num_hog_bands);
+ for (int i = 0; i < num_hog_bands; ++i)
+ {
+ hog[i].set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1);
+ rectangle rect = get_rect(hog[i]);
+ rect.top() += (filter_rows_padding-1)/2;
+ rect.left() += (filter_cols_padding-1)/2;
+ rect.right() -= filter_cols_padding/2;
+ rect.bottom() -= filter_rows_padding/2;
+ zero_border_pixels(hog[i],rect);
+ }
+ }
+
+ template <typename T, typename mm1, typename mm2>
+ void init_hog_zero_everything (
+ dlib::array<array2d<T,mm1>,mm2>& hog,
+ int hog_nr,
+ int hog_nc,
+ int filter_rows_padding,
+ int filter_cols_padding
+ )
+ {
+ const int num_hog_bands = 27+4;
+ hog.resize(num_hog_bands);
+ for (int i = 0; i < num_hog_bands; ++i)
+ {
+ hog[i].set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1);
+ assign_all_pixels(hog[i], 0);
+ }
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ template <typename T, typename mm>
+ inline void set_hog (
+ array2d<matrix<T,31,1>,mm>& hog,
+ int o,
+ int x,
+ int y,
+ const float& value
+ )
+ {
+ hog[y][x](o) = value;
+ }
+
+ template <typename T, typename mm>
+ void init_hog (
+ array2d<matrix<T,31,1>,mm>& hog,
+ int hog_nr,
+ int hog_nc,
+ int filter_rows_padding,
+ int filter_cols_padding
+ )
+ {
+ hog.set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1);
+
+ // now zero out the border region
+ rectangle rect = get_rect(hog);
+ rect.top() += (filter_rows_padding-1)/2;
+ rect.left() += (filter_cols_padding-1)/2;
+ rect.right() -= filter_cols_padding/2;
+ rect.bottom() -= filter_rows_padding/2;
+ border_enumerator be(get_rect(hog),rect);
+ while (be.move_next())
+ {
+ const point p = be.element();
+ set_all_elements(hog[p.y()][p.x()], 0);
+ }
+ }
+
+ template <typename T, typename mm>
+ void init_hog_zero_everything (
+ array2d<matrix<T,31,1>,mm>& hog,
+ int hog_nr,
+ int hog_nc,
+ int filter_rows_padding,
+ int filter_cols_padding
+ )
+ {
+ hog.set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1);
+
+ for (long r = 0; r < hog.nr(); ++r)
+ {
+ for (long c = 0; c < hog.nc(); ++c)
+ {
+ set_all_elements(hog[r][c], 0);
+ }
+ }
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename out_type
+ >
+ void impl_extract_fhog_features_cell_size_1(
+ const image_type& img_,
+ out_type& hog,
+ int filter_rows_padding,
+ int filter_cols_padding
+ )
+ {
+ const_image_view<image_type> img(img_);
+ // make sure requires clause is not broken
+ DLIB_ASSERT( filter_rows_padding > 0 &&
+ filter_cols_padding > 0 ,
+ "\t void extract_fhog_features()"
+ << "\n\t Invalid inputs were given to this function. "
+ << "\n\t filter_rows_padding: " << filter_rows_padding
+ << "\n\t filter_cols_padding: " << filter_cols_padding
+ );
+
+ /*
+ This function is an optimized version of impl_extract_fhog_features() for
+ the case where cell_size == 1.
+ */
+
+
+ // unit vectors used to compute gradient orientation
+ matrix<float,2,1> directions[9];
+ directions[0] = 1.0000, 0.0000;
+ directions[1] = 0.9397, 0.3420;
+ directions[2] = 0.7660, 0.6428;
+ directions[3] = 0.500, 0.8660;
+ directions[4] = 0.1736, 0.9848;
+ directions[5] = -0.1736, 0.9848;
+ directions[6] = -0.5000, 0.8660;
+ directions[7] = -0.7660, 0.6428;
+ directions[8] = -0.9397, 0.3420;
+
+
+
+ if (img.nr() <= 2 || img.nc() <= 2)
+ {
+ hog.clear();
+ return;
+ }
+
+ array2d<unsigned char> angle(img.nr(), img.nc());
+
+ array2d<float> norm(img.nr(), img.nc());
+ zero_border_pixels(norm,1,1);
+
+ // memory for HOG features
+ const long hog_nr = img.nr()-2;
+ const long hog_nc = img.nc()-2;
+
+ const int padding_rows_offset = (filter_rows_padding-1)/2;
+ const int padding_cols_offset = (filter_cols_padding-1)/2;
+ init_hog_zero_everything(hog, hog_nr, hog_nc, filter_rows_padding, filter_cols_padding);
+
+
+ const int visible_nr = img.nr()-1;
+ const int visible_nc = img.nc()-1;
+
+ // First populate the gradient histograms
+ for (int y = 1; y < visible_nr; y++)
+ {
+ int x;
+ for (x = 1; x < visible_nc - 7; x += 8)
+ {
+ // v will be the length of the gradient vectors.
+ simd8f grad_x, grad_y, v;
+ get_gradient(y, x, img, grad_x, grad_y, v);
+
+ float _vv[8];
+ v.store(_vv);
+
+ // Now snap the gradient to one of 18 orientations
+ simd8f best_dot = 0;
+ simd8f best_o = 0;
+ for (int o = 0; o < 9; o++)
+ {
+ simd8f dot = grad_x*directions[o](0) + grad_y*directions[o](1);
+ simd8f_bool cmp = dot>best_dot;
+ best_dot = select(cmp, dot, best_dot);
+ dot *= -1;
+ best_o = select(cmp, o, best_o);
+
+ cmp = dot > best_dot;
+ best_dot = select(cmp, dot, best_dot);
+ best_o = select(cmp, o + 9, best_o);
+ }
+
+ int32 _best_o[8]; simd8i(best_o).store(_best_o);
+
+ norm[y][x + 0] = _vv[0];
+ norm[y][x + 1] = _vv[1];
+ norm[y][x + 2] = _vv[2];
+ norm[y][x + 3] = _vv[3];
+ norm[y][x + 4] = _vv[4];
+ norm[y][x + 5] = _vv[5];
+ norm[y][x + 6] = _vv[6];
+ norm[y][x + 7] = _vv[7];
+
+ angle[y][x + 0] = _best_o[0];
+ angle[y][x + 1] = _best_o[1];
+ angle[y][x + 2] = _best_o[2];
+ angle[y][x + 3] = _best_o[3];
+ angle[y][x + 4] = _best_o[4];
+ angle[y][x + 5] = _best_o[5];
+ angle[y][x + 6] = _best_o[6];
+ angle[y][x + 7] = _best_o[7];
+ }
+ // Now process the right columns that don't fit into simd registers.
+ for (; x < visible_nc; x++)
+ {
+ matrix<float,2,1> grad;
+ float v;
+ get_gradient(y,x,img,grad,v);
+
+ // snap to one of 18 orientations
+ float best_dot = 0;
+ int best_o = 0;
+ for (int o = 0; o < 9; o++)
+ {
+ const float dot = dlib::dot(directions[o], grad);
+ if (dot > best_dot)
+ {
+ best_dot = dot;
+ best_o = o;
+ }
+ else if (-dot > best_dot)
+ {
+ best_dot = -dot;
+ best_o = o+9;
+ }
+ }
+
+ norm[y][x] = v;
+ angle[y][x] = best_o;
+ }
+ }
+
+ const float eps = 0.0001;
+ // compute features
+ for (int y = 0; y < hog_nr; y++)
+ {
+ const int yy = y+padding_rows_offset;
+ for (int x = 0; x < hog_nc; x++)
+ {
+ const simd4f z1(norm[y+1][x+1],
+ norm[y][x+1],
+ norm[y+1][x],
+ norm[y][x]);
+
+ const simd4f z2(norm[y+1][x+2],
+ norm[y][x+2],
+ norm[y+1][x+1],
+ norm[y][x+1]);
+
+ const simd4f z3(norm[y+2][x+1],
+ norm[y+1][x+1],
+ norm[y+2][x],
+ norm[y+1][x]);
+
+ const simd4f z4(norm[y+2][x+2],
+ norm[y+1][x+2],
+ norm[y+2][x+1],
+ norm[y+1][x+1]);
+
+ const simd4f temp0 = std::sqrt(norm[y+1][x+1]);
+ const simd4f nn = 0.2*sqrt(z1+z2+z3+z4+eps);
+ const simd4f n = 0.1/nn;
+
+ simd4f t = 0;
+
+ const int xx = x+padding_cols_offset;
+
+ simd4f h0 = min(temp0,nn)*n;
+ const float vv = sum(h0);
+ set_hog(hog,angle[y+1][x+1],xx,yy, vv);
+ t += h0;
+
+ t *= 2*0.2357;
+
+ // contrast-insensitive features
+ set_hog(hog,angle[y+1][x+1]%9+18,xx,yy, vv);
+
+
+ float temp[4];
+ t.store(temp);
+
+ // texture features
+ set_hog(hog,27,xx,yy, temp[0]);
+ set_hog(hog,28,xx,yy, temp[1]);
+ set_hog(hog,29,xx,yy, temp[2]);
+ set_hog(hog,30,xx,yy, temp[3]);
+ }
+ }
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename out_type
+ >
+ void impl_extract_fhog_features(
+ const image_type& img_,
+ out_type& hog,
+ int cell_size,
+ int filter_rows_padding,
+ int filter_cols_padding
+ )
+ {
+ const_image_view<image_type> img(img_);
+ // make sure requires clause is not broken
+ DLIB_ASSERT( cell_size > 0 &&
+ filter_rows_padding > 0 &&
+ filter_cols_padding > 0 ,
+ "\t void extract_fhog_features()"
+ << "\n\t Invalid inputs were given to this function. "
+ << "\n\t cell_size: " << cell_size
+ << "\n\t filter_rows_padding: " << filter_rows_padding
+ << "\n\t filter_cols_padding: " << filter_cols_padding
+ );
+
+ /*
+ This function implements the HOG feature extraction method described in
+ the paper:
+ P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan
+ Object Detection with Discriminatively Trained Part Based Models
+ IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010
+
+ Moreover, this function is derived from the HOG feature extraction code
+ from the features.cc file in the voc-releaseX code (see
+                http://people.cs.uchicago.edu/~rbg/latent/) which has the following
+ license (note that the code has been modified to work with grayscale and
+ color as well as planar and interlaced input and output formats):
+
+ Copyright (C) 2011, 2012 Ross Girshick, Pedro Felzenszwalb
+ Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick
+ Copyright (C) 2007 Pedro Felzenszwalb, Deva Ramanan
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+ if (cell_size == 1)
+ {
+ impl_extract_fhog_features_cell_size_1(img_,hog,filter_rows_padding,filter_cols_padding);
+ return;
+ }
+
+ // unit vectors used to compute gradient orientation
+ matrix<float,2,1> directions[9];
+ directions[0] = 1.0000, 0.0000;
+ directions[1] = 0.9397, 0.3420;
+ directions[2] = 0.7660, 0.6428;
+ directions[3] = 0.500, 0.8660;
+ directions[4] = 0.1736, 0.9848;
+ directions[5] = -0.1736, 0.9848;
+ directions[6] = -0.5000, 0.8660;
+ directions[7] = -0.7660, 0.6428;
+ directions[8] = -0.9397, 0.3420;
+
+
+
+ // First we allocate memory for caching orientation histograms & their norms.
+ const int cells_nr = (int)((float)img.nr()/(float)cell_size + 0.5);
+ const int cells_nc = (int)((float)img.nc()/(float)cell_size + 0.5);
+
+ if (cells_nr == 0 || cells_nc == 0)
+ {
+ hog.clear();
+ return;
+ }
+
+ // We give hist extra padding around the edges (1 cell all the way around the
+ // edge) so we can avoid needing to do boundary checks when indexing into it
+ // later on. So some statements assign to the boundary but those values are
+ // never used.
+ array2d<matrix<float,18,1> > hist(cells_nr+2, cells_nc+2);
+ for (long r = 0; r < hist.nr(); ++r)
+ {
+ for (long c = 0; c < hist.nc(); ++c)
+ {
+ hist[r][c] = 0;
+ }
+ }
+
+ array2d<float> norm(cells_nr, cells_nc);
+ assign_all_pixels(norm, 0);
+
+ // memory for HOG features
+ const int hog_nr = std::max(cells_nr-2, 0);
+ const int hog_nc = std::max(cells_nc-2, 0);
+ if (hog_nr == 0 || hog_nc == 0)
+ {
+ hog.clear();
+ return;
+ }
+ const int padding_rows_offset = (filter_rows_padding-1)/2;
+ const int padding_cols_offset = (filter_cols_padding-1)/2;
+ init_hog(hog, hog_nr, hog_nc, filter_rows_padding, filter_cols_padding);
+
+ const int visible_nr = std::min((long)cells_nr*cell_size,img.nr())-1;
+ const int visible_nc = std::min((long)cells_nc*cell_size,img.nc())-1;
+
+ // First populate the gradient histograms
+ for (int y = 1; y < visible_nr; y++)
+ {
+ const float yp = ((float)y+0.5)/(float)cell_size - 0.5;
+ const int iyp = (int)std::floor(yp);
+ const float vy0 = yp - iyp;
+ const float vy1 = 1.0 - vy0;
+ int x;
+ for (x = 1; x < visible_nc - 7; x += 8)
+ {
+ simd8f xx(x, x + 1, x + 2, x + 3, x + 4, x + 5, x + 6, x + 7);
+ // v will be the length of the gradient vectors.
+ simd8f grad_x, grad_y, v;
+ get_gradient(y, x, img, grad_x, grad_y, v);
+
+ // We will use bilinear interpolation to add into the histogram bins.
+ // So first we precompute the values needed to determine how much each
+ // pixel votes into each bin.
+ simd8f xp = (xx + 0.5) / (float)cell_size + 0.5;
+ simd8i ixp = simd8i(xp);
+ simd8f vx0 = xp - ixp;
+ simd8f vx1 = 1.0f - vx0;
+
+ v = sqrt(v);
+
+ // Now snap the gradient to one of 18 orientations
+ simd8f best_dot = 0;
+ simd8f best_o = 0;
+ for (int o = 0; o < 9; o++)
+ {
+ simd8f dot = grad_x*directions[o](0) + grad_y*directions[o](1);
+ simd8f_bool cmp = dot>best_dot;
+ best_dot = select(cmp, dot, best_dot);
+ dot *= -1;
+ best_o = select(cmp, o, best_o);
+
+ cmp = dot > best_dot;
+ best_dot = select(cmp, dot, best_dot);
+ best_o = select(cmp, o + 9, best_o);
+ }
+
+
+ // Add the gradient magnitude, v, to 4 histograms around pixel using
+ // bilinear interpolation.
+ vx1 *= v;
+ vx0 *= v;
+ // The amounts for each bin
+ simd8f v11 = vy1*vx1;
+ simd8f v01 = vy0*vx1;
+ simd8f v10 = vy1*vx0;
+ simd8f v00 = vy0*vx0;
+
+ int32 _best_o[8]; simd8i(best_o).store(_best_o);
+ int32 _ixp[8]; ixp.store(_ixp);
+ float _v11[8]; v11.store(_v11);
+ float _v01[8]; v01.store(_v01);
+ float _v10[8]; v10.store(_v10);
+ float _v00[8]; v00.store(_v00);
+
+ hist[iyp + 1][_ixp[0]](_best_o[0]) += _v11[0];
+ hist[iyp + 1 + 1][_ixp[0]](_best_o[0]) += _v01[0];
+ hist[iyp + 1][_ixp[0] + 1](_best_o[0]) += _v10[0];
+ hist[iyp + 1 + 1][_ixp[0] + 1](_best_o[0]) += _v00[0];
+
+ hist[iyp + 1][_ixp[1]](_best_o[1]) += _v11[1];
+ hist[iyp + 1 + 1][_ixp[1]](_best_o[1]) += _v01[1];
+ hist[iyp + 1][_ixp[1] + 1](_best_o[1]) += _v10[1];
+ hist[iyp + 1 + 1][_ixp[1] + 1](_best_o[1]) += _v00[1];
+
+ hist[iyp + 1][_ixp[2]](_best_o[2]) += _v11[2];
+ hist[iyp + 1 + 1][_ixp[2]](_best_o[2]) += _v01[2];
+ hist[iyp + 1][_ixp[2] + 1](_best_o[2]) += _v10[2];
+ hist[iyp + 1 + 1][_ixp[2] + 1](_best_o[2]) += _v00[2];
+
+ hist[iyp + 1][_ixp[3]](_best_o[3]) += _v11[3];
+ hist[iyp + 1 + 1][_ixp[3]](_best_o[3]) += _v01[3];
+ hist[iyp + 1][_ixp[3] + 1](_best_o[3]) += _v10[3];
+ hist[iyp + 1 + 1][_ixp[3] + 1](_best_o[3]) += _v00[3];
+
+ hist[iyp + 1][_ixp[4]](_best_o[4]) += _v11[4];
+ hist[iyp + 1 + 1][_ixp[4]](_best_o[4]) += _v01[4];
+ hist[iyp + 1][_ixp[4] + 1](_best_o[4]) += _v10[4];
+ hist[iyp + 1 + 1][_ixp[4] + 1](_best_o[4]) += _v00[4];
+
+ hist[iyp + 1][_ixp[5]](_best_o[5]) += _v11[5];
+ hist[iyp + 1 + 1][_ixp[5]](_best_o[5]) += _v01[5];
+ hist[iyp + 1][_ixp[5] + 1](_best_o[5]) += _v10[5];
+ hist[iyp + 1 + 1][_ixp[5] + 1](_best_o[5]) += _v00[5];
+
+ hist[iyp + 1][_ixp[6]](_best_o[6]) += _v11[6];
+ hist[iyp + 1 + 1][_ixp[6]](_best_o[6]) += _v01[6];
+ hist[iyp + 1][_ixp[6] + 1](_best_o[6]) += _v10[6];
+ hist[iyp + 1 + 1][_ixp[6] + 1](_best_o[6]) += _v00[6];
+
+ hist[iyp + 1][_ixp[7]](_best_o[7]) += _v11[7];
+ hist[iyp + 1 + 1][_ixp[7]](_best_o[7]) += _v01[7];
+ hist[iyp + 1][_ixp[7] + 1](_best_o[7]) += _v10[7];
+ hist[iyp + 1 + 1][_ixp[7] + 1](_best_o[7]) += _v00[7];
+ }
+ // Now process the right columns that don't fit into simd registers.
+ for (; x < visible_nc; x++)
+ {
+ matrix<float, 2, 1> grad;
+ float v;
+ get_gradient(y,x,img,grad,v);
+
+ // snap to one of 18 orientations
+ float best_dot = 0;
+ int best_o = 0;
+ for (int o = 0; o < 9; o++)
+ {
+ const float dot = dlib::dot(directions[o], grad);
+ if (dot > best_dot)
+ {
+ best_dot = dot;
+ best_o = o;
+ }
+ else if (-dot > best_dot)
+ {
+ best_dot = -dot;
+ best_o = o+9;
+ }
+ }
+
+ v = std::sqrt(v);
+ // add to 4 histograms around pixel using bilinear interpolation
+ const float xp = ((double)x + 0.5) / (double)cell_size - 0.5;
+ const int ixp = (int)std::floor(xp);
+ const float vx0 = xp - ixp;
+ const float vx1 = 1.0 - vx0;
+
+ hist[iyp+1][ixp+1](best_o) += vy1*vx1*v;
+ hist[iyp+1+1][ixp+1](best_o) += vy0*vx1*v;
+ hist[iyp+1][ixp+1+1](best_o) += vy1*vx0*v;
+ hist[iyp+1+1][ixp+1+1](best_o) += vy0*vx0*v;
+ }
+ }
+
+ // compute energy in each block by summing over orientations
+ for (int r = 0; r < cells_nr; ++r)
+ {
+ for (int c = 0; c < cells_nc; ++c)
+ {
+ for (int o = 0; o < 9; o++)
+ {
+ norm[r][c] += (hist[r+1][c+1](o) + hist[r+1][c+1](o+9)) * (hist[r+1][c+1](o) + hist[r+1][c+1](o+9));
+ }
+ }
+ }
+
+ const float eps = 0.0001;
+ // compute features
+ for (int y = 0; y < hog_nr; y++)
+ {
+ const int yy = y+padding_rows_offset;
+ for (int x = 0; x < hog_nc; x++)
+ {
+ const simd4f z1(norm[y+1][x+1],
+ norm[y][x+1],
+ norm[y+1][x],
+ norm[y][x]);
+
+ const simd4f z2(norm[y+1][x+2],
+ norm[y][x+2],
+ norm[y+1][x+1],
+ norm[y][x+1]);
+
+ const simd4f z3(norm[y+2][x+1],
+ norm[y+1][x+1],
+ norm[y+2][x],
+ norm[y+1][x]);
+
+ const simd4f z4(norm[y+2][x+2],
+ norm[y+1][x+2],
+ norm[y+2][x+1],
+ norm[y+1][x+1]);
+
+ const simd4f nn = 0.2*sqrt(z1+z2+z3+z4+eps);
+ const simd4f n = 0.1/nn;
+
+ simd4f t = 0;
+
+ const int xx = x+padding_cols_offset;
+
+ // contrast-sensitive features
+ for (int o = 0; o < 18; o+=3)
+ {
+ simd4f temp0(hist[y+1+1][x+1+1](o));
+ simd4f temp1(hist[y+1+1][x+1+1](o+1));
+ simd4f temp2(hist[y+1+1][x+1+1](o+2));
+ simd4f h0 = min(temp0,nn)*n;
+ simd4f h1 = min(temp1,nn)*n;
+ simd4f h2 = min(temp2,nn)*n;
+ set_hog(hog,o,xx,yy, sum(h0));
+ set_hog(hog,o+1,xx,yy, sum(h1));
+ set_hog(hog,o+2,xx,yy, sum(h2));
+ t += h0+h1+h2;
+ }
+
+ t *= 2*0.2357;
+
+ // contrast-insensitive features
+ for (int o = 0; o < 9; o+=3)
+ {
+ simd4f temp0 = hist[y+1+1][x+1+1](o) + hist[y+1+1][x+1+1](o+9);
+ simd4f temp1 = hist[y+1+1][x+1+1](o+1) + hist[y+1+1][x+1+1](o+9+1);
+ simd4f temp2 = hist[y+1+1][x+1+1](o+2) + hist[y+1+1][x+1+1](o+9+2);
+ simd4f h0 = min(temp0,nn)*n;
+ simd4f h1 = min(temp1,nn)*n;
+ simd4f h2 = min(temp2,nn)*n;
+ set_hog(hog,o+18,xx,yy, sum(h0));
+ set_hog(hog,o+18+1,xx,yy, sum(h1));
+ set_hog(hog,o+18+2,xx,yy, sum(h2));
+ }
+
+
+ float temp[4];
+ t.store(temp);
+
+ // texture features
+ set_hog(hog,27,xx,yy, temp[0]);
+ set_hog(hog,28,xx,yy, temp[1]);
+ set_hog(hog,29,xx,yy, temp[2]);
+ set_hog(hog,30,xx,yy, temp[3]);
+ }
+ }
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ inline void create_fhog_bar_images (
+ dlib::array<matrix<float> >& mbars,
+ const long w
+ )
+ {
+ const long bdims = 9;
+ // Make the oriented lines we use to draw on each HOG cell.
+ mbars.resize(bdims);
+ dlib::array<array2d<unsigned char> > bars(bdims);
+ array2d<unsigned char> temp(w,w);
+ for (unsigned long i = 0; i < bars.size(); ++i)
+ {
+ assign_all_pixels(temp, 0);
+ draw_line(temp, point(w/2,0), point(w/2,w-1), 255);
+ rotate_image(temp, bars[i], i*-pi/bars.size());
+
+ mbars[i] = subm(matrix_cast<float>(mat(bars[i])), centered_rect(get_rect(bars[i]),w,w) );
+ }
+ }
+
+ } // end namespace impl_fhog
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename T,
+ typename mm1,
+ typename mm2
+ >
+ void extract_fhog_features(
+ const image_type& img,
+ dlib::array<array2d<T,mm1>,mm2>& hog,
+ int cell_size = 8,
+ int filter_rows_padding = 1,
+ int filter_cols_padding = 1
+ )
+ {
+ impl_fhog::impl_extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding);
+ // If the image is too small then the above function outputs an empty feature map.
+ // But to make things very uniform in usage we require the output to still have the
+ // 31 planes (but they are just empty).
+ if (hog.size() == 0)
+ hog.resize(31);
+ }
+
+ template <
+ typename image_type,
+ typename T,
+ typename mm
+ >
+ void extract_fhog_features(
+ const image_type& img,
+ array2d<matrix<T,31,1>,mm>& hog,
+ int cell_size = 8,
+ int filter_rows_padding = 1,
+ int filter_cols_padding = 1
+ )
+ {
+ impl_fhog::impl_extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_type,
+        typename T
+        >
+    void extract_fhog_features(
+        const image_type& img,
+        matrix<T,0,1>& feats,
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
+    )
+    {
+        // Extracts the planar FHOG representation of img and packs all 31
+        // planes into the single column vector #feats.  Each plane is
+        // flattened in row major order; plane 0 comes first, then plane 1,
+        // and so on.
+        dlib::array<array2d<T> > hog;
+        extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding);
+        // If the image was too small then the 31 planes are all empty.  Bail
+        // out with an empty feature vector since the range()/set_rowm() calls
+        // below are not valid for zero sized planes.
+        if (hog[0].size() == 0)
+        {
+            feats.set_size(0);
+            return;
+        }
+        feats.set_size(hog.size()*hog[0].size());
+        for (unsigned long i = 0; i < hog.size(); ++i)
+        {
+            // Copy the i-th plane into its slot of the output vector.
+            const long size = hog[i].size();
+            set_rowm(feats, range(i*size, (i+1)*size-1)) = reshape_to_column_vector(mat(hog[i]));
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_type
+        >
+    matrix<double,0,1> extract_fhog_features(
+        const image_type& img,
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
+    )
+    {
+        // Convenience wrapper: delegate to the overload that fills in a user
+        // supplied column vector and return the result by value.
+        matrix<double, 0, 1> result;
+        extract_fhog_features(img, result, cell_size, filter_rows_padding, filter_cols_padding);
+        return result;
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    inline point image_to_fhog (
+        point p,
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT( cell_size > 0 &&
+                     filter_rows_padding > 0 &&
+                     filter_cols_padding > 0 ,
+            "\t point image_to_fhog()"
+            << "\n\t Invalid inputs were given to this function. "
+            << "\n\t cell_size: " << cell_size
+            << "\n\t filter_rows_padding: " << filter_rows_padding
+            << "\n\t filter_cols_padding: " << filter_cols_padding
+            );
+
+        // Strip the one pixel border that surrounds the input image, scale
+        // down by the cell size, then account for the one cell border lost
+        // during HOG formation plus any zero padding that was requested.
+        const point pad((filter_cols_padding-1)/2, (filter_rows_padding-1)/2);
+        const point q = p - point(1,1);
+        return q/cell_size - point(1,1) + pad;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    inline rectangle image_to_fhog (
+        const rectangle& rect,
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT( cell_size > 0 &&
+                     filter_rows_padding > 0 &&
+                     filter_cols_padding > 0 ,
+            "\t rectangle image_to_fhog()"
+            << "\n\t Invalid inputs were given to this function. "
+            << "\n\t cell_size: " << cell_size
+            << "\n\t filter_rows_padding: " << filter_rows_padding
+            << "\n\t filter_cols_padding: " << filter_cols_padding
+            );
+
+        // Map both corners into FHOG space and rebuild the rectangle.
+        const point tl = image_to_fhog(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding);
+        const point br = image_to_fhog(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding);
+        return rectangle(tl, br);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    inline point fhog_to_image (
+        point p,
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT( cell_size > 0 &&
+                     filter_rows_padding > 0 &&
+                     filter_cols_padding > 0 ,
+            "\t point fhog_to_image()"
+            << "\n\t Invalid inputs were given to this function. "
+            << "\n\t cell_size: " << cell_size
+            << "\n\t filter_rows_padding: " << filter_rows_padding
+            << "\n\t filter_cols_padding: " << filter_cols_padding
+            );
+
+        // Map the cell index back into image coordinates and then move to the
+        // center of that cell.  The half-cell offset must point toward the
+        // cell interior, so its sign tracks the sign of each coordinate
+        // (integer division truncates toward zero, matching the original
+        // quadrant-by-quadrant logic).
+        p = (p+point(1,1)-point((filter_cols_padding-1)/2,(filter_rows_padding-1)/2))*cell_size + point(1,1);
+        const long half = cell_size/2;
+        const point offset(p.x() >= 0 ? half : -half,
+                           p.y() >= 0 ? half : -half);
+        return p + offset;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    inline rectangle fhog_to_image (
+        const rectangle& rect,
+        int cell_size = 8,
+        int filter_rows_padding = 1,
+        int filter_cols_padding = 1
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT( cell_size > 0 &&
+                     filter_rows_padding > 0 &&
+                     filter_cols_padding > 0 ,
+            "\t rectangle fhog_to_image()"
+            << "\n\t Invalid inputs were given to this function. "
+            << "\n\t cell_size: " << cell_size
+            << "\n\t filter_rows_padding: " << filter_rows_padding
+            << "\n\t filter_cols_padding: " << filter_cols_padding
+            );
+
+        // Map both corners back into image space and rebuild the rectangle.
+        const point tl = fhog_to_image(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding);
+        const point br = fhog_to_image(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding);
+        return rectangle(tl, br);
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T,
+        typename mm1,
+        typename mm2
+        >
+    matrix<unsigned char> draw_fhog(
+        const dlib::array<array2d<T,mm1>,mm2>& hog,
+        const long cell_draw_size = 15,
+        const float min_response_threshold = 0.0
+    )
+    {
+        // Renders a planar 31-plane FHOG feature map (as produced by the
+        // planar extract_fhog_features()) into a grayscale image.  Each HOG
+        // cell becomes a cell_draw_size x cell_draw_size tile containing
+        // oriented bars whose brightness reflects the gradient energy in
+        // that orientation.  Cells whose summed response is not above
+        // min_response_threshold are left black.
+
+        // make sure requires clause is not broken
+        DLIB_ASSERT( cell_draw_size > 0 && hog.size()==31,
+            "\t matrix<unsigned char> draw_fhog()"
+            << "\n\t Invalid inputs were given to this function. "
+            << "\n\t cell_draw_size: " << cell_draw_size
+            << "\n\t hog.size(): " << hog.size()
+            );
+
+        // One oriented bar image per orientation bin (9 bins).
+        dlib::array<matrix<float> > mbars;
+        impl_fhog::create_fhog_bar_images(mbars,cell_draw_size);
+
+        // now draw the bars onto the HOG cells
+        matrix<float> himg(hog[0].nr()*cell_draw_size, hog[0].nc()*cell_draw_size);
+        himg = 0;
+        for (unsigned long d = 0; d < mbars.size(); ++d)
+        {
+            for (long r = 0; r < himg.nr(); r+=cell_draw_size)
+            {
+                for (long c = 0; c < himg.nc(); c+=cell_draw_size)
+                {
+                    // Sum the three planes associated with orientation bin d
+                    // (planes d, d+9, and d+18).  Note that the last 4 planes
+                    // (27..30) are never drawn.
+                    const float val = hog[d][r/cell_draw_size][c/cell_draw_size] +
+                                      hog[d+mbars.size()][r/cell_draw_size][c/cell_draw_size] +
+                                      hog[d+mbars.size()*2][r/cell_draw_size][c/cell_draw_size];
+                    if (val > min_response_threshold)
+                    {
+                        set_subm(himg, r, c, cell_draw_size, cell_draw_size) += val*mbars[d%mbars.size()];
+                    }
+                }
+            }
+        }
+
+        // Scale to 8 bits such that responses beyond mean + 4 standard
+        // deviations saturate at 255.
+        const float thresh = mean(himg) + 4 * stddev(himg);
+        if (thresh != 0)
+            return matrix_cast<unsigned char>(upperbound(round(himg*255/thresh),255));
+        else
+            return matrix_cast<unsigned char>(himg);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    matrix<unsigned char> draw_fhog (
+        const std::vector<matrix<T> >& hog,
+        const long cell_draw_size = 15,
+        const float min_response_threshold = 0.0
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT( cell_draw_size > 0 && hog.size()==31,
+            "\t matrix<unsigned char> draw_fhog()"
+            << "\n\t Invalid inputs were given to this function. "
+            << "\n\t cell_draw_size: " << cell_draw_size
+            << "\n\t hog.size(): " << hog.size()
+            );
+
+        // Repackage the vector-of-matrices input as an array<array2d<T>> and
+        // let the planar draw_fhog() overload do the actual rendering.
+        dlib::array<array2d<T> > planes(hog.size());
+        for (unsigned long i = 0; i < planes.size(); ++i)
+        {
+            const long nrows = hog[i].nr();
+            const long ncols = hog[i].nc();
+            planes[i].set_size(nrows, ncols);
+            for (long row = 0; row < nrows; ++row)
+            {
+                for (long col = 0; col < ncols; ++col)
+                {
+                    planes[i][row][col] = hog[i](row,col);
+                }
+            }
+        }
+        return draw_fhog(planes, cell_draw_size, min_response_threshold);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T,
+        typename mm
+        >
+    matrix<unsigned char> draw_fhog(
+        const array2d<matrix<T,31,1>,mm>& hog,
+        const long cell_draw_size = 15,
+        const float min_response_threshold = 0.0
+    )
+    {
+        // Renders an interlaced FHOG feature map (one 31 dimensional vector
+        // per cell, as produced by the interlaced extract_fhog_features())
+        // into a grayscale image, in the same way as the planar overload but
+        // indexing into each cell's vector instead of into separate planes.
+
+        // make sure requires clause is not broken
+        DLIB_ASSERT( cell_draw_size > 0,
+            "\t matrix<unsigned char> draw_fhog()"
+            << "\n\t Invalid inputs were given to this function. "
+            << "\n\t cell_draw_size: " << cell_draw_size
+            );
+
+        // One oriented bar image per orientation bin (9 bins).
+        dlib::array<matrix<float> > mbars;
+        impl_fhog::create_fhog_bar_images(mbars,cell_draw_size);
+
+        // now draw the bars onto the HOG cells
+        matrix<float> himg(hog.nr()*cell_draw_size, hog.nc()*cell_draw_size);
+        himg = 0;
+        for (unsigned long d = 0; d < mbars.size(); ++d)
+        {
+            for (long r = 0; r < himg.nr(); r+=cell_draw_size)
+            {
+                for (long c = 0; c < himg.nc(); c+=cell_draw_size)
+                {
+                    // Sum the three vector elements associated with
+                    // orientation bin d (elements d, d+9, and d+18).  The
+                    // last 4 elements (27..30) are never drawn.
+                    const float val = hog[r/cell_draw_size][c/cell_draw_size](d) +
+                                      hog[r/cell_draw_size][c/cell_draw_size](d+mbars.size()) +
+                                      hog[r/cell_draw_size][c/cell_draw_size](d+mbars.size()*2);
+                    if (val > min_response_threshold)
+                    {
+                        set_subm(himg, r, c, cell_draw_size, cell_draw_size) += val*mbars[d%mbars.size()];
+                    }
+                }
+            }
+        }
+
+        // Scale to 8 bits such that responses beyond mean + 4 standard
+        // deviations saturate at 255.
+        const float thresh = mean(himg) + 4 * stddev(himg);
+        if (thresh != 0)
+            return matrix_cast<unsigned char>(upperbound(round(himg*255/thresh),255));
+        else
+            return matrix_cast<unsigned char>(himg);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_fHOG_Hh_
+
diff --git a/ml/dlib/dlib/image_transforms/fhog_abstract.h b/ml/dlib/dlib/image_transforms/fhog_abstract.h
new file mode 100644
index 000000000..f66c5d55a
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/fhog_abstract.h
@@ -0,0 +1,346 @@
+// Copyright (C) 2013 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_fHOG_ABSTRACT_Hh_
+#ifdef DLIB_fHOG_ABSTRACT_Hh_
+
+#include "../matrix/matrix_abstract.h"
+#include "../array2d/array2d_kernel_abstract.h"
+#include "../array/array_kernel_abstract.h"
+#include "../image_processing/generic_image.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename T,
+ typename mm
+ >
+ void extract_fhog_features(
+ const image_type& img,
+ array2d<matrix<T,31,1>,mm>& hog,
+ int cell_size = 8,
+ int filter_rows_padding = 1,
+ int filter_cols_padding = 1
+ );
+ /*!
+ requires
+ - cell_size > 0
+ - filter_rows_padding > 0
+ - filter_cols_padding > 0
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - T should be float or double
+ ensures
+ - This function implements the HOG feature extraction method described in
+ the paper:
+ Object Detection with Discriminatively Trained Part Based Models by
+ P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan
+ IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010
+ This means that it takes an input image img and outputs Felzenszwalb's
+ 31 dimensional version of HOG features, which are stored into #hog.
+ - The input image is broken into cells that are cell_size by cell_size pixels
+ and within each cell we compute a 31 dimensional FHOG vector. This vector
+ describes the gradient structure within the cell.
+ - A common task is to convolve each channel of the hog image with a linear
+ filter. This is made more convenient if the contents of #hog includes extra
+ rows and columns of zero padding along the borders. This extra padding
+ allows for more efficient convolution code since the code does not need to
+ perform expensive boundary checking. Therefore, you can set
+ filter_rows_padding and filter_cols_padding to indicate the size of the
+ filter you wish to use and this function will ensure #hog has the appropriate
+ extra zero padding along the borders. In particular, it will include the
+ following extra padding:
+ - (filter_rows_padding-1)/2 extra rows of zeros on the top of #hog.
+ - (filter_cols_padding-1)/2 extra columns of zeros on the left of #hog.
+ - filter_rows_padding/2 extra rows of zeros on the bottom of #hog.
+ - filter_cols_padding/2 extra columns of zeros on the right of #hog.
+ Therefore, the extra padding is done such that functions like
+ spatially_filter_image() apply their filters to the entire content containing
+ area of a hog image (note that you should use the following planar version of
+ extract_fhog_features() instead of the interlaced version if you want to use
+ spatially_filter_image() on a hog image).
+ - #hog.nr() == max(round(img.nr()/(double)cell_size)-2,0) + filter_rows_padding-1.
+ - #hog.nc() == max(round(img.nc()/(double)cell_size)-2,0) + filter_cols_padding-1.
+ (i.e. Each output dimension is roughly 1/cell_size the original size but
+ there is a one cell_size border all around the image that is lost and then we
+ add on any additional padding that is requested.)
+ - for all valid r and c:
+ - #hog[r][c] == the FHOG vector describing the cell centered at the pixel location
+ fhog_to_image(point(c,r),cell_size,filter_rows_padding,filter_cols_padding) in img.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename T,
+ typename mm1,
+ typename mm2
+ >
+ void extract_fhog_features(
+ const image_type& img,
+ dlib::array<array2d<T,mm1>,mm2>& hog,
+ int cell_size = 8,
+ int filter_rows_padding = 1,
+ int filter_cols_padding = 1
+ );
+ /*!
+ requires
+ - cell_size > 0
+ - filter_rows_padding > 0
+ - filter_cols_padding > 0
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - T should be float or double
+ ensures
+ - This function is identical to the above extract_fhog_features() routine
+ except that it outputs the results in a planar format rather than the
+ interlaced format used above. That is, each element of the hog vector is
+ placed into one of 31 images inside #hog. To be precise, if vhog is the
+ output of the above interlaced version of extract_fhog_features() then we
+ will have, for all valid r and c:
+ - #hog[i][r][c] == vhog[r][c](i)
+ (where 0 <= i < 31)
+ - #hog.size() == 31
+ - for all valid i:
+ - #hog[i].nr() == hog[0].nr()
+ - #hog[i].nc() == hog[0].nc()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ matrix<double,0,1> extract_fhog_features(
+ const image_type& img,
+ int cell_size = 8,
+ int filter_rows_padding = 1,
+ int filter_cols_padding = 1
+ );
+ /*!
+ requires
+ - cell_size > 0
+ - filter_rows_padding > 0
+ - filter_cols_padding > 0
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ ensures
+ - This function calls the above extract_fhog_features() routine and simply
+ packages the entire output into a dlib::matrix. The matrix is constructed
+ using the planar version of extract_fhog_features() and then each output
+ plane is converted into a column vector and subsequently all 31 column
+ vectors are concatenated together and returned.
+ - Each plane is converted into a column vector using reshape_to_column_vector(),
+ and is therefore represented in row major order inside the returned vector.
+ - If H is the array<array2d<double>> object output by the planar
+ extract_fhog_features() then the returned vector is composed by concatenating
+ H[0], then H[1], then H[2], and so on in ascending index order.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename T
+ >
+ void extract_fhog_features(
+ const image_type& img,
+ matrix<T,0,1>& feats,
+ int cell_size = 8,
+ int filter_rows_padding = 1,
+ int filter_cols_padding = 1
+ );
+ /*!
+ requires
+ - cell_size > 0
+ - filter_rows_padding > 0
+ - filter_cols_padding > 0
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - T is float, double, or long double
+ ensures
+ - This function is identical to the above version of extract_fhog_features()
+ that returns a matrix<double,0,1> except that it returns the matrix here
+ through a reference argument instead of returning it by value.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ inline point image_to_fhog (
+ point p,
+ int cell_size = 8,
+ int filter_rows_padding = 1,
+ int filter_cols_padding = 1
+ );
+ /*!
+ requires
+ - cell_size > 0
+ - filter_rows_padding > 0
+ - filter_cols_padding > 0
+ ensures
+ - When using extract_fhog_features(), each FHOG cell is extracted from a
+ certain region in the input image. image_to_fhog() returns the identity of
+ the FHOG cell containing the image pixel at location p. Or in other words,
+ let P == image_to_fhog(p) and hog be a FHOG feature map output by
+ extract_fhog_features(), then hog[P.y()][P.x()] == the FHOG vector/cell
+ containing the point p in the input image. Note that some image points
+ might not have corresponding feature locations. E.g. border points or points
+ outside the image. In these cases the returned point will be outside the
+ input image.
+ - Note that you should use the same values of cell_size, filter_rows_padding,
+ and filter_cols_padding that you used with extract_fhog_features().
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ inline rectangle image_to_fhog (
+ const rectangle& rect,
+ int cell_size = 8,
+ int filter_rows_padding = 1,
+ int filter_cols_padding = 1
+ );
+ /*!
+ requires
+ - cell_size > 0
+ - filter_rows_padding > 0
+ - filter_cols_padding > 0
+ ensures
+ - maps a rectangle from image space to fhog space. In particular this function returns:
+ rectangle(image_to_fhog(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding),
+ image_to_fhog(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding))
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ inline point fhog_to_image (
+ point p,
+ int cell_size = 8,
+ int filter_rows_padding = 1,
+ int filter_cols_padding = 1
+ );
+ /*!
+ requires
+ - cell_size > 0
+ - filter_rows_padding > 0
+ - filter_cols_padding > 0
+ ensures
+ - Maps a pixel in a FHOG image (produced by extract_fhog_features()) back to the
+              corresponding original input pixel.  Note that since FHOG images are
+              spatially downsampled by aggregation into cells, the mapping is not totally
+              invertible.  Therefore, the returned location will be the center of the cell
+ in the original image that contained the FHOG vector at position p. Moreover,
+ cell_size, filter_rows_padding, and filter_cols_padding should be set to the
+ values used by the call to extract_fhog_features().
+ - Mapping from fhog space to image space is an invertible transformation. That
+ is, for any point P we have P == image_to_fhog(fhog_to_image(P,cell_size,filter_rows_padding,filter_cols_padding),
+ cell_size,filter_rows_padding,filter_cols_padding).
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ inline rectangle fhog_to_image (
+ const rectangle& rect,
+ int cell_size = 8,
+ int filter_rows_padding = 1,
+ int filter_cols_padding = 1
+ );
+ /*!
+ requires
+ - cell_size > 0
+ - filter_rows_padding > 0
+ - filter_cols_padding > 0
+ ensures
+ - maps a rectangle from fhog space to image space. In particular this function returns:
+ rectangle(fhog_to_image(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding),
+ fhog_to_image(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding))
+ - Mapping from fhog space to image space is an invertible transformation. That
+ is, for any rectangle R we have R == image_to_fhog(fhog_to_image(R,cell_size,filter_rows_padding,filter_cols_padding),
+ cell_size,filter_rows_padding,filter_cols_padding).
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename T,
+ typename mm1,
+ typename mm2
+ >
+ matrix<unsigned char> draw_fhog(
+ const dlib::array<array2d<T,mm1>,mm2>& hog,
+ const long cell_draw_size = 15,
+ const float min_response_threshold = 0.0
+ );
+ /*!
+ requires
+ - cell_draw_size > 0
+ - hog.size() == 31
+ ensures
+ - Interprets hog as a FHOG feature map output by extract_fhog_features() and
+ converts it into an image suitable for display on the screen. In particular,
+ we draw all the hog cells into a grayscale image in a way that shows the
+ magnitude and orientation of the gradient energy in each cell. The result is
+ then returned.
+ - The size of the cells in the output image will be rendered as cell_draw_size
+ pixels wide and tall.
+ - HOG cells with a response value less than min_response_threshold are not
+ drawn.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename T
+ >
+ matrix<unsigned char> draw_fhog (
+ const std::vector<matrix<T> >& hog,
+ const long cell_draw_size = 15,
+ const float min_response_threshold = 0.0
+ );
+ /*!
+ requires
+ - cell_draw_size > 0
+ - hog.size() == 31
+ ensures
+ - This function just converts the given hog object into an array<array2d<T>>
+ and passes it to the above draw_fhog() routine and returns the results.
+ - HOG cells with a response value less than min_response_threshold are not
+ drawn.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename T,
+ typename mm
+ >
+ matrix<unsigned char> draw_fhog(
+ const array2d<matrix<T,31,1>,mm>& hog,
+ const long cell_draw_size = 15,
+ const float min_response_threshold = 0.0
+ );
+ /*!
+ requires
+ - cell_draw_size > 0
+ ensures
+ - Interprets hog as a FHOG feature map output by extract_fhog_features() and
+ converts it into an image suitable for display on the screen. In particular,
+ we draw all the hog cells into a grayscale image in a way that shows the
+ magnitude and orientation of the gradient energy in each cell. The result is
+ then returned.
+ - The size of the cells in the output image will be rendered as cell_draw_size
+ pixels wide and tall.
+ - HOG cells with a response value less than min_response_threshold are not
+ drawn.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_fHOG_ABSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/image_transforms/hough_transform.h b/ml/dlib/dlib/image_transforms/hough_transform.h
new file mode 100644
index 000000000..477b4dc2b
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/hough_transform.h
@@ -0,0 +1,358 @@
+// Copyright (C) 2014 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_HOUGH_tRANSFORM_Hh_
+#define DLIB_HOUGH_tRANSFORM_Hh_
+
+#include "hough_transform_abstract.h"
+#include "../image_processing/generic_image.h"
+#include "../geometry.h"
+#include "../algs.h"
+#include "assign_image.h"
+#include <limits>
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    class hough_transform
+    {
+        // Computes the line finding version of the Hough transform.  All the
+        // trigonometry is precomputed in the constructor into 16.16 fixed
+        // point lookup tables (xcos_theta and ysin_theta) so that
+        // accumulating votes in operator() requires only integer adds and
+        // shifts.  Accumulator coordinates are (angle bin, radius bin) =
+        // (x, y).
+
+    public:
+        explicit hough_transform (
+            unsigned long size_
+        ) : _size(size_)
+        {
+            DLIB_CASSERT(size_ > 0,
+                "\t hough_transform::hough_transform(size_)"
+                << "\n\t Invalid arguments given to this function."
+                );
+
+            even_size = _size - (_size%2);
+
+            const point cent = center(rectangle(0,0,size_-1,size_-1));
+            xcos_theta.set_size(size_, size_);
+            ysin_theta.set_size(size_, size_);
+
+            // Tabulate cos and sin for every angle bin, scaled into 16.16
+            // fixed point and divided by sqrt(2) so every radius fits inside
+            // the accumulator array.
+            std::vector<double> cos_theta(size_), sin_theta(size_);
+            const double scale = 1<<16;
+            for (unsigned long t = 0; t < size_; ++t)
+            {
+                double theta = t*pi/even_size;
+
+                cos_theta[t] = scale*std::cos(theta)/sqrt_2;
+                sin_theta[t] = scale*std::sin(theta)/sqrt_2;
+            }
+            // Shifts radii to the middle of the accumulator; the extra 0.5
+            // makes the later truncation to int behave like rounding.
+            const double offset = scale*even_size/4.0 + 0.5;
+
+            for (unsigned long c = 0; c < size_; ++c)
+            {
+                const long x = c - cent.x();
+                for (unsigned long t = 0; t < size_; ++t)
+                    xcos_theta(c,t) = static_cast<int32>(x*cos_theta[t] + offset);
+            }
+            for (unsigned long r = 0; r < size_; ++r)
+            {
+                const long y = r - cent.y();
+                for (unsigned long t = 0; t < size_; ++t)
+                    ysin_theta(r,t) = static_cast<int32>(y*sin_theta[t] + offset);
+            }
+        }
+
+        // Returns the size of the Hough transforms generated by this object.
+        // Both the accumulator array and the expected input window are
+        // size() by size() pixels.
+        unsigned long size(
+        ) const { return _size; }
+
+        long nr(
+        ) const { return _size; }
+
+        long nc(
+        ) const { return _size; }
+
+        // Returns a line segment, in input image coordinates, corresponding
+        // to the Hough accumulator bin p.  The segment is clipped to the
+        // transform's box.
+        std::pair<point, point> get_line (
+            const point& p
+        ) const
+        {
+            DLIB_ASSERT(rectangle(0,0,size()-1,size()-1).contains(p) == true,
+                "\t pair<point,point> hough_transform::get_line(point)"
+                << "\n\t Invalid arguments given to this function."
+                << "\n\t p: " << p
+                << "\n\t size(): " << size()
+                );
+
+            // First we compute the radius measured in pixels from the center and the theta
+            // angle in radians.
+            typedef dlib::vector<double,2> vect;
+            const rectangle box(0,0,size()-1,size()-1);
+            const vect cent = center(box);
+            double theta = p.x()-cent.x();
+            double radius = p.y()-cent.y();
+            theta = theta*pi/even_size;
+            radius = radius*sqrt_2 + 0.5;
+
+            // now make a line segment on the line.
+            vect v1 = cent + vect(size()+1000,0) + vect(0,radius);
+            vect v2 = cent - vect(size()+1000,0) + vect(0,radius);
+            point p1 = rotate_point(cent, v1, theta);
+            point p2 = rotate_point(cent, v2, theta);
+
+            clip_line_to_rectangle(box, p1, p2);
+
+            return std::make_pair(p1,p2);
+        }
+
+        // Walks along the Hough curve generated by the image point p and
+        // returns the accumulator location in himg_ with the largest vote.
+        // himg_ must be a size() by size() grayscale image, typically one
+        // produced by operator().  This method only reads the precomputed
+        // tables, so it is now const (it was previously non-const even
+        // though it modified nothing).
+        template <
+            typename image_type
+            >
+        point get_best_hough_point (
+            const point& p,
+            const image_type& himg_
+        ) const
+        {
+            const const_image_view<image_type> himg(himg_);
+
+            DLIB_ASSERT(himg.nr() == size() && himg.nc() == size() &&
+                rectangle(0,0,size()-1,size()-1).contains(p) == true,
+                "\t point hough_transform::get_best_hough_point()"
+                << "\n\t Invalid arguments given to this function."
+                << "\n\t himg.nr(): " << himg.nr()
+                << "\n\t himg.nc(): " << himg.nc()
+                << "\n\t size(): " << size()
+                << "\n\t p: " << p
+                );
+
+
+            typedef typename image_traits<image_type>::pixel_type pixel_type;
+            COMPILE_TIME_ASSERT(pixel_traits<pixel_type>::grayscale == true);
+            // Use lowest() rather than min(): for floating point pixel types
+            // min() is the smallest *positive* value, which would make cells
+            // holding non-positive votes unreachable as maxima.
+            pixel_type best_val = std::numeric_limits<pixel_type>::lowest();
+            point best_point;
+
+
+            // The scan over the angle bins t is unrolled by 8 and then by 4
+            // for speed; rr* are the radius bins looked up from the fixed
+            // point tables (>>16 drops the fractional bits).
+            const long max_n8 = (himg.nc()/8)*8;
+            const long max_n4 = (himg.nc()/4)*4;
+            const long r = p.y();
+            const long c = p.x();
+
+            const int32* ysin = &ysin_theta(r,0);
+            const int32* xcos = &xcos_theta(c,0);
+            long t = 0;
+            while(t < max_n8)
+            {
+                long rr0 = (*xcos++ + *ysin++)>>16;
+                long rr1 = (*xcos++ + *ysin++)>>16;
+                long rr2 = (*xcos++ + *ysin++)>>16;
+                long rr3 = (*xcos++ + *ysin++)>>16;
+                long rr4 = (*xcos++ + *ysin++)>>16;
+                long rr5 = (*xcos++ + *ysin++)>>16;
+                long rr6 = (*xcos++ + *ysin++)>>16;
+                long rr7 = (*xcos++ + *ysin++)>>16;
+
+                if (himg[rr0][t++] > best_val)
+                {
+                    best_val = himg[rr0][t-1];
+                    best_point.x() = t-1;
+                    best_point.y() = rr0;
+                }
+                if (himg[rr1][t++] > best_val)
+                {
+                    best_val = himg[rr1][t-1];
+                    best_point.x() = t-1;
+                    best_point.y() = rr1;
+                }
+                if (himg[rr2][t++] > best_val)
+                {
+                    best_val = himg[rr2][t-1];
+                    best_point.x() = t-1;
+                    best_point.y() = rr2;
+                }
+                if (himg[rr3][t++] > best_val)
+                {
+                    best_val = himg[rr3][t-1];
+                    best_point.x() = t-1;
+                    best_point.y() = rr3;
+                }
+                if (himg[rr4][t++] > best_val)
+                {
+                    best_val = himg[rr4][t-1];
+                    best_point.x() = t-1;
+                    best_point.y() = rr4;
+                }
+                if (himg[rr5][t++] > best_val)
+                {
+                    best_val = himg[rr5][t-1];
+                    best_point.x() = t-1;
+                    best_point.y() = rr5;
+                }
+                if (himg[rr6][t++] > best_val)
+                {
+                    best_val = himg[rr6][t-1];
+                    best_point.x() = t-1;
+                    best_point.y() = rr6;
+                }
+                if (himg[rr7][t++] > best_val)
+                {
+                    best_val = himg[rr7][t-1];
+                    best_point.x() = t-1;
+                    best_point.y() = rr7;
+                }
+            }
+            while(t < max_n4)
+            {
+                long rr0 = (*xcos++ + *ysin++)>>16;
+                long rr1 = (*xcos++ + *ysin++)>>16;
+                long rr2 = (*xcos++ + *ysin++)>>16;
+                long rr3 = (*xcos++ + *ysin++)>>16;
+                if (himg[rr0][t++] > best_val)
+                {
+                    best_val = himg[rr0][t-1];
+                    best_point.x() = t-1;
+                    best_point.y() = rr0;
+                }
+                if (himg[rr1][t++] > best_val)
+                {
+                    best_val = himg[rr1][t-1];
+                    best_point.x() = t-1;
+                    best_point.y() = rr1;
+                }
+                if (himg[rr2][t++] > best_val)
+                {
+                    best_val = himg[rr2][t-1];
+                    best_point.x() = t-1;
+                    best_point.y() = rr2;
+                }
+                if (himg[rr3][t++] > best_val)
+                {
+                    best_val = himg[rr3][t-1];
+                    best_point.x() = t-1;
+                    best_point.y() = rr3;
+                }
+            }
+            while(t < himg.nc())
+            {
+                long rr0 = (*xcos++ + *ysin++)>>16;
+                if (himg[rr0][t++] > best_val)
+                {
+                    best_val = himg[rr0][t-1];
+                    best_point.x() = t-1;
+                    best_point.y() = rr0;
+                }
+            }
+
+            return best_point;
+        }
+
+        // Computes the Hough transform of the part of img_ contained in box
+        // (which must be size() by size()) and stores the result into himg_.
+        // Each non-zero input pixel votes, with weight equal to its value,
+        // for every (angle, radius) bin of a line passing through it.
+        template <
+            typename in_image_type,
+            typename out_image_type
+            >
+        void operator() (
+            const in_image_type& img_,
+            const rectangle& box,
+            out_image_type& himg_
+        ) const
+        {
+            typedef typename image_traits<in_image_type>::pixel_type in_pixel_type;
+            typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
+
+            // Note: the assert message previously (and incorrectly) named
+            // the constructor instead of operator().
+            DLIB_CASSERT(box.width() == size() && box.height() == size(),
+                "\t void hough_transform::operator()"
+                << "\n\t Invalid arguments given to this function."
+                << "\n\t box.width(): " << box.width()
+                << "\n\t box.height(): " << box.height()
+                << "\n\t size(): " << size()
+                );
+
+            COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale == true);
+            COMPILE_TIME_ASSERT(pixel_traits<out_pixel_type>::grayscale == true);
+
+            const_image_view<in_image_type> img(img_);
+            image_view<out_image_type> himg(himg_);
+
+            himg.set_size(size(), size());
+            assign_all_pixels(himg, 0);
+
+            // Only pixels of box that actually lie inside the image can vote.
+            const rectangle area = box.intersect(get_rect(img));
+
+            const long max_n8 = (himg.nc()/8)*8;
+            const long max_n4 = (himg.nc()/4)*4;
+            for (long r = area.top(); r <= area.bottom(); ++r)
+            {
+                const int32* ysin_base = &ysin_theta(r-box.top(),0);
+                for (long c = area.left(); c <= area.right(); ++c)
+                {
+                    const out_pixel_type val = static_cast<out_pixel_type>(img[r][c]);
+                    if (val != 0)
+                    {
+                        /*
+                        // The code in this comment is equivalent to the more complex but
+                        // faster code below.  We keep this simple version of the Hough
+                        // transform implementation here just to document what it's doing
+                        // more clearly.
+                        const point cent = center(box);
+                        const long x = c - cent.x();
+                        const long y = r - cent.y();
+                        for (long t = 0; t < himg.nc(); ++t)
+                        {
+                            double theta = t*pi/even_size;
+                            double radius = (x*std::cos(theta) + y*std::sin(theta))/sqrt_2 + even_size/2 + 0.5;
+                            long rr = static_cast<long>(radius);
+                            himg[rr][t] += val;
+                        }
+                        continue;
+                        */
+
+                        // Run the speed optimized version of the code in the above
+                        // comment.  The angle loop is unrolled by 8 and then by 4.
+                        const int32* ysin = ysin_base;
+                        const int32* xcos = &xcos_theta(c-box.left(),0);
+                        long t = 0;
+                        while(t < max_n8)
+                        {
+                            long rr0 = (*xcos++ + *ysin++)>>16;
+                            long rr1 = (*xcos++ + *ysin++)>>16;
+                            long rr2 = (*xcos++ + *ysin++)>>16;
+                            long rr3 = (*xcos++ + *ysin++)>>16;
+                            long rr4 = (*xcos++ + *ysin++)>>16;
+                            long rr5 = (*xcos++ + *ysin++)>>16;
+                            long rr6 = (*xcos++ + *ysin++)>>16;
+                            long rr7 = (*xcos++ + *ysin++)>>16;
+
+                            himg[rr0][t++] += val;
+                            himg[rr1][t++] += val;
+                            himg[rr2][t++] += val;
+                            himg[rr3][t++] += val;
+                            himg[rr4][t++] += val;
+                            himg[rr5][t++] += val;
+                            himg[rr6][t++] += val;
+                            himg[rr7][t++] += val;
+                        }
+                        while(t < max_n4)
+                        {
+                            long rr0 = (*xcos++ + *ysin++)>>16;
+                            long rr1 = (*xcos++ + *ysin++)>>16;
+                            long rr2 = (*xcos++ + *ysin++)>>16;
+                            long rr3 = (*xcos++ + *ysin++)>>16;
+                            himg[rr0][t++] += val;
+                            himg[rr1][t++] += val;
+                            himg[rr2][t++] += val;
+                            himg[rr3][t++] += val;
+                        }
+                        while(t < himg.nc())
+                        {
+                            long rr0 = (*xcos++ + *ysin++)>>16;
+                            himg[rr0][t++] += val;
+                        }
+                    }
+                }
+            }
+        }
+
+    private:
+
+        unsigned long _size;
+        unsigned long even_size; // equal to _size if _size is even, otherwise equal to _size-1.
+        matrix<int32> xcos_theta, ysin_theta;
+    };
+}
+
+#endif // DLIB_HOUGH_tRANSFORM_Hh_
+
diff --git a/ml/dlib/dlib/image_transforms/hough_transform_abstract.h b/ml/dlib/dlib/image_transforms/hough_transform_abstract.h
new file mode 100644
index 000000000..f0ff2b550
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/hough_transform_abstract.h
@@ -0,0 +1,145 @@
+// Copyright (C) 2014 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_HOUGH_tRANSFORM_ABSTRACT_Hh_
+#ifdef DLIB_HOUGH_tRANSFORM_ABSTRACT_Hh_
+
+#include "../geometry.h"
+#include "../image_processing/generic_image.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ class hough_transform
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object is a tool for computing the line finding version of the Hough
+ transform given some kind of edge detection image as input. It also allows
+ the edge pixels to be weighted such that higher weighted edge pixels
+ contribute correspondingly more to the output of the Hough transform,
+ allowing stronger edges to create correspondingly stronger line detections
+ in the final Hough transform.
+
+ THREAD SAFETY
+ It is safe for multiple threads to make concurrent accesses to this object
+ without synchronization.
+ !*/
+
+ public:
+
+ explicit hough_transform (
+ unsigned long size_
+ );
+ /*!
+ requires
+ - size_ > 0
+ ensures
+ - This object will compute Hough transforms that are size_ by size_ pixels.
+ This is in terms of both the Hough accumulator array size as well as the
+ input image size.
+ - #size() == size_
+ !*/
+
+ unsigned long size(
+ ) const;
+ /*!
+ ensures
+ - returns the size of the Hough transforms generated by this object. In
+ particular, this object creates Hough transform images that are size() by
+ size() pixels in size.
+ !*/
+
+ long nr(
+ ) const;
+ /*!
+ ensures
+ - returns size()
+ !*/
+
+ long nc(
+ ) const;
+ /*!
+ ensures
+ - returns size()
+ !*/
+
+ std::pair<point, point> get_line (
+ const point& p
+ ) const;
+ /*!
+ requires
+ - rectangle(0,0,size()-1,size()-1).contains(p) == true
+ (i.e. p must be a point inside the Hough accumulator array)
+ ensures
+ - returns the line segment in the original image space corresponding
+ to Hough transform point p.
+ - The returned points are inside rectangle(0,0,size()-1,size()-1).
+ !*/
+
+ template <
+ typename image_type
+ >
+ point get_best_hough_point (
+ const point& p,
+ const image_type& himg
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h and it must contain grayscale pixels.
+ - himg.nr() == size()
+ - himg.nc() == size()
+ - rectangle(0,0,size()-1,size()-1).contains(p) == true
+ ensures
+ - This function interprets himg as a Hough image and p as a point in the
+ original image space. Given this, it finds the maximum scoring line that
+ passes though p. That is, it checks all the Hough accumulator bins in
+              passes through p. That is, it checks all the Hough accumulator bins in
+              himg corresponding to lines through p and returns the location with the
+ - returns a point X such that get_rect(himg).contains(X) == true
+ !*/
+
+ template <
+ typename in_image_type,
+ typename out_image_type
+ >
+ void operator() (
+ const in_image_type& img,
+ const rectangle& box,
+ out_image_type& himg
+ ) const;
+ /*!
+ requires
+ - in_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h and it must contain grayscale pixels.
+ - out_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h and it must contain grayscale pixels.
+ - box.width() == size()
+ - box.height() == size()
+ ensures
+ - Computes the Hough transform of the part of img contained within box.
+ In particular, we do a grayscale version of the Hough transform where any
+ non-zero pixel in img is treated as a potential component of a line and
+ accumulated into the Hough accumulator #himg. However, rather than
+ adding 1 to each relevant accumulator bin we add the value of the pixel
+ in img to each Hough accumulator bin. This means that, if all the
+ pixels in img are 0 or 1 then this routine performs a normal Hough
+ transform. However, if some pixels have larger values then they will be
+ weighted correspondingly more in the resulting Hough transform.
+ - #himg.nr() == size()
+ - #himg.nc() == size()
+ - #himg is the Hough transform of the part of img contained in box. Each
+ point in #himg corresponds to a line in the input box. In particular,
+ the line for #himg[y][x] is given by get_line(point(x,y)). Also, when
+ viewing the #himg image, the x-axis gives the angle of the line and the
+ y-axis the distance of the line from the center of the box.
+ !*/
+
+ };
+}
+
+#endif // DLIB_HOUGH_tRANSFORM_ABSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/image_transforms/image_pyramid.h b/ml/dlib/dlib/image_transforms/image_pyramid.h
new file mode 100644
index 000000000..3efed30d8
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/image_pyramid.h
@@ -0,0 +1,1238 @@
+// Copyright (C) 2010 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_IMAGE_PYRaMID_Hh_
+#define DLIB_IMAGE_PYRaMID_Hh_
+
+#include "image_pyramid_abstract.h"
+#include "../pixel.h"
+#include "../array2d.h"
+#include "../geometry.h"
+#include "spatial_filtering.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    // pyramid_disable: a "null" image pyramid.  It exposes the same
+    // interface as the real pyramid_down objects but maps every point to
+    // (0,0) (except the 0-level mappings, which are the identity) and
+    // produces empty downsampled images.  Used to turn pyramids off.
+    class pyramid_disable : noncopyable
+    {
+    public:
+
+        // Maps a point one pyramid level down.  There is no lower level
+        // for this disabled pyramid, so the result is always (0,0).
+        template <typename T>
+        vector<double,2> point_down (
+            const vector<T,2>&
+        ) const
+        {
+            return vector<double,2>(0,0);
+        }
+
+        // Maps a point one pyramid level up.  Always (0,0) as well.
+        template <typename T>
+        vector<double,2> point_up (
+            const vector<T,2>&
+        ) const
+        {
+            return vector<double,2>(0,0);
+        }
+
+        // -----------------------------
+
+        // Multi-level versions: 0 levels is the identity mapping, any
+        // other level count collapses to (0,0).
+        template <typename T>
+        vector<double,2> point_down (
+            const vector<T,2>& p,
+            unsigned int levels
+        ) const
+        {
+            if (levels == 0)
+                return p;
+            else
+                return vector<double,2>(0,0);
+        }
+
+        template <typename T>
+        vector<double,2> point_up (
+            const vector<T,2>& p,
+            unsigned int levels
+        ) const
+        {
+            if (levels == 0)
+                return p;
+            else
+                return vector<double,2>(0,0);
+        }
+
+        // -----------------------------
+
+        // Rectangle versions are defined in terms of the point mappings
+        // applied to the top-left and bottom-right corners.
+        drectangle rect_up (
+            const drectangle& rect
+        ) const
+        {
+            return drectangle(point_up(rect.tl_corner()), point_up(rect.br_corner()));
+        }
+
+        drectangle rect_up (
+            const drectangle& rect,
+            unsigned int levels
+        ) const
+        {
+            return drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels));
+        }
+
+        // -----------------------------
+
+        drectangle rect_down (
+            const drectangle& rect
+        ) const
+        {
+            return drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner()));
+        }
+
+        drectangle rect_down (
+            const drectangle& rect,
+            unsigned int levels
+        ) const
+        {
+            return drectangle(point_down(rect.tl_corner(),levels), point_down(rect.br_corner(),levels));
+        }
+
+        // -----------------------------
+
+    public:
+
+        // "Downsamples" original into down.  Since this pyramid is
+        // disabled, the output is simply set to an empty (0x0) image.
+        // Requires that original and down are distinct objects and that
+        // neither pixel type carries an alpha channel.
+        template <
+            typename in_image_type,
+            typename out_image_type
+            >
+        void operator() (
+        // we do this #ifdef stuff to avoid compiler warnings about unused variables.
+#ifdef ENABLE_ASSERTS
+            const in_image_type& original,
+#else
+            const in_image_type& ,
+#endif
+            out_image_type& down
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(is_same_object(original, down) == false,
+                        "\t void pyramid_disable::operator()"
+                        << "\n\t is_same_object(original, down): " << is_same_object(original, down)
+                        << "\n\t this: " << this
+                        );
+
+            typedef typename image_traits<in_image_type>::pixel_type in_pixel_type;
+            typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
+            COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false );
+            COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false );
+
+            set_image_size(down, 0, 0);
+        }
+
+        // In-place version: replaces img with the empty downsampled image.
+        template <
+            typename image_type
+            >
+        void operator() (
+            image_type& img
+        ) const
+        {
+            typedef typename image_traits<image_type>::pixel_type pixel_type;
+            COMPILE_TIME_ASSERT( pixel_traits<pixel_type>::has_alpha == false );
+            set_image_size(img, 0, 0);
+        }
+    };
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ namespace impl
+ {
+
+        // pyramid_down_2_1: downsamples an image by a factor of 2 per
+        // level.  It smooths with a 5x5 separable Gaussian filter
+        // (1 4 6 4 1 in each direction, total weight 256) and drops every
+        // other row and column in the same pass.
+        class pyramid_down_2_1 : noncopyable
+        {
+        public:
+
+            // Maps a point from the original image into the coordinate
+            // system of the downsampled image.
+            template <typename T>
+            vector<double,2> point_down (
+                const vector<T,2>& p
+            ) const
+            {
+                return p/2.0 - vector<double,2>(1.25,0.75);
+            }
+
+            // Inverse of point_down(): maps a downsampled-image point
+            // back into the original image's coordinate system.
+            template <typename T>
+            vector<double,2> point_up (
+                const vector<T,2>& p
+            ) const
+            {
+                return (p + vector<T,2>(1.25,0.75))*2;
+            }
+
+        // -----------------------------
+
+            // Multi-level versions: apply the single-level mapping
+            // `levels` times.
+            template <typename T>
+            vector<double,2> point_down (
+                const vector<T,2>& p,
+                unsigned int levels
+            ) const
+            {
+                vector<double,2> temp = p;
+                for (unsigned int i = 0; i < levels; ++i)
+                    temp = point_down(temp);
+                return temp;
+            }
+
+            template <typename T>
+            vector<double,2> point_up (
+                const vector<T,2>& p,
+                unsigned int levels
+            ) const
+            {
+                vector<double,2> temp = p;
+                for (unsigned int i = 0; i < levels; ++i)
+                    temp = point_up(temp);
+                return temp;
+            }
+
+        // -----------------------------
+
+            // Rectangle versions: map the two corners with the point
+            // mappings above.
+            drectangle rect_up (
+                const drectangle& rect
+            ) const
+            {
+                return drectangle(point_up(rect.tl_corner()), point_up(rect.br_corner()));
+            }
+
+            drectangle rect_up (
+                const drectangle& rect,
+                unsigned int levels
+            ) const
+            {
+                return drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels));
+            }
+
+        // -----------------------------
+
+            drectangle rect_down (
+                const drectangle& rect
+            ) const
+            {
+                return drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner()));
+            }
+
+            drectangle rect_down (
+                const drectangle& rect,
+                unsigned int levels
+            ) const
+            {
+                return drectangle(point_down(rect.tl_corner(),levels), point_down(rect.br_corner(),levels));
+            }
+
+        // -----------------------------
+
+        private:
+            // Compile-time test used to select the fast RGB overload below
+            // when both the input and output images hold rgb pixels.
+            template <typename T, typename U>
+            struct both_images_rgb
+            {
+                typedef typename image_traits<T>::pixel_type T_pix;
+                typedef typename image_traits<U>::pixel_type U_pix;
+                const static bool value = pixel_traits<T_pix>::rgb && pixel_traits<U_pix>::rgb;
+            };
+        public:
+
+            // Generic (non-RGB) overload: smooth original_ with the 5x5
+            // Gaussian and write the 2x downsampled image to down_.
+            // Inputs 8 pixels or smaller in either dimension produce an
+            // empty output.
+            template <
+                typename in_image_type,
+                typename out_image_type
+                >
+            typename disable_if<both_images_rgb<in_image_type,out_image_type> >::type operator() (
+                const in_image_type& original_,
+                out_image_type& down_
+            ) const
+            {
+                // make sure requires clause is not broken
+                DLIB_ASSERT( is_same_object(original_, down_) == false,
+                            "\t void pyramid_down_2_1::operator()"
+                            << "\n\t is_same_object(original_, down_): " << is_same_object(original_, down_)
+                            << "\n\t this: " << this
+                            );
+
+                typedef typename image_traits<in_image_type>::pixel_type in_pixel_type;
+                typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
+                COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false );
+                COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false );
+
+                const_image_view<in_image_type> original(original_);
+                image_view<out_image_type> down(down_);
+
+                if (original.nr() <= 8 || original.nc() <= 8)
+                {
+                    down.clear();
+                    return;
+                }
+
+                // Accumulate in a promoted type so the 1-4-6-4-1 weighted
+                // sums can't overflow the pixel's basic type.
+                typedef typename pixel_traits<in_pixel_type>::basic_pixel_type bp_type;
+                typedef typename promote<bp_type>::type ptype;
+                array2d<ptype> temp_img;
+                temp_img.set_size(original.nr(), (original.nc()-3)/2);
+                down.set_size((original.nr()-3)/2, (original.nc()-3)/2);
+
+
+                // This function applies a 5x5 Gaussian filter to the image.  It
+                // does this by separating the filter into its horizontal and vertical
+                // components and then downsamples the image by dropping every other
+                // row and column.  Note that we can do these things all together in
+                // one step.
+
+                // apply row filter
+                for (long r = 0; r < temp_img.nr(); ++r)
+                {
+                    long oc = 0;
+                    for (long c = 0; c < temp_img.nc(); ++c)
+                    {
+                        ptype pix1;
+                        ptype pix2;
+                        ptype pix3;
+                        ptype pix4;
+                        ptype pix5;
+
+                        assign_pixel(pix1, original[r][oc]);
+                        assign_pixel(pix2, original[r][oc+1]);
+                        assign_pixel(pix3, original[r][oc+2]);
+                        assign_pixel(pix4, original[r][oc+3]);
+                        assign_pixel(pix5, original[r][oc+4]);
+
+                        pix2 *= 4;
+                        pix3 *= 6;
+                        pix4 *= 4;
+
+                        assign_pixel(temp_img[r][c], pix1 + pix2 + pix3 + pix4 + pix5);
+                        oc += 2;
+                    }
+                }
+
+
+                // apply column filter
+                long dr = 0;
+                for (long r = 2; r < temp_img.nr()-2; r += 2)
+                {
+                    for (long c = 0; c < temp_img.nc(); ++c)
+                    {
+                        ptype temp = temp_img[r-2][c] +
+                                     temp_img[r-1][c]*4 +
+                                     temp_img[r ][c]*6 +
+                                     temp_img[r+1][c]*4 +
+                                     temp_img[r+2][c];
+
+                        // 256 is the total weight of the separable 5x5
+                        // kernel (16 per direction).
+                        assign_pixel(down[dr][c],temp/256);
+                    }
+                    ++dr;
+                }
+
+            }
+
+        private:
+            // 16-bit-per-channel accumulator used by the RGB overload.
+            struct rgbptype
+            {
+                uint16 red;
+                uint16 green;
+                uint16 blue;
+            };
+        public:
+            // ------------------------------------------
+            // OVERLOAD FOR RGB TO RGB IMAGES
+            // ------------------------------------------
+            // Same algorithm as the generic overload above, but filters
+            // the three channels explicitly to avoid per-pixel dispatch.
+            template <
+                typename in_image_type,
+                typename out_image_type
+                >
+            typename enable_if<both_images_rgb<in_image_type,out_image_type> >::type operator() (
+                const in_image_type& original_,
+                out_image_type& down_
+            ) const
+            {
+                // make sure requires clause is not broken
+                DLIB_ASSERT( is_same_object(original_, down_) == false,
+                            "\t void pyramid_down_2_1::operator()"
+                            << "\n\t is_same_object(original_, down_): " << is_same_object(original_, down_)
+                            << "\n\t this: " << this
+                            );
+
+                typedef typename image_traits<in_image_type>::pixel_type in_pixel_type;
+                typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
+                COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false );
+                COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false );
+
+                const_image_view<in_image_type> original(original_);
+                image_view<out_image_type> down(down_);
+
+                if (original.nr() <= 8 || original.nc() <= 8)
+                {
+                    down.clear();
+                    return;
+                }
+
+                array2d<rgbptype> temp_img;
+                temp_img.set_size(original.nr(), (original.nc()-3)/2);
+                down.set_size((original.nr()-3)/2, (original.nc()-3)/2);
+
+
+                // This function applies a 5x5 Gaussian filter to the image.  It
+                // does this by separating the filter into its horizontal and vertical
+                // components and then downsamples the image by dropping every other
+                // row and column.  Note that we can do these things all together in
+                // one step.
+
+                // apply row filter
+                for (long r = 0; r < temp_img.nr(); ++r)
+                {
+                    long oc = 0;
+                    for (long c = 0; c < temp_img.nc(); ++c)
+                    {
+                        rgbptype pix1;
+                        rgbptype pix2;
+                        rgbptype pix3;
+                        rgbptype pix4;
+                        rgbptype pix5;
+
+                        pix1.red = original[r][oc].red;
+                        pix2.red = original[r][oc+1].red;
+                        pix3.red = original[r][oc+2].red;
+                        pix4.red = original[r][oc+3].red;
+                        pix5.red = original[r][oc+4].red;
+                        pix1.green = original[r][oc].green;
+                        pix2.green = original[r][oc+1].green;
+                        pix3.green = original[r][oc+2].green;
+                        pix4.green = original[r][oc+3].green;
+                        pix5.green = original[r][oc+4].green;
+                        pix1.blue = original[r][oc].blue;
+                        pix2.blue = original[r][oc+1].blue;
+                        pix3.blue = original[r][oc+2].blue;
+                        pix4.blue = original[r][oc+3].blue;
+                        pix5.blue = original[r][oc+4].blue;
+
+                        pix2.red *= 4;
+                        pix3.red *= 6;
+                        pix4.red *= 4;
+
+                        pix2.green *= 4;
+                        pix3.green *= 6;
+                        pix4.green *= 4;
+
+                        pix2.blue *= 4;
+                        pix3.blue *= 6;
+                        pix4.blue *= 4;
+
+                        rgbptype temp;
+                        temp.red = pix1.red + pix2.red + pix3.red + pix4.red + pix5.red;
+                        temp.green = pix1.green + pix2.green + pix3.green + pix4.green + pix5.green;
+                        temp.blue = pix1.blue + pix2.blue + pix3.blue + pix4.blue + pix5.blue;
+
+                        temp_img[r][c] = temp;
+
+                        oc += 2;
+                    }
+                }
+
+
+                // apply column filter
+                long dr = 0;
+                for (long r = 2; r < temp_img.nr()-2; r += 2)
+                {
+                    for (long c = 0; c < temp_img.nc(); ++c)
+                    {
+                        rgbptype temp;
+                        temp.red = temp_img[r-2][c].red +
+                                   temp_img[r-1][c].red*4 +
+                                   temp_img[r ][c].red*6 +
+                                   temp_img[r+1][c].red*4 +
+                                   temp_img[r+2][c].red;
+                        temp.green = temp_img[r-2][c].green +
+                                     temp_img[r-1][c].green*4 +
+                                     temp_img[r ][c].green*6 +
+                                     temp_img[r+1][c].green*4 +
+                                     temp_img[r+2][c].green;
+                        temp.blue = temp_img[r-2][c].blue +
+                                    temp_img[r-1][c].blue*4 +
+                                    temp_img[r ][c].blue*6 +
+                                    temp_img[r+1][c].blue*4 +
+                                    temp_img[r+2][c].blue;
+
+                        down[dr][c].red = temp.red/256;
+                        down[dr][c].green = temp.green/256;
+                        down[dr][c].blue = temp.blue/256;
+                    }
+                    ++dr;
+                }
+
+            }
+
+            // In-place version: replaces img with its downsampled copy.
+            template <
+                typename image_type
+                >
+            void operator() (
+                image_type& img
+            ) const
+            {
+                image_type temp;
+                (*this)(img, temp);
+                swap(temp, img);
+            }
+
+        private:
+
+
+        };
+
+ // ----------------------------------------------------------------------------------------
+ // ----------------------------------------------------------------------------------------
+ // ----------------------------------------------------------------------------------------
+
+        // pyramid_down_3_2: downsamples an image by a factor of 3/2
+        // (every level is 2/3 the size of the previous one).  It smooths
+        // with a separable 2-12-2 3x3 filter and then bi-linearly
+        // interpolates 2x2 output pixels from each 3x3 input block.
+        class pyramid_down_3_2 : noncopyable
+        {
+        public:
+
+            // Maps a point from the original image into the coordinate
+            // system of the downsampled image.
+            template <typename T>
+            vector<double,2> point_down (
+                const vector<T,2>& p
+            ) const
+            {
+                const double ratio = 2.0/3.0;
+                return p*ratio - vector<double,2>(1,1);
+            }
+
+            // Inverse of point_down(): maps a downsampled-image point
+            // back into the original image's coordinate system.
+            template <typename T>
+            vector<double,2> point_up (
+                const vector<T,2>& p
+            ) const
+            {
+                const double ratio = 3.0/2.0;
+                return p*ratio + vector<T,2>(ratio,ratio);
+            }
+
+        // -----------------------------
+
+            // Multi-level versions: apply the single-level mapping
+            // `levels` times.
+            template <typename T>
+            vector<double,2> point_down (
+                const vector<T,2>& p,
+                unsigned int levels
+            ) const
+            {
+                vector<double,2> temp = p;
+                for (unsigned int i = 0; i < levels; ++i)
+                    temp = point_down(temp);
+                return temp;
+            }
+
+            template <typename T>
+            vector<double,2> point_up (
+                const vector<T,2>& p,
+                unsigned int levels
+            ) const
+            {
+                vector<double,2> temp = p;
+                for (unsigned int i = 0; i < levels; ++i)
+                    temp = point_up(temp);
+                return temp;
+            }
+
+        // -----------------------------
+
+            // Rectangle versions: map the two corners with the point
+            // mappings above.
+            drectangle rect_up (
+                const drectangle& rect
+            ) const
+            {
+                return drectangle(point_up(rect.tl_corner()), point_up(rect.br_corner()));
+            }
+
+            drectangle rect_up (
+                const drectangle& rect,
+                unsigned int levels
+            ) const
+            {
+                return drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels));
+            }
+
+        // -----------------------------
+
+            drectangle rect_down (
+                const drectangle& rect
+            ) const
+            {
+                return drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner()));
+            }
+
+            drectangle rect_down (
+                const drectangle& rect,
+                unsigned int levels
+            ) const
+            {
+                return drectangle(point_down(rect.tl_corner(),levels), point_down(rect.br_corner(),levels));
+            }
+
+        // -----------------------------
+
+        private:
+            // Compile-time test used to select the fast RGB overload below
+            // when both the input and output images hold rgb pixels.
+            template <typename T, typename U>
+            struct both_images_rgb
+            {
+                typedef typename image_traits<T>::pixel_type T_pix;
+                typedef typename image_traits<U>::pixel_type U_pix;
+                const static bool value = pixel_traits<T_pix>::rgb && pixel_traits<U_pix>::rgb;
+            };
+        public:
+
+            // Generic (non-RGB) overload: filter original_ and write the
+            // 2/3-scale result into down_.  Inputs 8 pixels or smaller in
+            // either dimension produce an empty output.
+            template <
+                typename in_image_type,
+                typename out_image_type
+                >
+            typename disable_if<both_images_rgb<in_image_type,out_image_type> >::type operator() (
+                const in_image_type& original_,
+                out_image_type& down_
+            ) const
+            {
+                // make sure requires clause is not broken
+                DLIB_ASSERT(is_same_object(original_, down_) == false,
+                            "\t void pyramid_down_3_2::operator()"
+                            << "\n\t is_same_object(original_, down_): " << is_same_object(original_, down_)
+                            << "\n\t this: " << this
+                            );
+
+                typedef typename image_traits<in_image_type>::pixel_type in_pixel_type;
+                typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
+                COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false );
+                COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false );
+
+                const_image_view<in_image_type> original(original_);
+                image_view<out_image_type> down(down_);
+
+                if (original.nr() <= 8 || original.nc() <= 8)
+                {
+                    down.clear();
+                    return;
+                }
+
+                // Every 3 input rows/cols (size_in) become 2 output
+                // rows/cols (size_out); full_* counts output pixels from
+                // whole 3x3 blocks, part_* also includes the final
+                // partial block (at most one extra row/column).
+                const long size_in = 3;
+                const long size_out = 2;
+
+                typedef typename pixel_traits<in_pixel_type>::basic_pixel_type bp_type;
+                typedef typename promote<bp_type>::type ptype;
+                const long full_nr = size_out*((original.nr()-2)/size_in);
+                const long part_nr = (size_out*(original.nr()-2))/size_in;
+                const long full_nc = size_out*((original.nc()-2)/size_in);
+                const long part_nc = (size_out*(original.nc()-2))/size_in;
+                down.set_size(part_nr, part_nc);
+
+
+                long rr = 1;
+                long r;
+                for (r = 0; r < full_nr; r+=size_out)
+                {
+                    long cc = 1;
+                    long c;
+                    for (c = 0; c < full_nc; c+=size_out)
+                    {
+                        ptype block[size_in][size_in];
+                        separable_3x3_filter_block_grayscale(block, original_, rr, cc, 2, 12, 2);
+
+                        // bi-linearly interpolate block
+                        assign_pixel(down[r][c] , (block[0][0]*9 + block[1][0]*3 + block[0][1]*3 + block[1][1])/(16*256));
+                        assign_pixel(down[r][c+1] , (block[0][2]*9 + block[1][2]*3 + block[0][1]*3 + block[1][1])/(16*256));
+                        assign_pixel(down[r+1][c] , (block[2][0]*9 + block[1][0]*3 + block[2][1]*3 + block[1][1])/(16*256));
+                        assign_pixel(down[r+1][c+1] , (block[2][2]*9 + block[1][2]*3 + block[2][1]*3 + block[1][1])/(16*256));
+
+                        cc += size_in;
+                    }
+                    // handle the final partial column, if any
+                    if (part_nc - full_nc == 1)
+                    {
+                        ptype block[size_in][2];
+                        separable_3x3_filter_block_grayscale(block, original_, rr, cc, 2, 12, 2);
+
+                        // bi-linearly interpolate partial block
+                        assign_pixel(down[r][c] , (block[0][0]*9 + block[1][0]*3 + block[0][1]*3 + block[1][1])/(16*256));
+                        assign_pixel(down[r+1][c] , (block[2][0]*9 + block[1][0]*3 + block[2][1]*3 + block[1][1])/(16*256));
+                    }
+                    rr += size_in;
+                }
+                // handle the final partial row, if any
+                if (part_nr - full_nr == 1)
+                {
+                    long cc = 1;
+                    long c;
+                    for (c = 0; c < full_nc; c+=size_out)
+                    {
+                        ptype block[2][size_in];
+                        separable_3x3_filter_block_grayscale(block, original_, rr, cc, 2, 12, 2);
+
+                        // bi-linearly interpolate partial block
+                        assign_pixel(down[r][c] , (block[0][0]*9 + block[1][0]*3 + block[0][1]*3 + block[1][1])/(16*256));
+                        assign_pixel(down[r][c+1] , (block[0][2]*9 + block[1][2]*3 + block[0][1]*3 + block[1][1])/(16*256));
+
+                        cc += size_in;
+                    }
+                    if (part_nc - full_nc == 1)
+                    {
+                        ptype block[2][2];
+                        separable_3x3_filter_block_grayscale(block, original_, rr, cc, 2, 12, 2);
+
+                        // bi-linearly interpolate partial block
+                        assign_pixel(down[r][c] , (block[0][0]*9 + block[1][0]*3 + block[0][1]*3 + block[1][1])/(16*256));
+                    }
+                }
+
+            }
+
+        private:
+            // 32-bit-per-channel accumulator used by the RGB overload.
+            struct rgbptype
+            {
+                uint32 red;
+                uint32 green;
+                uint32 blue;
+            };
+
+        public:
+            // ------------------------------------------
+            // OVERLOAD FOR RGB TO RGB IMAGES
+            // ------------------------------------------
+            // Same algorithm as the generic overload above, but filters
+            // the three channels explicitly.
+            template <
+                typename in_image_type,
+                typename out_image_type
+                >
+            typename enable_if<both_images_rgb<in_image_type,out_image_type> >::type operator() (
+                const in_image_type& original_,
+                out_image_type& down_
+            ) const
+            {
+                // make sure requires clause is not broken
+                DLIB_ASSERT( is_same_object(original_, down_) == false,
+                            "\t void pyramid_down_3_2::operator()"
+                            << "\n\t is_same_object(original_, down_): " << is_same_object(original_, down_)
+                            << "\n\t this: " << this
+                            );
+
+                typedef typename image_traits<in_image_type>::pixel_type in_pixel_type;
+                typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
+                COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false );
+                COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false );
+
+                const_image_view<in_image_type> original(original_);
+                image_view<out_image_type> down(down_);
+
+                if (original.nr() <= 8 || original.nc() <= 8)
+                {
+                    down.clear();
+                    return;
+                }
+
+                const long size_in = 3;
+                const long size_out = 2;
+
+                const long full_nr = size_out*((original.nr()-2)/size_in);
+                const long part_nr = (size_out*(original.nr()-2))/size_in;
+                const long full_nc = size_out*((original.nc()-2)/size_in);
+                const long part_nc = (size_out*(original.nc()-2))/size_in;
+                down.set_size(part_nr, part_nc);
+
+
+                long rr = 1;
+                long r;
+                for (r = 0; r < full_nr; r+=size_out)
+                {
+                    long cc = 1;
+                    long c;
+                    for (c = 0; c < full_nc; c+=size_out)
+                    {
+                        rgbptype block[size_in][size_in];
+                        separable_3x3_filter_block_rgb(block, original_, rr, cc, 2, 12, 2);
+
+                        // bi-linearly interpolate block
+                        down[r][c].red = (block[0][0].red*9 + block[1][0].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256);
+                        down[r][c].green = (block[0][0].green*9 + block[1][0].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256);
+                        down[r][c].blue = (block[0][0].blue*9 + block[1][0].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256);
+
+                        down[r][c+1].red = (block[0][2].red*9 + block[1][2].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256);
+                        down[r][c+1].green = (block[0][2].green*9 + block[1][2].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256);
+                        down[r][c+1].blue = (block[0][2].blue*9 + block[1][2].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256);
+
+                        down[r+1][c].red = (block[2][0].red*9 + block[1][0].red*3 + block[2][1].red*3 + block[1][1].red)/(16*256);
+                        down[r+1][c].green = (block[2][0].green*9 + block[1][0].green*3 + block[2][1].green*3 + block[1][1].green)/(16*256);
+                        down[r+1][c].blue = (block[2][0].blue*9 + block[1][0].blue*3 + block[2][1].blue*3 + block[1][1].blue)/(16*256);
+
+                        down[r+1][c+1].red = (block[2][2].red*9 + block[1][2].red*3 + block[2][1].red*3 + block[1][1].red)/(16*256);
+                        down[r+1][c+1].green = (block[2][2].green*9 + block[1][2].green*3 + block[2][1].green*3 + block[1][1].green)/(16*256);
+                        down[r+1][c+1].blue = (block[2][2].blue*9 + block[1][2].blue*3 + block[2][1].blue*3 + block[1][1].blue)/(16*256);
+
+                        cc += size_in;
+                    }
+                    // handle the final partial column, if any
+                    if (part_nc - full_nc == 1)
+                    {
+                        rgbptype block[size_in][2];
+                        separable_3x3_filter_block_rgb(block, original_, rr, cc, 2, 12, 2);
+
+                        // bi-linearly interpolate partial block
+                        down[r][c].red = (block[0][0].red*9 + block[1][0].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256);
+                        down[r][c].green = (block[0][0].green*9 + block[1][0].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256);
+                        down[r][c].blue = (block[0][0].blue*9 + block[1][0].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256);
+
+                        down[r+1][c].red = (block[2][0].red*9 + block[1][0].red*3 + block[2][1].red*3 + block[1][1].red)/(16*256);
+                        down[r+1][c].green = (block[2][0].green*9 + block[1][0].green*3 + block[2][1].green*3 + block[1][1].green)/(16*256);
+                        down[r+1][c].blue = (block[2][0].blue*9 + block[1][0].blue*3 + block[2][1].blue*3 + block[1][1].blue)/(16*256);
+                    }
+                    rr += size_in;
+                }
+                // handle the final partial row, if any
+                if (part_nr - full_nr == 1)
+                {
+                    long cc = 1;
+                    long c;
+                    for (c = 0; c < full_nc; c+=size_out)
+                    {
+                        rgbptype block[2][size_in];
+                        separable_3x3_filter_block_rgb(block, original_, rr, cc, 2, 12, 2);
+
+                        // bi-linearly interpolate partial block
+                        down[r][c].red = (block[0][0].red*9 + block[1][0].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256);
+                        down[r][c].green = (block[0][0].green*9 + block[1][0].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256);
+                        down[r][c].blue = (block[0][0].blue*9 + block[1][0].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256);
+
+                        down[r][c+1].red = (block[0][2].red*9 + block[1][2].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256);
+                        down[r][c+1].green = (block[0][2].green*9 + block[1][2].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256);
+                        down[r][c+1].blue = (block[0][2].blue*9 + block[1][2].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256);
+
+                        cc += size_in;
+                    }
+                    if (part_nc - full_nc == 1)
+                    {
+                        rgbptype block[2][2];
+                        separable_3x3_filter_block_rgb(block, original_, rr, cc, 2, 12, 2);
+
+                        // bi-linearly interpolate partial block
+                        down[r][c].red = (block[0][0].red*9 + block[1][0].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256);
+                        down[r][c].green = (block[0][0].green*9 + block[1][0].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256);
+                        down[r][c].blue = (block[0][0].blue*9 + block[1][0].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256);
+                    }
+                }
+            }
+
+            // In-place version: replaces img with its downsampled copy.
+            template <
+                typename image_type
+                >
+            void operator() (
+                image_type& img
+            ) const
+            {
+                image_type temp;
+                (*this)(img, temp);
+                swap(temp, img);
+            }
+        private:
+
+
+        };
+
+ }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    // pyramid_down<N>: generic image pyramid where each level is (N-1)/N
+    // the size of the level below it.  This primary template implements
+    // downsampling via resize_image(); N == 1, 2, and 3 are handled by the
+    // specializations that follow.
+    template <
+        unsigned int N
+        >
+    class pyramid_down : noncopyable
+    {
+    public:
+
+        COMPILE_TIME_ASSERT(N > 0);
+
+        // Maps a point from the original image into the coordinate system
+        // of the downsampled image.
+        template <typename T>
+        vector<double,2> point_down (
+            const vector<T,2>& p
+        ) const
+        {
+            const double ratio = (N-1.0)/N;
+            return (p - 0.3)*ratio;
+        }
+
+        // Inverse of point_down().
+        template <typename T>
+        vector<double,2> point_up (
+            const vector<T,2>& p
+        ) const
+        {
+            const double ratio = N/(N-1.0);
+            return p*ratio + 0.3;
+        }
+
+        // -----------------------------
+
+        // Multi-level versions: apply the single-level mapping `levels`
+        // times.
+        template <typename T>
+        vector<double,2> point_down (
+            const vector<T,2>& p,
+            unsigned int levels
+        ) const
+        {
+            vector<double,2> temp = p;
+            for (unsigned int i = 0; i < levels; ++i)
+                temp = point_down(temp);
+            return temp;
+        }
+
+        template <typename T>
+        vector<double,2> point_up (
+            const vector<T,2>& p,
+            unsigned int levels
+        ) const
+        {
+            vector<double,2> temp = p;
+            for (unsigned int i = 0; i < levels; ++i)
+                temp = point_up(temp);
+            return temp;
+        }
+
+        // -----------------------------
+
+        // Rectangle versions: map the two corners with the point mappings
+        // above.
+        drectangle rect_up (
+            const drectangle& rect
+        ) const
+        {
+            return drectangle(point_up(rect.tl_corner()), point_up(rect.br_corner()));
+        }
+
+        drectangle rect_up (
+            const drectangle& rect,
+            unsigned int levels
+        ) const
+        {
+            return drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels));
+        }
+
+        // -----------------------------
+
+        drectangle rect_down (
+            const drectangle& rect
+        ) const
+        {
+            return drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner()));
+        }
+
+        drectangle rect_down (
+            const drectangle& rect,
+            unsigned int levels
+        ) const
+        {
+            return drectangle(point_down(rect.tl_corner(),levels), point_down(rect.br_corner(),levels));
+        }
+
+        // Downsamples original into down by resizing it to (N-1)/N of its
+        // size.  The two images must be distinct objects and neither pixel
+        // type may carry an alpha channel.
+        template <
+            typename in_image_type,
+            typename out_image_type
+            >
+        void operator() (
+            const in_image_type& original,
+            out_image_type& down
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(is_same_object(original, down) == false,
+                        "\t void pyramid_down::operator()"
+                        << "\n\t is_same_object(original, down): " << is_same_object(original, down)
+                        << "\n\t this: " << this
+                        );
+
+            typedef typename image_traits<in_image_type>::pixel_type in_pixel_type;
+            typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
+            COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false );
+            COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false );
+
+
+            // NOTE(review): ((N-1)*num_rows(original))/N is an integer
+            // division, so the +0.5 is truncated away again when the
+            // result is converted to an integral size -- presumably meant
+            // as rounding; confirm against upstream before changing.
+            set_image_size(down, ((N-1)*num_rows(original))/N+0.5, ((N-1)*num_columns(original))/N+0.5);
+            resize_image(original, down);
+        }
+
+        // In-place version: replaces img with its downsampled copy.
+        template <
+            typename image_type
+            >
+        void operator() (
+            image_type& img
+        ) const
+        {
+            image_type temp;
+            (*this)(img, temp);
+            swap(temp, img);
+        }
+    };
+
+    // Map the downsampling ratio N to a concrete implementation:
+    // N == 1 disables the pyramid entirely, while N == 2 and N == 3 use
+    // the specialized Gaussian-filtering downsamplers defined above.
+    template <>
+    class pyramid_down<1> : public pyramid_disable {};
+
+    template <>
+    class pyramid_down<2> : public dlib::impl::pyramid_down_2_1 {};
+
+    template <>
+    class pyramid_down<3> : public dlib::impl::pyramid_down_3_2 {};
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    // Returns the scale factor between adjacent pyramid levels: each
+    // level of pyramid_down<N> is (N-1)/N the size of the one below it.
+    template <unsigned int N>
+    double pyramid_rate(const pyramid_down<N>&)
+    {
+        return (N-1.0)/N;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Given an input image size in (nr, nc), overwrites nr and nc with
+    // the size of the image the given pyramid object would output.  The
+    // N == 1/2/3 overloads below mirror the exact set_size() arithmetic
+    // of the corresponding pyramid_down implementations above.
+    template <unsigned int N>
+    void find_pyramid_down_output_image_size(
+        const pyramid_down<N>& pyr,
+        long& nr,
+        long& nc
+    )
+    {
+        const double rate = pyramid_rate(pyr);
+        nr = std::floor(rate*nr);
+        nc = std::floor(rate*nc);
+    }
+
+    // Matches pyramid_down<3>'s down.set_size(part_nr, part_nc).
+    inline void find_pyramid_down_output_image_size(
+        const pyramid_down<3>& /*pyr*/,
+        long& nr,
+        long& nc
+    )
+    {
+        nr = 2*(nr-2)/3;
+        nc = 2*(nc-2)/3;
+    }
+
+    // Matches pyramid_down<2>'s down.set_size((nr-3)/2, (nc-3)/2).
+    inline void find_pyramid_down_output_image_size(
+        const pyramid_down<2>& /*pyr*/,
+        long& nr,
+        long& nc
+    )
+    {
+        nr = (nr-3)/2;
+        nc = (nc-3)/2;
+    }
+
+    // pyramid_down<1> is disabled, so it always outputs an empty image.
+    inline void find_pyramid_down_output_image_size(
+        const pyramid_down<1>& /*pyr*/,
+        long& nr,
+        long& nc
+    )
+    {
+        nr = 0;
+        nc = 0;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    namespace impl
+    {
+        /*!
+            Computes the layout used by create_tiled_pyramid(): given the size of
+            the input image (nr by nc), decides where each pyramid level goes
+            inside a single packed output image.  On return, rects[i] is the
+            location of the i-th pyramid level and the output image should be
+            pyramid_image_nr by pyramid_image_nc pixels.
+        !*/
+        template <typename pyramid_type>
+        void compute_tiled_image_pyramid_details (
+            const pyramid_type& pyr,
+            long nr,
+            long nc,
+            const unsigned long padding,
+            const unsigned long outer_padding,
+            std::vector<rectangle>& rects,
+            long& pyramid_image_nr,
+            long& pyramid_image_nc
+        )
+        {
+            rects.clear();
+            // An empty input image yields an empty pyramid.
+            if (nr*nc == 0)
+            {
+                pyramid_image_nr = 0;
+                pyramid_image_nc = 0;
+                return;
+            }
+
+            // Stop generating pyramid levels once they become shorter than this.
+            const long min_height = 5;
+            rects.reserve(100);
+            rects.push_back(rectangle(nc,nr));
+            // build the whole pyramid
+            while(true)
+            {
+                find_pyramid_down_output_image_size(pyr, nr, nc);
+                if (nr*nc == 0 || nr < min_height)
+                    break;
+                rects.push_back(rectangle(nc,nr));
+            }
+
+            // figure out output image size
+            // The levels are placed in two vertical runs: the larger ones are
+            // stacked top to bottom along the left edge, and once the remaining
+            // levels are narrow enough we double back and stack them bottom to
+            // top against the right edge (second loop below).
+            long total_height = 0;
+            for (auto&& i : rects)
+                total_height += i.height()+padding;
+            total_height -= padding*2; // don't add unnecessary padding to the very right side.
+            long height = 0;
+            long prev_width = 0;
+            for (auto&& i : rects)
+            {
+                // Figure out how far we go on the first column. We go until the next image can
+                // fit next to the previous one, which means we can double back for the second
+                // column of images.
+                if (i.width() <= rects[0].width()-prev_width-(long)padding &&
+                    (height-rects[0].height())*2 >= (total_height-rects[0].height()))
+                {
+                    break;
+                }
+                height += i.height() + padding;
+                prev_width = i.width();
+            }
+            height -= padding; // don't add unnecessary padding to the very right side.
+
+            const long width = rects[0].width();
+            pyramid_image_nr = height+outer_padding*2;
+            pyramid_image_nc = width+outer_padding*2;
+
+
+            // First run: place levels top to bottom along the left edge.
+            long y = outer_padding;
+            size_t i = 0;
+            while(y < height+(long)outer_padding && i < rects.size())
+            {
+                rects[i] = translate_rect(rects[i],point(outer_padding,y));
+                DLIB_ASSERT(rectangle(pyramid_image_nc,pyramid_image_nr).contains(rects[i]));
+                y += rects[i].height()+padding;
+                ++i;
+            }
+            y -= padding;
+            // Second run: place remaining levels bottom to top against the right edge.
+            while (i < rects.size())
+            {
+                point p1(outer_padding+width-1,y-1);
+                point p2 = p1 - rects[i].br_corner();
+                rectangle rect(p1,p2);
+                DLIB_ASSERT(rectangle(pyramid_image_nc,pyramid_image_nr).contains(rect));
+                // don't keep going on the last row if it would intersect the original image.
+                if (!rects[0].intersect(rect).is_empty())
+                    break;
+
+                rects[i] = rect;
+                y -= rects[i].height()+padding;
+                ++i;
+            }
+
+            // Delete any extraneous rectangles if we broke out of the above loop early due to
+            // intersection with the original image.
+            rects.resize(i);
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    /*!
+        Builds an image pyramid from img using pyramid_type and packs every
+        level into the single image out_img.  #rects[i] gives the location of
+        the i-th pyramid level inside out_img (rects[0] is the full-resolution
+        copy of img).
+    !*/
+    template <
+        typename pyramid_type,
+        typename image_type1,
+        typename image_type2
+        >
+    void create_tiled_pyramid (
+        const image_type1& img,
+        image_type2& out_img,
+        std::vector<rectangle>& rects,
+        const unsigned long padding = 10,
+        const unsigned long outer_padding = 0
+    )
+    {
+        DLIB_ASSERT(!is_same_object(img, out_img));
+
+        // Work out where each level goes and how big out_img must be.
+        long out_nr, out_nc;
+        pyramid_type pyr;
+        impl::compute_tiled_image_pyramid_details(pyr, img.nr(), img.nc(), padding, outer_padding, rects, out_nr, out_nc);
+
+        // Zero-fill so all padding pixels end up 0.
+        set_image_size(out_img, out_nr, out_nc);
+        assign_all_pixels(out_img, 0);
+
+        if (rects.size() == 0)
+            return;
+
+        // now build the image pyramid into out_img
+        // Copy img into the first tile, then generate each subsequent level
+        // directly from the previous tile's pixels.
+        auto si = sub_image(out_img, rects[0]);
+        assign_image(si, img);
+        for (size_t i = 1; i < rects.size(); ++i)
+        {
+            auto s1 = sub_image(out_img, rects[i-1]);
+            auto s2 = sub_image(out_img, rects[i]);
+            pyr(s1,s2);
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Maps a point p from input-image coordinates into the tiled pyramid
+    // produced by create_tiled_pyramid(), at the pyramid level closest to
+    // the requested scale.
+    template <
+        typename pyramid_type
+        >
+    dpoint image_to_tiled_pyramid (
+        const std::vector<rectangle>& rects,
+        double scale,
+        dpoint p
+    )
+    {
+        DLIB_CASSERT(rects.size() > 0);
+        DLIB_CASSERT(0 < scale && scale <= 1);
+        pyramid_type pyr;
+        // Convert scale into a number of pyramid levels by solving
+        // rate^levels == scale, rounding to the nearest level and clamping to
+        // the levels actually present in the tiled image.
+        long level = static_cast<long>(std::log(scale)/std::log(pyramid_rate(pyr))+0.5);
+        level = put_in_range(0, (long)rects.size()-1, level);
+
+        // Map p down the pyramid, then shift into that tile's position.
+        return rects[level].tl_corner() + pyr.point_down(p, level);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Rectangle version of image_to_tiled_pyramid(): maps both corners
+    // through the point overload and rebuilds the rectangle.
+    template <
+        typename pyramid_type
+        >
+    drectangle image_to_tiled_pyramid (
+        const std::vector<rectangle>& rects,
+        double scale,
+        drectangle r
+    )
+    {
+        DLIB_ASSERT(rects.size() > 0);
+        DLIB_ASSERT(0 < scale && scale <= 1);
+        const dpoint tl = image_to_tiled_pyramid<pyramid_type>(rects, scale, r.tl_corner());
+        const dpoint br = image_to_tiled_pyramid<pyramid_type>(rects, scale, r.br_corner());
+        return drectangle(tl, br);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Inverse of image_to_tiled_pyramid() for points: maps a coordinate in
+    // the tiled pyramid back to input-image coordinates.
+    template <
+        typename pyramid_type
+        >
+    dpoint tiled_pyramid_to_image (
+        const std::vector<rectangle>& rects,
+        dpoint p
+    )
+    {
+        DLIB_CASSERT(rects.size() > 0);
+
+        // Find which pyramid tile the point falls in (or is nearest to).
+        const size_t level = nearest_rect(rects, p);
+
+        // Shift into that tile's local coordinates, then map back up to the
+        // original image's scale.
+        pyramid_type pyr;
+        return pyr.point_up(p - rects[level].tl_corner(), level);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Inverse of image_to_tiled_pyramid() for rectangles: maps a rectangle
+    // in the tiled pyramid back to input-image coordinates.
+    template <
+        typename pyramid_type
+        >
+    drectangle tiled_pyramid_to_image (
+        const std::vector<rectangle>& rects,
+        drectangle r
+    )
+    {
+        DLIB_CASSERT(rects.size() > 0);
+
+        // Pick the tile containing the rectangle's center.
+        const size_t level = nearest_rect(rects, dcenter(r));
+
+        // Express r in that tile's local coordinates, then map it back up to
+        // the original image's scale.
+        const dpoint origin = rects[level].tl_corner();
+        pyramid_type pyr;
+        return pyr.rect_up(drectangle(r.tl_corner()-origin, r.br_corner()-origin), level);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_IMAGE_PYRaMID_Hh_
+
diff --git a/ml/dlib/dlib/image_transforms/image_pyramid_abstract.h b/ml/dlib/dlib/image_transforms/image_pyramid_abstract.h
new file mode 100644
index 000000000..a61b275fd
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/image_pyramid_abstract.h
@@ -0,0 +1,384 @@
+// Copyright (C) 2010 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_IMAGE_PYRaMID_ABSTRACT_Hh_
+#ifdef DLIB_IMAGE_PYRaMID_ABSTRACT_Hh_
+
+#include "../pixel.h"
+#include "../array2d.h"
+#include "../geometry.h"
+#include "../image_processing/generic_image.h"
+
+namespace dlib
+{
+
+ template <
+ unsigned int N
+ >
+ class pyramid_down : noncopyable
+ {
+ /*!
+ REQUIREMENTS ON N
+ N > 0
+
+ WHAT THIS OBJECT REPRESENTS
+ This is a simple functor to help create image pyramids. In particular, it
+ downsamples images at a ratio of N to N-1.
+
+ Note that setting N to 1 means that this object functions like
+ pyramid_disable (defined at the bottom of this file).
+
+ WARNING, when mapping rectangles from one layer of a pyramid
+ to another you might end up with rectangles which extend slightly
+ outside your images. This is because points on the border of an
+ image at a higher pyramid layer might correspond to points outside
+ images at lower layers. So just keep this in mind. Note also
+ that it's easy to deal with. Just say something like this:
+ rect = rect.intersect(get_rect(my_image)); // keep rect inside my_image
+ !*/
+ public:
+
+ template <
+ typename in_image_type,
+ typename out_image_type
+ >
+ void operator() (
+ const in_image_type& original,
+ out_image_type& down
+ ) const;
+ /*!
+ requires
+ - is_same_object(original, down) == false
+ - in_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - out_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - for both pixel types P in the input and output images, we require:
+ - pixel_traits<P>::has_alpha == false
+ ensures
+ - #down will contain an image that is roughly (N-1)/N times the size of the
+ original image.
+ - If both input and output images contain RGB pixels then the downsampled image will
+ be in color. Otherwise, the downsampling will be performed in a grayscale mode.
+ - The location of a point P in original image will show up at point point_down(P)
+ in the #down image.
+ - Note that some points on the border of the original image might correspond to
+ points outside the #down image.
+ !*/
+
+ template <
+ typename image_type
+ >
+ void operator() (
+ image_type& img
+ ) const;
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false
+ ensures
+ - This function downsamples the given image and stores the results in #img.
+ In particular, it is equivalent to performing:
+ (*this)(img, temp);
+ swap(img, temp);
+ !*/
+
+ // -------------------------------
+
+ template <typename T>
+ vector<double,2> point_down (
+ const vector<T,2>& p
+ ) const;
+ /*!
+ ensures
+ - interprets p as a point in a parent image and returns the
+ point in a downsampled image which corresponds to p.
+ - This function is the inverse of point_up(). I.e. for a point P:
+ point_down(point_up(P)) == P
+ !*/
+
+ template <typename T>
+ vector<double,2> point_up (
+ const vector<T,2>& p
+ ) const;
+ /*!
+ ensures
+ - interprets p as a point in a downsampled image and returns the
+ point in a parent image which corresponds to p.
+ - This function is the inverse of point_down(). I.e. for a point P:
+ point_up(point_down(P)) == P
+ !*/
+
+ drectangle rect_down (
+ const drectangle& rect
+ ) const;
+ /*!
+ ensures
+ - returns drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner()));
+                  (i.e. maps rect into a downsampled image)
+ !*/
+
+ drectangle rect_up (
+ const drectangle& rect
+ ) const;
+ /*!
+ ensures
+ - returns drectangle(point_up(rect.tl_corner()), point_up(rect.br_corner()));
+ (i.e. maps rect into a parent image)
+ !*/
+
+ // -------------------------------
+
+ template <typename T>
+ vector<double,2> point_down (
+ const vector<T,2>& p,
+ unsigned int levels
+ ) const;
+ /*!
+ ensures
+ - applies point_down() to p levels times and returns the result.
+ (i.e. point_down(p,2) == point_down(point_down(p)),
+ point_down(p,1) == point_down(p),
+ point_down(p,0) == p, etc. )
+ !*/
+
+ template <typename T>
+ vector<double,2> point_up (
+ const vector<T,2>& p,
+ unsigned int levels
+ ) const;
+ /*!
+ ensures
+ - applies point_up() to p levels times and returns the result.
+ (i.e. point_up(p,2) == point_up(point_up(p)),
+ point_up(p,1) == point_up(p),
+ point_up(p,0) == p, etc. )
+ !*/
+
+ drectangle rect_down (
+ const drectangle& rect,
+ unsigned int levels
+ ) const;
+ /*!
+ ensures
+ - returns drectangle(point_down(rect.tl_corner(),levels), point_down(rect.br_corner(),levels));
+ (i.e. Basically applies rect_down() to rect levels times and returns the result.)
+ !*/
+
+ drectangle rect_up (
+ const drectangle& rect,
+ unsigned int levels
+ ) const;
+ /*!
+ ensures
+ - returns drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels));
+ (i.e. Basically applies rect_up() to rect levels times and returns the result.)
+ !*/
+
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ class pyramid_disable : noncopyable
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is a function object with an interface identical to pyramid_down (defined
+ at the top of this file) except that it downsamples images at a ratio of infinity
+ to 1. That means it always outputs images of size 0 regardless of the size
+ of the inputs.
+
+ This is useful because it can be supplied to routines which take a pyramid_down
+ function object and it will essentially disable pyramid processing. This way,
+ a pyramid oriented function can be turned into a regular routine which processes
+ just the original undownsampled image.
+ !*/
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ unsigned int N
+ >
+ double pyramid_rate(
+ const pyramid_down<N>& pyr
+ );
+ /*!
+ ensures
+ - returns (N-1.0)/N
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ unsigned int N
+ >
+ void find_pyramid_down_output_image_size(
+ const pyramid_down<N>& pyr,
+ long& nr,
+ long& nc
+ );
+ /*!
+ requires
+ - nr >= 0
+ - nc >= 0
+ ensures
+ - If pyr() were called on an image with nr by nc rows and columns, what would
+ be the size of the output image? This function finds the size of the output
+ image and stores it back into #nr and #nc.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename pyramid_type,
+ typename image_type1,
+ typename image_type2
+ >
+ void create_tiled_pyramid (
+ const image_type1& img,
+ image_type2& out_img,
+ std::vector<rectangle>& rects,
+ const unsigned long padding = 10,
+ const unsigned long outer_padding = 0
+ );
+ /*!
+ requires
+ - pyramid_type == one of the dlib::pyramid_down template instances defined above.
+ - is_same_object(img, out_img) == false
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - for both pixel types P in the input and output images, we require:
+ - pixel_traits<P>::has_alpha == false
+ ensures
+ - Creates an image pyramid from the input image img. The pyramid is made using
+ pyramid_type. The highest resolution image is img and then all further
+ pyramid levels are generated from pyramid_type's downsampling. The entire
+ resulting pyramid is packed into a single image and stored in out_img.
+ - When packing pyramid levels into out_img, there will be padding pixels of
+ space between each sub-image. There will also be outer_padding pixels of
+ padding around the edge of the image. All padding pixels have a value of 0.
+ - The resulting pyramid will be composed of #rects.size() images packed into
+ out_img. Moreover, #rects[i] is the location inside out_img of the i-th
+ pyramid level.
+ - #rects.size() > 0
+ - #rects[0] == get_rect(img). I.e. the first rectangle is the highest
+ resolution pyramid layer. Subsequent elements of #rects correspond to
+ smaller and smaller pyramid layers inside out_img.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename pyramid_type
+ >
+ dpoint image_to_tiled_pyramid (
+ const std::vector<rectangle>& rects,
+ double scale,
+ dpoint p
+ );
+ /*!
+ requires
+ - pyramid_type == one of the dlib::pyramid_down template instances defined above.
+ - 0 < scale <= 1
+ - rects.size() > 0
+ ensures
+ - The function create_tiled_pyramid() converts an image, img, to a "tiled
+            pyramid" called out_img. It also outputs a vector of rectangles, rects, that
+ show where each pyramid layer appears in out_img. Therefore,
+ image_to_tiled_pyramid() allows you to map from coordinates in img (i.e. p)
+ to coordinates in the tiled pyramid out_img, when given the rects metadata.
+
+ So given a point p in img, you can ask, what coordinate in out_img
+ corresponds to img[p.y()][p.x()] when things are scale times smaller? This
+ new coordinate is a location in out_img and is what is returned by this
+ function.
+ - A scale of 1 means we don't move anywhere in the pyramid scale space relative
+ to the input image while smaller values of scale mean we move down the
+ pyramid.
+ - Assumes pyramid_type is the pyramid class used to produce the tiled image.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename pyramid_type
+ >
+ drectangle image_to_tiled_pyramid (
+ const std::vector<rectangle>& rects,
+ double scale,
+ drectangle r
+ );
+ /*!
+ requires
+ - pyramid_type == one of the dlib::pyramid_down template instances defined above.
+ - 0 < scale <= 1
+ - rects.size() > 0
+ ensures
+ - This function maps from input image space to tiled pyramid coordinate space
+ just as the above image_to_tiled_pyramid() does, except it operates on
+ rectangle objects instead of points.
+ - Assumes pyramid_type is the pyramid class used to produce the tiled image.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename pyramid_type
+ >
+ dpoint tiled_pyramid_to_image (
+ const std::vector<rectangle>& rects,
+ dpoint p
+ );
+ /*!
+ requires
+ - pyramid_type == one of the dlib::pyramid_down template instances defined above.
+ - rects.size() > 0
+ ensures
+ - This function maps from a coordinate in a tiled pyramid to the corresponding
+ input image coordinate. Therefore, it is essentially the inverse of
+ image_to_tiled_pyramid().
+ - It should be noted that this function isn't always an inverse of
+ image_to_tiled_pyramid(). This is because you can ask
+ image_to_tiled_pyramid() for the coordinates of points outside the input
+ image and they will be mapped to somewhere that doesn't have an inverse. But
+ for points actually inside the image this function performs an approximate
+ inverse mapping.
+ - Assumes pyramid_type is the pyramid class used to produce the tiled image.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename pyramid_type
+ >
+ drectangle tiled_pyramid_to_image (
+ const std::vector<rectangle>& rects,
+ drectangle r
+ );
+ /*!
+ requires
+ - pyramid_type == one of the dlib::pyramid_down template instances defined above.
+ - rects.size() > 0
+ ensures
+ - This function maps from a coordinate in a tiled pyramid to the corresponding
+ input image coordinate. Therefore, it is essentially the inverse of
+ image_to_tiled_pyramid().
+ - It should be noted that this function isn't always an inverse of
+ image_to_tiled_pyramid(). This is because you can ask
+ image_to_tiled_pyramid() for the coordinates of points outside the input
+ image and they will be mapped to somewhere that doesn't have an inverse. But
+ for points actually inside the image this function performs an approximate
+ inverse mapping.
+ - Assumes pyramid_type is the pyramid class used to produce the tiled image.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_IMAGE_PYRaMID_ABSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/image_transforms/integral_image.h b/ml/dlib/dlib/image_transforms/integral_image.h
new file mode 100644
index 000000000..2ae47d921
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/integral_image.h
@@ -0,0 +1,190 @@
+// Copyright (C) 2009 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_INTEGRAL_IMAGE
+#define DLIB_INTEGRAL_IMAGE
+
+#include "integral_image_abstract.h"
+
+#include "../algs.h"
+#include "../assert.h"
+#include "../geometry.h"
+#include "../array2d.h"
+#include "../matrix.h"
+#include "../pixel.h"
+#include "../noncopyable.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    class integral_image_generic : noncopyable
+    {
+        /*!
+            A summed-area table: after load(), int_img[r][c] holds the sum of
+            all pixels of the source image with row <= r and column <= c, so
+            get_sum_of_area() can return the sum over any rectangle in
+            constant time.  T must be able to hold such sums without overflow.
+        !*/
+    public:
+        typedef T value_type;
+
+        long nr() const { return int_img.nr(); }
+        long nc() const { return int_img.nc(); }
+
+        template <typename image_type>
+        void load (
+            const image_type& img_
+        )
+        {
+            const_image_view<image_type> img(img_);
+            T pixel;
+            int_img.set_size(img.nr(), img.nc());
+
+            // compute the first row of the integral image
+            T temp = 0;
+            for (long c = 0; c < img.nc(); ++c)
+            {
+                assign_pixel(pixel, img[0][c]);
+                temp += pixel;
+                int_img[0][c] = temp;
+            }
+
+            // now compute the rest of the integral image
+            // Each entry is the running sum of the current row plus the
+            // already-complete integral value from the row above.
+            for (long r = 1; r < img.nr(); ++r)
+            {
+                temp = 0;
+                for (long c = 0; c < img.nc(); ++c)
+                {
+                    assign_pixel(pixel, img[r][c]);
+                    temp += pixel;
+                    int_img[r][c] = temp + int_img[r-1][c];
+                }
+            }
+
+        }
+
+        value_type get_sum_of_area (
+            const rectangle& rect
+        ) const
+        {
+            DLIB_ASSERT(get_rect(*this).contains(rect) == true && rect.is_empty() == false,
+                "\tvalue_type get_sum_of_area(rect)"
+                << "\n\tYou have given a rectangle that goes outside the image"
+                << "\n\tthis: " << this
+                << "\n\trect.is_empty(): " << rect.is_empty()
+                << "\n\trect: " << rect
+                << "\n\tget_rect(*this): " << get_rect(*this)
+                );
+
+            // Standard summed-area lookup: sum = BR - BL - TR + TL.  The
+            // left/top terms stay 0 when rect touches the image's left or top
+            // edge, since there are no pixels beyond those borders.
+            T top_left = 0, top_right = 0, bottom_left = 0, bottom_right = 0;
+
+            bottom_right = int_img[rect.bottom()][rect.right()];
+            if (rect.left()-1 >= 0 && rect.top()-1 >= 0)
+            {
+                top_left = int_img[rect.top()-1][rect.left()-1];
+                bottom_left = int_img[rect.bottom()][rect.left()-1];
+                top_right = int_img[rect.top()-1][rect.right()];
+            }
+            else if (rect.left()-1 >= 0)
+            {
+                bottom_left = int_img[rect.bottom()][rect.left()-1];
+            }
+            else if (rect.top()-1 >= 0)
+            {
+                top_right = int_img[rect.top()-1][rect.right()];
+            }
+
+            return bottom_right - bottom_left - top_right + top_left;
+        }
+
+        void swap(integral_image_generic& item)
+        {
+            int_img.swap(item.int_img);
+        }
+
+    private:
+
+        array2d<T> int_img;
+    };
+
+
+    // Global swap so integral_image_generic works with the usual ADL-based
+    // swap idiom.
+    template <
+        typename T
+        >
+    void swap (
+        integral_image_generic<T>& a,
+        integral_image_generic<T>& b
+    ) { a.swap(b); }
+
+// ----------------------------------------------------------------------------------------
+
+    // Convenience typedef for the common case of accumulating pixel sums in a long.
+    typedef integral_image_generic<long> integral_image;
+
+// ----------------------------------------------------------------------------------------
+
+    // Computes the response of a Haar wavelet oriented along the X axis:
+    // the sum of the pixels in the right half of a width x width box
+    // centered at p minus the sum of the pixels in the left half.
+    // Requires the whole box to lie inside the image.
+    template <typename integral_image_type>
+    typename integral_image_type::value_type haar_x (
+        const integral_image_type& img,
+        const point& p,
+        long width
+    )
+    {
+        // Fixed error message typo: "a point and with" -> "a point and width".
+        DLIB_ASSERT(get_rect(img).contains(centered_rect(p,width,width)) == true,
+            "\tlong haar_x(img,p,width)"
+            << "\n\tYou have given a point and width that goes outside the image"
+            << "\n\tget_rect(img): " << get_rect(img)
+            << "\n\tp: " << p
+            << "\n\twidth: " << width
+            );
+
+        // Left half of the box: columns [p.x()-width/2, p.x()-1].
+        rectangle left_rect;
+        left_rect.set_left ( p.x() - width / 2 );
+        left_rect.set_top ( p.y() - width / 2 );
+        left_rect.set_right ( p.x()-1 );
+        left_rect.set_bottom ( left_rect.top() + width - 1 );
+
+        // Right half: the remaining columns of the box.
+        rectangle right_rect;
+        right_rect.set_left ( p.x() );
+        right_rect.set_top ( left_rect.top() );
+        right_rect.set_right ( left_rect.left() + width -1 );
+        right_rect.set_bottom ( left_rect.bottom() );
+
+        return img.get_sum_of_area(right_rect) - img.get_sum_of_area(left_rect);
+    }
+
+ // ----------------------------------------------------------------------------
+
+    // Computes the response of a Haar wavelet oriented along the Y axis:
+    // the sum of the pixels in the bottom half of a width x width box
+    // centered at p minus the sum of the pixels in the top half.
+    // Requires the whole box to lie inside the image.
+    template <typename integral_image_type>
+    typename integral_image_type::value_type haar_y (
+        const integral_image_type& img,
+        const point& p,
+        long width
+    )
+    {
+        // Fixed error message typo: "a point and with" -> "a point and width".
+        DLIB_ASSERT(get_rect(img).contains(centered_rect(p,width,width)) == true,
+            "\tlong haar_y(img,p,width)"
+            << "\n\tYou have given a point and width that goes outside the image"
+            << "\n\tget_rect(img): " << get_rect(img)
+            << "\n\tp: " << p
+            << "\n\twidth: " << width
+            );
+
+        // Top half of the box: rows [p.y()-width/2, p.y()-1].
+        rectangle top_rect;
+        top_rect.set_left ( p.x() - width / 2 );
+        top_rect.set_top ( p.y() - width / 2 );
+        top_rect.set_right ( top_rect.left() + width - 1 );
+        top_rect.set_bottom ( p.y()-1 );
+
+        // Bottom half: the remaining rows of the box.
+        rectangle bottom_rect;
+        bottom_rect.set_left ( top_rect.left() );
+        bottom_rect.set_top ( p.y() );
+        bottom_rect.set_right ( top_rect.right() );
+        bottom_rect.set_bottom ( top_rect.top() + width - 1 );
+
+        return img.get_sum_of_area(bottom_rect) - img.get_sum_of_area(top_rect);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_INTEGRAL_IMAGE
+
diff --git a/ml/dlib/dlib/image_transforms/integral_image_abstract.h b/ml/dlib/dlib/image_transforms/integral_image_abstract.h
new file mode 100644
index 000000000..583fa0375
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/integral_image_abstract.h
@@ -0,0 +1,169 @@
+// Copyright (C) 2009 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_INTEGRAL_IMAGe_ABSTRACT_
+#ifdef DLIB_INTEGRAL_IMAGe_ABSTRACT_
+
+#include "../geometry/rectangle_abstract.h"
+#include "../array2d/array2d_kernel_abstract.h"
+#include "../pixel.h"
+#include "../noncopyable.h"
+#include "../image_processing/generic_image.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename T
+ >
+ class integral_image_generic : noncopyable
+ {
+ /*!
+ REQUIREMENTS ON T
+ T should be a built in scalar type. Moreover, it should
+ be capable of storing sums of whatever kind of pixel
+ you will be dealing with.
+
+ INITIAL VALUE
+ - nr() == 0
+ - nc() == 0
+
+ WHAT THIS OBJECT REPRESENTS
+ This object is an alternate way of representing image data
+ that allows for very fast computations of sums of pixels in
+ rectangular regions. To use this object you load it with a
+ normal image and then you can use the get_sum_of_area()
+ function to compute sums of pixels in a given area in
+ constant time.
+ !*/
+ public:
+ typedef T value_type;
+
+ const long nr(
+ ) const;
+ /*!
+ ensures
+ - returns the number of rows in this integral image object
+ !*/
+
+ const long nc(
+ ) const;
+ /*!
+ ensures
+ - returns the number of columns in this integral image object
+ !*/
+
+ template <typename image_type>
+ void load (
+ const image_type& img
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - Let P denote the type of pixel in img, then we require:
+ - pixel_traits<P>::has_alpha == false
+ ensures
+ - #nr() == img.nr()
+ - #nc() == img.nc()
+ - #*this will now contain an "integral image" representation of the
+ given input image.
+ !*/
+
+ value_type get_sum_of_area (
+ const rectangle& rect
+ ) const;
+ /*!
+ requires
+ - rect.is_empty() == false
+ - get_rect(*this).contains(rect) == true
+ (i.e. rect must not be outside the integral image)
+ ensures
+ - Let O denote the image this integral image was generated from.
+ Then this function returns sum(subm(mat(O),rect)).
+ That is, this function returns the sum of the pixels in O that
+ are contained within the given rectangle.
+ !*/
+
+ void swap(
+ integral_image_generic& item
+ );
+ /*!
+ ensures
+ - swaps *this and item
+ !*/
+
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ template < typename T >
+ void swap (
+ integral_image_generic<T>& a,
+ integral_image_generic<T>& b
+ ) { a.swap(b); }
+ /*!
+ provides a global swap function
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ typedef integral_image_generic<long> integral_image;
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename integral_image_type>
+ typename integral_image_type::value_type haar_x (
+ const integral_image_type& img,
+ const point& p,
+ long width
+ )
+ /*!
+ requires
+ - get_rect(img).contains(centered_rect(p,width,width)) == true
+ - integral_image_type == a type that implements the integral_image_generic
+ interface defined above
+ ensures
+ - returns the response of a Haar wavelet centered at the point p
+ with the given width. The wavelet is oriented along the X axis
+ and has the following shape:
+ ----++++
+ ----++++
+ ----++++
+ ----++++
+ That is, the wavelet is square and computes the sum of pixels on the
+ right minus the sum of pixels on the left.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename integral_image_type>
+ typename integral_image_type::value_type haar_y (
+ const integral_image_type& img,
+ const point& p,
+ long width
+ )
+ /*!
+ requires
+ - get_rect(img).contains(centered_rect(p,width,width)) == true
+ - integral_image_type == a type that implements the integral_image_generic
+ interface defined above
+ ensures
+ - returns the response of a Haar wavelet centered at the point p
+ with the given width in the given image. The wavelet is oriented
+ along the Y axis and has the following shape:
+ --------
+ --------
+ ++++++++
+ ++++++++
+ That is, the wavelet is square and computes the sum of pixels on the
+ bottom minus the sum of pixels on the top.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_INTEGRAL_IMAGe_ABSTRACT_
+
diff --git a/ml/dlib/dlib/image_transforms/interpolation.h b/ml/dlib/dlib/image_transforms/interpolation.h
new file mode 100644
index 000000000..11c561e2d
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/interpolation.h
@@ -0,0 +1,2193 @@
+// Copyright (C) 2012 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_INTERPOlATIONh_
+#define DLIB_INTERPOlATIONh_
+
+#include "interpolation_abstract.h"
+#include "../pixel.h"
+#include "../matrix.h"
+#include "assign_image.h"
+#include "image_pyramid.h"
+#include "../simd.h"
+#include "../image_processing/full_object_detection.h"
+#include <limits>
+#include "../rand.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
    // A non-owning, mutable view of a rectangular window inside another image.  It
    // stores a raw pointer into the parent image's pixel buffer, so a proxy must not
    // outlive the image it was constructed from.
    template <typename T>
    struct sub_image_proxy
    {
        sub_image_proxy() = default;

        sub_image_proxy (
            T& img,
            rectangle rect
        )
        {
            // Clip the requested window so the view never references pixels
            // outside img.
            rect = rect.intersect(get_rect(img));
            typedef typename image_traits<T>::pixel_type pixel_type;

            _nr = rect.height();
            _nc = rect.width();
            // The view shares the parent's row stride; only the origin moves.
            _width_step = width_step(img);
            // Advance the data pointer to rect's top-left pixel.  The arithmetic is
            // done on a char* because _width_step is a byte count.
            _data = (char*)image_data(img) + sizeof(pixel_type)*rect.left() + rect.top()*_width_step;
        }

        void* _data = 0;        // first pixel of the view (borrowed, not owned)
        long _width_step = 0;   // bytes between the starts of consecutive rows
        long _nr = 0;           // number of rows visible through the view
        long _nc = 0;           // number of columns visible through the view
    };
+
    // Read-only counterpart of sub_image_proxy: a non-owning view of a rectangular
    // window inside a const image.  Must not outlive the image it views.
    template <typename T>
    struct const_sub_image_proxy
    {
        const_sub_image_proxy() = default;

        const_sub_image_proxy (
            const T& img,
            rectangle rect
        )
        {
            // Clip the requested window so the view never references pixels
            // outside img.
            rect = rect.intersect(get_rect(img));
            typedef typename image_traits<T>::pixel_type pixel_type;

            _nr = rect.height();
            _nc = rect.width();
            // The view shares the parent's row stride; only the origin moves.
            _width_step = width_step(img);
            // Advance the data pointer to rect's top-left pixel (byte arithmetic,
            // since _width_step is a byte count).
            _data = (const char*)image_data(img) + sizeof(pixel_type)*rect.left() + rect.top()*_width_step;
        }

        const void* _data = 0;  // first pixel of the view (borrowed, not owned)
        long _width_step = 0;   // bytes between the starts of consecutive rows
        long _nr = 0;           // number of rows visible through the view
        long _nc = 0;           // number of columns visible through the view
    };
+
    // image_traits specializations: a sub-image view has the same pixel type as the
    // image it was carved out of.  Both const and non-const proxy forms are covered
    // so the proxies plug into dlib's generic image interface.
    template <typename T>
    struct image_traits<sub_image_proxy<T> >
    {
        typedef typename image_traits<T>::pixel_type pixel_type;
    };
    template <typename T>
    struct image_traits<const sub_image_proxy<T> >
    {
        typedef typename image_traits<T>::pixel_type pixel_type;
    };
    template <typename T>
    struct image_traits<const_sub_image_proxy<T> >
    {
        typedef typename image_traits<T>::pixel_type pixel_type;
    };
    template <typename T>
    struct image_traits<const const_sub_image_proxy<T> >
    {
        typedef typename image_traits<T>::pixel_type pixel_type;
    };
+
    // Generic-image interface hooks: report the dimensions of the view (not of the
    // underlying parent image).
    template <typename T>
    inline long num_rows( const sub_image_proxy<T>& img) { return img._nr; }
    template <typename T>
    inline long num_columns( const sub_image_proxy<T>& img) { return img._nc; }

    template <typename T>
    inline long num_rows( const const_sub_image_proxy<T>& img) { return img._nr; }
    template <typename T>
    inline long num_columns( const const_sub_image_proxy<T>& img) { return img._nc; }
+
    // Generic-image interface hooks: expose the view's pixel buffer, which points
    // into the parent image's storage (the view owns nothing).
    template <typename T>
    inline void* image_data( sub_image_proxy<T>& img) 
    { 
        return img._data; 
    } 
    template <typename T>
    inline const void* image_data( const sub_image_proxy<T>& img) 
    { 
        return img._data; 
    } 

    template <typename T>
    inline const void* image_data( const const_sub_image_proxy<T>& img) 
    { 
        return img._data; 
    } 
+
    // Generic-image interface hooks: the row stride, in bytes, inherited from the
    // parent image (rows of the view are generally not contiguous).
    template <typename T>
    inline long width_step(
        const sub_image_proxy<T>& img
    ) { return img._width_step; }

    template <typename T>
    inline long width_step(
        const const_sub_image_proxy<T>& img
    ) { return img._width_step; }
+
    // A sub-image aliases someone else's storage and therefore cannot be resized.
    // The only legal call is one that requests the dimensions the view already has;
    // anything else trips the assert.
    template <typename T>
    void set_image_size(sub_image_proxy<T>& img, long rows, long cols)
    {
        DLIB_CASSERT(img._nr == rows && img._nc == cols, "A sub_image can't be resized."
            << "\n\t img._nr: "<< img._nr
            << "\n\t img._nc: "<< img._nc
            << "\n\t rows:    "<< rows
            << "\n\t cols:    "<< cols 
            );
    }
+
+ template <
+ typename image_type
+ >
+ sub_image_proxy<image_type> sub_image (
+ image_type& img,
+ const rectangle& rect
+ )
+ {
+ return sub_image_proxy<image_type>(img,rect);
+ }
+
+ template <
+ typename image_type
+ >
+ const const_sub_image_proxy<image_type> sub_image (
+ const image_type& img,
+ const rectangle& rect
+ )
+ {
+ return const_sub_image_proxy<image_type>(img,rect);
+ }
+
+ template <typename T>
+ inline sub_image_proxy<matrix<T>> sub_image (
+ T* img,
+ long nr,
+ long nc,
+ long row_stride
+ )
+ {
+ sub_image_proxy<matrix<T>> tmp;
+ tmp._data = img;
+ tmp._nr = nr;
+ tmp._nc = nc;
+ tmp._width_step = row_stride*sizeof(T);
+ return tmp;
+ }
+
+ template <typename T>
+ inline const const_sub_image_proxy<matrix<T>> sub_image (
+ const T* img,
+ long nr,
+ long nc,
+ long row_stride
+ )
+ {
+ const_sub_image_proxy<matrix<T>> tmp;
+ tmp._data = img;
+ tmp._nr = nr;
+ tmp._nc = nc;
+ tmp._width_step = row_stride*sizeof(T);
+ return tmp;
+ }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ class interpolate_nearest_neighbor
+ {
+ public:
+
+ template <typename image_view_type, typename pixel_type>
+ bool operator() (
+ const image_view_type& img,
+ const dlib::point& p,
+ pixel_type& result
+ ) const
+ {
+ COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false);
+
+ if (get_rect(img).contains(p))
+ {
+ assign_pixel(result, img[p.y()][p.x()]);
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ };
+
+// ----------------------------------------------------------------------------------------
+
    // Function object that samples an image at a real-valued location by bilinearly
    // blending the 2x2 pixel neighborhood bracketing the point.  Both overloads
    // return false (leaving result untouched) when that neighborhood does not lie
    // entirely inside the image.
    class interpolate_bilinear
    {
        template <typename T>
        struct is_rgb_image 
        {
            const static bool value = pixel_traits<typename T::pixel_type>::rgb;
        };

    public:

        // Generic (non-RGB) path: each neighbor is converted to a double via
        // assign_pixel before blending, and the blended scalar is converted back
        // into result's pixel type.
        template <typename T, typename image_view_type, typename pixel_type>
        typename disable_if<is_rgb_image<image_view_type>,bool>::type operator() (
            const image_view_type& img,
            const dlib::vector<T,2>& p,
            pixel_type& result
        ) const
        {
            COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false);

            // Integer corners of the 2x2 neighborhood that brackets p.
            const long left = static_cast<long>(std::floor(p.x()));
            const long top = static_cast<long>(std::floor(p.y()));
            const long right = left+1;
            const long bottom = top+1;


            // if the interpolation goes outside img 
            if (!(left >= 0 && top >= 0 && right < img.nc() && bottom < img.nr()))
                return false;

            // Fractional position of p within the neighborhood, in [0,1).
            const double lr_frac = p.x() - left;
            const double tb_frac = p.y() - top;

            double tl = 0, tr = 0, bl = 0, br = 0;

            assign_pixel(tl, img[top][left]);
            assign_pixel(tr, img[top][right]);
            assign_pixel(bl, img[bottom][left]);
            assign_pixel(br, img[bottom][right]);

            // Blend horizontally within each row, then vertically between rows.
            double temp = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + 
                tb_frac*((1-lr_frac)*bl + lr_frac*br);

            assign_pixel(result, temp);
            return true;
        }

        // RGB path: interpolates the red, green, and blue channels independently
        // rather than collapsing the pixel to a single scalar.
        template <typename T, typename image_view_type, typename pixel_type>
        typename enable_if<is_rgb_image<image_view_type>,bool>::type operator() (
            const image_view_type& img,
            const dlib::vector<T,2>& p,
            pixel_type& result
        ) const
        {
            COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false);

            // Integer corners of the 2x2 neighborhood that brackets p.
            const long left = static_cast<long>(std::floor(p.x()));
            const long top = static_cast<long>(std::floor(p.y()));
            const long right = left+1;
            const long bottom = top+1;


            // if the interpolation goes outside img 
            if (!(left >= 0 && top >= 0 && right < img.nc() && bottom < img.nr()))
                return false;

            // Fractional position of p within the neighborhood, in [0,1).
            const double lr_frac = p.x() - left;
            const double tb_frac = p.y() - top;

            double tl, tr, bl, br;

            tl = img[top][left].red;
            tr = img[top][right].red;
            bl = img[bottom][left].red;
            br = img[bottom][right].red;
            const double red = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + 
                tb_frac*((1-lr_frac)*bl + lr_frac*br);

            tl = img[top][left].green;
            tr = img[top][right].green;
            bl = img[bottom][left].green;
            br = img[bottom][right].green;
            const double green = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + 
                tb_frac*((1-lr_frac)*bl + lr_frac*br);

            tl = img[top][left].blue;
            tr = img[top][right].blue;
            bl = img[bottom][left].blue;
            br = img[bottom][right].blue;
            const double blue = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + 
                tb_frac*((1-lr_frac)*bl + lr_frac*br);

            // Reassemble the channels and convert to the caller's pixel type.
            rgb_pixel temp;
            assign_pixel(temp.red, red);
            assign_pixel(temp.green, green);
            assign_pixel(temp.blue, blue);
            assign_pixel(result, temp);
            return true;
        }
    };
+
+// ----------------------------------------------------------------------------------------
+
    // Function object that samples an image at a real-valued location by fitting a
    // quadratic surface to the 3x3 pixel neighborhood around the point and
    // evaluating that surface at the point.  Both overloads return false (leaving
    // result untouched) when the 3x3 neighborhood does not lie entirely inside the
    // image.
    class interpolate_quadratic
    {
        template <typename T>
        struct is_rgb_image 
        {
            const static bool value = pixel_traits<typename T::pixel_type>::rgb;
        };

    public:

        // Generic (non-RGB) path: pixels are used directly as scalars.
        template <typename T, typename image_view_type, typename pixel_type>
        typename disable_if<is_rgb_image<image_view_type>,bool>::type operator() (
            const image_view_type& img,
            const dlib::vector<T,2>& p,
            pixel_type& result
        ) const
        {
            COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false);

            // pp is p rounded to the nearest pixel; it becomes the center of the
            // 3x3 neighborhood and the origin for the fit.
            const point pp(p);

            // if the interpolation goes outside img 
            if (!get_rect(img).contains(grow_rect(pp,1))) 
                return false;

            const long r = pp.y();
            const long c = pp.x();

            // Evaluate the fitted quadratic at p's offset from the center pixel.
            const double temp = interpolate(p-pp, 
                                    img[r-1][c-1],
                                    img[r-1][c  ],
                                    img[r-1][c+1],
                                    img[r  ][c-1],
                                    img[r  ][c  ],
                                    img[r  ][c+1],
                                    img[r+1][c-1],
                                    img[r+1][c  ],
                                    img[r+1][c+1]);

            assign_pixel(result, temp);
            return true;
        }

        // RGB path: fits and evaluates a separate quadratic for each channel.
        template <typename T, typename image_view_type, typename pixel_type>
        typename enable_if<is_rgb_image<image_view_type>,bool>::type operator() (
            const image_view_type& img,
            const dlib::vector<T,2>& p,
            pixel_type& result
        ) const
        {
            COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false);

            // pp is p rounded to the nearest pixel; it becomes the center of the
            // 3x3 neighborhood and the origin for the fit.
            const point pp(p);

            // if the interpolation goes outside img 
            if (!get_rect(img).contains(grow_rect(pp,1))) 
                return false;

            const long r = pp.y();
            const long c = pp.x();

            const double red = interpolate(p-pp, 
                            img[r-1][c-1].red,
                            img[r-1][c  ].red,
                            img[r-1][c+1].red,
                            img[r  ][c-1].red,
                            img[r  ][c  ].red,
                            img[r  ][c+1].red,
                            img[r+1][c-1].red,
                            img[r+1][c  ].red,
                            img[r+1][c+1].red);
            const double green = interpolate(p-pp, 
                            img[r-1][c-1].green,
                            img[r-1][c  ].green,
                            img[r-1][c+1].green,
                            img[r  ][c-1].green,
                            img[r  ][c  ].green,
                            img[r  ][c+1].green,
                            img[r+1][c-1].green,
                            img[r+1][c  ].green,
                            img[r+1][c+1].green);
            const double blue = interpolate(p-pp, 
                            img[r-1][c-1].blue,
                            img[r-1][c  ].blue,
                            img[r-1][c+1].blue,
                            img[r  ][c-1].blue,
                            img[r  ][c  ].blue,
                            img[r  ][c+1].blue,
                            img[r+1][c-1].blue,
                            img[r+1][c  ].blue,
                            img[r+1][c+1].blue);


            rgb_pixel temp;
            assign_pixel(temp.red, red);
            assign_pixel(temp.green, green);
            assign_pixel(temp.blue, blue);
            assign_pixel(result, temp);

            return true;
        }

    private:

        /*  tl tm tr
            ml mm mr
            bl bm br
        */
        // The above is the pixel layout in our little 3x3 neighborhood.  interpolate()
        // will fit a quadratic to these 9 pixels and then use that quadratic to find
        // the interpolated value at point p.  With 9 samples and only 6 quadratic
        // coefficients the fit is necessarily approximate; the fixed weights below
        // (multiples of 1/6, 1/3, 1/4, 1/9, etc.) are the closed-form solution of
        // that fit for the unit-spaced 3x3 grid.
        inline double interpolate(
            const dlib::vector<double,2>& p,
            double tl, double tm, double tr, 
            double ml, double mm, double mr, 
            double bl, double bm, double br
        ) const
        {
            // w holds the quadratic's coefficients, ordered to match z below:
            // f(x,y) = w0*x + w1*y + w2*x^2 + w3*x*y + w4*y^2 + w5
            matrix<double,6,1> w;
            // x
            w(0) = (tr + mr + br - tl - ml - bl)*0.16666666666;
            // y
            w(1) = (bl + bm + br - tl - tm - tr)*0.16666666666;
            // x^2
            w(2) = (tl + tr + ml + mr + bl + br)*0.16666666666 - (tm + mm + bm)*0.333333333;
            // x*y
            w(3) = (tl - tr - bl + br)*0.25;
            // y^2
            w(4) = (tl + tm + tr + bl + bm + br)*0.16666666666 - (ml + mm + mr)*0.333333333;
            // 1 (constant term)
            w(5) = (tm + ml + mr + bm)*0.222222222 - (tl + tr + bl + br)*0.11111111 + (mm)*0.55555556;

            const double x = p.x();
            const double y = p.y();

            // Evaluate the quadratic at (x,y) via a dot product with the monomials.
            matrix<double,6,1> z;
            z = x, y, x*x, x*y, y*y, 1.0;
                            
            return dot(w,z);
        }
    };
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
    // Background policies for transform_image(): when the interpolator reports that
    // an output pixel maps outside the input image, one of these decides what to
    // write into that output pixel instead.

    // Fill unmapped output pixels with 0 (black).
    class black_background
    {
    public:
        template <typename pixel_type>
        void operator() ( pixel_type& p) const { assign_pixel(p, 0); }
    };

    // Fill unmapped output pixels with 255 (white).
    class white_background
    {
    public:
        template <typename pixel_type>
        void operator() ( pixel_type& p) const { assign_pixel(p, 255); }
    };

    // Leave unmapped output pixels untouched.
    class no_background
    {
    public:
        template <typename pixel_type>
        void operator() ( pixel_type& ) const { }
    };
+
+// ----------------------------------------------------------------------------------------
+
    // Core geometric-warp routine.  For every output pixel (c,r) inside area,
    // map_point converts the output coordinate to a real-valued location in in_img
    // and interp samples the input there.  When interp fails (the mapped point
    // falls outside in_img), set_background decides what to write to the output
    // pixel instead.
    //
    // requires: area is contained in out_img, and in_img/out_img are distinct
    // objects (enforced by the assert below).
    template <
        typename image_type1,
        typename image_type2,
        typename interpolation_type,
        typename point_mapping_type,
        typename background_type
        >
    void transform_image (
        const image_type1& in_img,
        image_type2& out_img,
        const interpolation_type& interp,
        const point_mapping_type& map_point,
        const background_type& set_background,
        const rectangle& area
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT( get_rect(out_img).contains(area) == true &&
                     is_same_object(in_img, out_img) == false ,
            "\t void transform_image()"
            << "\n\t Invalid inputs were given to this function."
            << "\n\t get_rect(out_img).contains(area): " << get_rect(out_img).contains(area)
            << "\n\t get_rect(out_img): " << get_rect(out_img)
            << "\n\t area:              " << area
            << "\n\t is_same_object(in_img, out_img):  " << is_same_object(in_img, out_img)
            );

        const_image_view<image_type1> imgv(in_img);
        image_view<image_type2> out_imgv(out_img);

        for (long r = area.top(); r <= area.bottom(); ++r)
        {
            for (long c = area.left(); c <= area.right(); ++c)
            {
                // Sample the input at the mapped location; fall back to the
                // background policy when the sample is unavailable.
                if (!interp(imgv, map_point(dlib::vector<double,2>(c,r)), out_imgv[r][c]))
                    set_background(out_imgv[r][c]);
            }
        }
    }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2,
+ typename interpolation_type,
+ typename point_mapping_type,
+ typename background_type
+ >
+ void transform_image (
+ const image_type1& in_img,
+ image_type2& out_img,
+ const interpolation_type& interp,
+ const point_mapping_type& map_point,
+ const background_type& set_background
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( is_same_object(in_img, out_img) == false ,
+ "\t void transform_image()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img)
+ );
+
+ transform_image(in_img, out_img, interp, map_point, set_background, get_rect(out_img));
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2,
+ typename interpolation_type,
+ typename point_mapping_type
+ >
+ void transform_image (
+ const image_type1& in_img,
+ image_type2& out_img,
+ const interpolation_type& interp,
+ const point_mapping_type& map_point
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( is_same_object(in_img, out_img) == false ,
+ "\t void transform_image()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img)
+ );
+
+
+ transform_image(in_img, out_img, interp, map_point, black_background(), get_rect(out_img));
+ }
+
+// ----------------------------------------------------------------------------------------
+
    // Rotates in_img by the given angle (radians) into out_img, using interp to
    // sample the input.  out_img is resized to the bounding box of the rotated
    // input rectangle, so no part of the rotated image is clipped.  Returns the
    // affine transform mapping points in in_img to the corresponding points in
    // out_img (the inverse of the sampling transform used internally).
    template <
        typename image_type1,
        typename image_type2,
        typename interpolation_type
        >
    point_transform_affine rotate_image (
        const image_type1& in_img,
        image_type2& out_img,
        double angle,
        const interpolation_type& interp
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT( is_same_object(in_img, out_img) == false ,
            "\t point_transform_affine rotate_image()"
            << "\n\t Invalid inputs were given to this function."
            << "\n\t is_same_object(in_img, out_img):  " << is_same_object(in_img, out_img)
            );

        const rectangle rimg = get_rect(in_img);


        // figure out bounding box for rotated rectangle
        rectangle rect;
        rect += rotate_point(center(rimg), rimg.tl_corner(), -angle);
        rect += rotate_point(center(rimg), rimg.tr_corner(), -angle);
        rect += rotate_point(center(rimg), rimg.bl_corner(), -angle);
        rect += rotate_point(center(rimg), rimg.br_corner(), -angle);
        set_image_size(out_img, rect.height(), rect.width());

        const matrix<double,2,2> R = rotation_matrix(angle);

        // trans maps output coordinates back into input coordinates: rotate about
        // the output center, then translate so the image centers coincide.
        point_transform_affine trans = point_transform_affine(R, -R*dcenter(get_rect(out_img)) + dcenter(rimg));
        transform_image(in_img, out_img, interp, trans);
        // Callers get the forward mapping (input -> output).
        return inv(trans);
    }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ point_transform_affine rotate_image (
+ const image_type1& in_img,
+ image_type2& out_img,
+ double angle
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( is_same_object(in_img, out_img) == false ,
+ "\t point_transform_affine rotate_image()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img)
+ );
+
+ return rotate_image(in_img, out_img, angle, interpolate_quadratic());
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ namespace impl
+ {
+ class helper_resize_image
+ {
+ public:
+ helper_resize_image(
+ double x_scale_,
+ double y_scale_
+ ):
+ x_scale(x_scale_),
+ y_scale(y_scale_)
+ {}
+
+ dlib::vector<double,2> operator() (
+ const dlib::vector<double,2>& p
+ ) const
+ {
+ return dlib::vector<double,2>(p.x()*x_scale, p.y()*y_scale);
+ }
+
+ private:
+ const double x_scale;
+ const double y_scale;
+ };
+ }
+
    // Resizes in_img to fill out_img (out_img's current dimensions choose the
    // output size), sampling with the supplied interpolator.
    template <
        typename image_type1,
        typename image_type2,
        typename interpolation_type
        >
    void resize_image (
        const image_type1& in_img,
        image_type2& out_img,
        const interpolation_type& interp
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT( is_same_object(in_img, out_img) == false ,
            "\t void resize_image()"
            << "\n\t Invalid inputs were given to this function."
            << "\n\t is_same_object(in_img, out_img):  " << is_same_object(in_img, out_img)
            );

        // Scale factors map the output's corner pixels onto the input's corner
        // pixels; the max(...,1) guards against division by zero when the output
        // has a single row or column.
        const double x_scale = (num_columns(in_img)-1)/(double)std::max<long>((num_columns(out_img)-1),1);
        const double y_scale = (num_rows(in_img)-1)/(double)std::max<long>((num_rows(out_img)-1),1);
        transform_image(in_img, out_img, interp, 
                        dlib::impl::helper_resize_image(x_scale,y_scale));
    }
+
+// ----------------------------------------------------------------------------------------
+
    // Compile-time predicates used to dispatch resize_image() to its optimized
    // RGB/grayscale overloads below.
    template <typename image_type>
    struct is_rgb_image { const static bool value = pixel_traits<typename image_traits<image_type>::pixel_type>::rgb; };
    template <typename image_type>
    struct is_grayscale_image { const static bool value = pixel_traits<typename image_traits<image_type>::pixel_type>::grayscale; };
+
    // This is an optimized version of resize_image for the case where bilinear
    // interpolation is used.  This generic overload handles every pixel-type
    // combination not covered by the SIMD grayscale/RGB specializations below.  It
    // avoids the per-pixel bounds checks of the general interpolator by clamping
    // the right/bottom neighbor indices instead.
    template <
        typename image_type1,
        typename image_type2
        >
    typename disable_if_c<(is_rgb_image<image_type1>::value&&is_rgb_image<image_type2>::value) || 
                          (is_grayscale_image<image_type1>::value&&is_grayscale_image<image_type2>::value)>::type 
    resize_image (
        const image_type1& in_img_,
        image_type2& out_img_,
        interpolate_bilinear
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT( is_same_object(in_img_, out_img_) == false ,
            "\t void resize_image()"
            << "\n\t Invalid inputs were given to this function."
            << "\n\t is_same_object(in_img_, out_img_):  " << is_same_object(in_img_, out_img_)
            );

        const_image_view<image_type1> in_img(in_img_);
        image_view<image_type2> out_img(out_img_);

        if (out_img.size() == 0 || in_img.size() == 0)
            return;


        typedef typename image_traits<image_type1>::pixel_type T;
        typedef typename image_traits<image_type2>::pixel_type U;
        // Scale factors map the output's corner pixels onto the input's corner
        // pixels; max(...,1) guards the single-row/column output case.
        const double x_scale = (in_img.nc()-1)/(double)std::max<long>((out_img.nc()-1),1);
        const double y_scale = (in_img.nr()-1)/(double)std::max<long>((out_img.nr()-1),1);
        double y = -y_scale;
        for (long r = 0; r < out_img.nr(); ++r)
        {
            y += y_scale;
            // Row neighborhood: bottom is clamped so the last row interpolates
            // with itself instead of reading out of bounds.
            const long top = static_cast<long>(std::floor(y));
            const long bottom = std::min(top+1, in_img.nr()-1);
            const double tb_frac = y - top;
            double x = -x_scale;
            if (pixel_traits<U>::grayscale)
            {
                // Grayscale output: blend scalar intensities.
                for (long c = 0; c < out_img.nc(); ++c)
                {
                    x += x_scale;
                    const long left = static_cast<long>(std::floor(x));
                    const long right = std::min(left+1, in_img.nc()-1);
                    const double lr_frac = x - left;

                    double tl = 0, tr = 0, bl = 0, br = 0;

                    assign_pixel(tl, in_img[top][left]);
                    assign_pixel(tr, in_img[top][right]);
                    assign_pixel(bl, in_img[bottom][left]);
                    assign_pixel(br, in_img[bottom][right]);

                    double temp = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + 
                        tb_frac*((1-lr_frac)*bl + lr_frac*br);

                    assign_pixel(out_img[r][c], temp);
                }
            }
            else
            {
                // Non-grayscale output: blend whole pixels channel-wise through
                // pixel_to_vector/vector_to_pixel.
                for (long c = 0; c < out_img.nc(); ++c)
                {
                    x += x_scale;
                    const long left = static_cast<long>(std::floor(x));
                    const long right = std::min(left+1, in_img.nc()-1);
                    const double lr_frac = x - left;

                    const T tl = in_img[top][left];
                    const T tr = in_img[top][right];
                    const T bl = in_img[bottom][left];
                    const T br = in_img[bottom][right];

                    T temp;
                    assign_pixel(temp, 0);
                    vector_to_pixel(temp, 
                        (1-tb_frac)*((1-lr_frac)*pixel_to_vector<double>(tl) + lr_frac*pixel_to_vector<double>(tr)) + 
                            tb_frac*((1-lr_frac)*pixel_to_vector<double>(bl) + lr_frac*pixel_to_vector<double>(br)));
                    assign_pixel(out_img[r][c], temp);
                }
            }
        }
    }
+
+// ----------------------------------------------------------------------------------------
+
    // Compile-time predicate: true when the two image types store the same pixel
    // type.  Used to dispatch to the SIMD resize specializations below.
    template <
        typename image_type1,
        typename image_type2
        >
    struct images_have_same_pixel_types
    {
        typedef typename image_traits<image_type1>::pixel_type ptype1;
        typedef typename image_traits<image_type2>::pixel_type ptype2;
        const static bool value = is_same_type<ptype1, ptype2>::value;
    };
+
    // SIMD-accelerated bilinear resize for the case where both images are
    // grayscale and share the same pixel type.  The vector loop produces four
    // output pixels per iteration; the pixels near the right edge (where a vector
    // lane's right neighbor would read past the last input column) are finished by
    // a scalar tail loop that clamps the neighbor index.
    template <
        typename image_type,
        typename image_type2
        >
    typename enable_if_c<is_grayscale_image<image_type>::value && is_grayscale_image<image_type2>::value && images_have_same_pixel_types<image_type,image_type2>::value>::type 
    resize_image (
        const image_type& in_img_,
        image_type2& out_img_,
        interpolate_bilinear
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT( is_same_object(in_img_, out_img_) == false ,
            "\t void resize_image()"
            << "\n\t Invalid inputs were given to this function."
            << "\n\t is_same_object(in_img_, out_img_):  " << is_same_object(in_img_, out_img_)
            );

        const_image_view<image_type> in_img(in_img_);
        image_view<image_type2> out_img(out_img_);

        if (out_img.size() == 0 || in_img.size() == 0)
            return;

        typedef typename image_traits<image_type>::pixel_type T;
        // Scale factors map the output's corner pixels onto the input's corner
        // pixels; max(...,1) guards the single-row/column output case.
        const double x_scale = (in_img.nc()-1)/(double)std::max<long>((out_img.nc()-1),1);
        const double y_scale = (in_img.nr()-1)/(double)std::max<long>((out_img.nr()-1),1);
        double y = -y_scale;
        for (long r = 0; r < out_img.nr(); ++r)
        {
            y += y_scale;
            const long top = static_cast<long>(std::floor(y));
            // Clamp so the last row interpolates with itself.
            const long bottom = std::min(top+1, in_img.nr()-1);
            const double tb_frac = y - top;
            // Start 4 steps back so the first "+= 4*x_scale" lands on column 0.
            double x = -4*x_scale;

            const simd4f _tb_frac = tb_frac;
            const simd4f _inv_tb_frac = 1-tb_frac;
            const simd4f _x_scale = 4*x_scale;
            simd4f _x(x, x+x_scale, x+2*x_scale, x+3*x_scale);
            long c = 0;
            // Vector loop: four output pixels per iteration.
            for (;; c+=4)
            {
                _x += _x_scale;
                // NOTE(review): the float->int conversion here is assumed to
                // truncate; x is non-negative so that matches the std::floor used
                // by the scalar tail loop — confirm against the simd4i docs.
                simd4i left = simd4i(_x);

                simd4f _lr_frac = _x-left;
                simd4f _inv_lr_frac = 1-_lr_frac;
                simd4i right = left+1;

                // Bilinear blend weights for the four neighbors of each lane.
                simd4f tlf = _inv_tb_frac*_inv_lr_frac;
                simd4f trf = _inv_tb_frac*_lr_frac;
                simd4f blf = _tb_frac*_inv_lr_frac;
                simd4f brf = _tb_frac*_lr_frac;

                int32 fleft[4];
                int32 fright[4];
                left.store(fleft);
                right.store(fright);

                // Stop before any lane's right neighbor reads past the last input
                // column; the scalar tail loop below handles the rest.
                if (fright[3] >= in_img.nc())
                    break;
                // Gather the four neighbors for each of the four lanes.
                simd4f tl(in_img[top][fleft[0]],     in_img[top][fleft[1]],     in_img[top][fleft[2]],     in_img[top][fleft[3]]);
                simd4f tr(in_img[top][fright[0]],    in_img[top][fright[1]],    in_img[top][fright[2]],    in_img[top][fright[3]]);
                simd4f bl(in_img[bottom][fleft[0]],  in_img[bottom][fleft[1]],  in_img[bottom][fleft[2]],  in_img[bottom][fleft[3]]);
                simd4f br(in_img[bottom][fright[0]], in_img[bottom][fright[1]], in_img[bottom][fright[2]], in_img[bottom][fright[3]]);

                simd4f out = simd4f(tlf*tl + trf*tr + blf*bl + brf*br);
                float fout[4];
                out.store(fout);

                out_img[r][c]   = static_cast<T>(fout[0]);
                out_img[r][c+1] = static_cast<T>(fout[1]);
                out_img[r][c+2] = static_cast<T>(fout[2]);
                out_img[r][c+3] = static_cast<T>(fout[3]);
            }
            // Scalar tail: resume x at the column where the vector loop stopped.
            x = -x_scale + c*x_scale;
            for (; c < out_img.nc(); ++c)
            {
                x += x_scale;
                const long left = static_cast<long>(std::floor(x));
                // Clamp so the last column interpolates with itself.
                const long right = std::min(left+1, in_img.nc()-1);
                const float lr_frac = x - left;

                float tl = 0, tr = 0, bl = 0, br = 0;

                assign_pixel(tl, in_img[top][left]);
                assign_pixel(tr, in_img[top][right]);
                assign_pixel(bl, in_img[bottom][left]);
                assign_pixel(br, in_img[bottom][right]);

                float temp = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + 
                    tb_frac*((1-lr_frac)*bl + lr_frac*br);

                assign_pixel(out_img[r][c], temp);
            }
        }
    }
+
+// ----------------------------------------------------------------------------------------
+
    // SIMD-accelerated bilinear resize for RGB images (note: unlike the grayscale
    // specialization, input and output must be the SAME image type here).  Each
    // iteration of the vector loop produces four output pixels, blending the red,
    // green, and blue channels as three separate SIMD passes.  Right-edge pixels
    // are finished by a scalar tail loop that clamps the neighbor index.
    template <
        typename image_type
        >
    typename enable_if<is_rgb_image<image_type> >::type resize_image (
        const image_type& in_img_,
        image_type& out_img_,
        interpolate_bilinear
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT( is_same_object(in_img_, out_img_) == false ,
            "\t void resize_image()"
            << "\n\t Invalid inputs were given to this function."
            << "\n\t is_same_object(in_img_, out_img_):  " << is_same_object(in_img_, out_img_)
            );

        const_image_view<image_type> in_img(in_img_);
        image_view<image_type> out_img(out_img_);

        if (out_img.size() == 0 || in_img.size() == 0)
            return;


        typedef typename image_traits<image_type>::pixel_type T;
        // Scale factors map the output's corner pixels onto the input's corner
        // pixels; max(...,1) guards the single-row/column output case.
        const double x_scale = (in_img.nc()-1)/(double)std::max<long>((out_img.nc()-1),1);
        const double y_scale = (in_img.nr()-1)/(double)std::max<long>((out_img.nr()-1),1);
        double y = -y_scale;
        for (long r = 0; r < out_img.nr(); ++r)
        {
            y += y_scale;
            const long top = static_cast<long>(std::floor(y));
            // Clamp so the last row interpolates with itself.
            const long bottom = std::min(top+1, in_img.nr()-1);
            const double tb_frac = y - top;
            // Start 4 steps back so the first "+= 4*x_scale" lands on column 0.
            double x = -4*x_scale;

            const simd4f _tb_frac = tb_frac;
            const simd4f _inv_tb_frac = 1-tb_frac;
            const simd4f _x_scale = 4*x_scale;
            simd4f _x(x, x+x_scale, x+2*x_scale, x+3*x_scale);
            long c = 0;
            // Vector loop: four output pixels per iteration.
            for (;; c+=4)
            {
                _x += _x_scale;
                // NOTE(review): the float->int conversion here is assumed to
                // truncate; x is non-negative so that matches the std::floor used
                // by the scalar tail loop — confirm against the simd4i docs.
                simd4i left = simd4i(_x);
                simd4f lr_frac = _x-left;
                simd4f _inv_lr_frac = 1-lr_frac;
                simd4i right = left+1;

                // Bilinear blend weights for the four neighbors of each lane.
                simd4f tlf = _inv_tb_frac*_inv_lr_frac;
                simd4f trf = _inv_tb_frac*lr_frac;
                simd4f blf = _tb_frac*_inv_lr_frac;
                simd4f brf = _tb_frac*lr_frac;

                int32 fleft[4];
                int32 fright[4];
                left.store(fleft);
                right.store(fright);

                // Stop before any lane's right neighbor reads past the last input
                // column; the scalar tail loop below handles the rest.
                if (fright[3] >= in_img.nc())
                    break;
                // Red channel: gather, blend, and store for all four lanes.
                simd4f tl(in_img[top][fleft[0]].red,     in_img[top][fleft[1]].red,     in_img[top][fleft[2]].red,     in_img[top][fleft[3]].red);
                simd4f tr(in_img[top][fright[0]].red,    in_img[top][fright[1]].red,    in_img[top][fright[2]].red,    in_img[top][fright[3]].red);
                simd4f bl(in_img[bottom][fleft[0]].red,  in_img[bottom][fleft[1]].red,  in_img[bottom][fleft[2]].red,  in_img[bottom][fleft[3]].red);
                simd4f br(in_img[bottom][fright[0]].red, in_img[bottom][fright[1]].red, in_img[bottom][fright[2]].red, in_img[bottom][fright[3]].red);

                simd4i out = simd4i(tlf*tl + trf*tr + blf*bl + brf*br);
                int32 fout[4];
                out.store(fout);

                out_img[r][c].red   = static_cast<unsigned char>(fout[0]);
                out_img[r][c+1].red = static_cast<unsigned char>(fout[1]);
                out_img[r][c+2].red = static_cast<unsigned char>(fout[2]);
                out_img[r][c+3].red = static_cast<unsigned char>(fout[3]);


                // Green channel.
                tl = simd4f(in_img[top][fleft[0]].green,     in_img[top][fleft[1]].green,     in_img[top][fleft[2]].green,     in_img[top][fleft[3]].green);
                tr = simd4f(in_img[top][fright[0]].green,    in_img[top][fright[1]].green,    in_img[top][fright[2]].green,    in_img[top][fright[3]].green);
                bl = simd4f(in_img[bottom][fleft[0]].green,  in_img[bottom][fleft[1]].green,  in_img[bottom][fleft[2]].green,  in_img[bottom][fleft[3]].green);
                br = simd4f(in_img[bottom][fright[0]].green, in_img[bottom][fright[1]].green, in_img[bottom][fright[2]].green, in_img[bottom][fright[3]].green);
                out = simd4i(tlf*tl + trf*tr + blf*bl + brf*br);
                out.store(fout);
                out_img[r][c].green   = static_cast<unsigned char>(fout[0]);
                out_img[r][c+1].green = static_cast<unsigned char>(fout[1]);
                out_img[r][c+2].green = static_cast<unsigned char>(fout[2]);
                out_img[r][c+3].green = static_cast<unsigned char>(fout[3]);


                // Blue channel.
                tl = simd4f(in_img[top][fleft[0]].blue,     in_img[top][fleft[1]].blue,     in_img[top][fleft[2]].blue,     in_img[top][fleft[3]].blue);
                tr = simd4f(in_img[top][fright[0]].blue,    in_img[top][fright[1]].blue,    in_img[top][fright[2]].blue,    in_img[top][fright[3]].blue);
                bl = simd4f(in_img[bottom][fleft[0]].blue,  in_img[bottom][fleft[1]].blue,  in_img[bottom][fleft[2]].blue,  in_img[bottom][fleft[3]].blue);
                br = simd4f(in_img[bottom][fright[0]].blue, in_img[bottom][fright[1]].blue, in_img[bottom][fright[2]].blue, in_img[bottom][fright[3]].blue);
                out = simd4i(tlf*tl + trf*tr + blf*bl + brf*br);
                out.store(fout);
                out_img[r][c].blue   = static_cast<unsigned char>(fout[0]);
                out_img[r][c+1].blue = static_cast<unsigned char>(fout[1]);
                out_img[r][c+2].blue = static_cast<unsigned char>(fout[2]);
                out_img[r][c+3].blue = static_cast<unsigned char>(fout[3]);
            }
            // Scalar tail: resume x at the column where the vector loop stopped.
            x = -x_scale + c*x_scale;
            for (; c < out_img.nc(); ++c)
            {
                x += x_scale;
                const long left = static_cast<long>(std::floor(x));
                // Clamp so the last column interpolates with itself.
                const long right = std::min(left+1, in_img.nc()-1);
                const double lr_frac = x - left;

                const T tl = in_img[top][left];
                const T tr = in_img[top][right];
                const T bl = in_img[bottom][left];
                const T br = in_img[bottom][right];

                T temp;
                assign_pixel(temp, 0);
                vector_to_pixel(temp, 
                    (1-tb_frac)*((1-lr_frac)*pixel_to_vector<double>(tl) + lr_frac*pixel_to_vector<double>(tr)) + 
                        tb_frac*((1-lr_frac)*pixel_to_vector<double>(bl) + lr_frac*pixel_to_vector<double>(br)));
                assign_pixel(out_img[r][c], temp);
            }
        }
    }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ void resize_image (
+ const image_type1& in_img,
+ image_type2& out_img
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( is_same_object(in_img, out_img) == false ,
+ "\t void resize_image()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img)
+ );
+
+ resize_image(in_img, out_img, interpolate_bilinear());
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ void resize_image (
+ double size_scale,
+ image_type& img
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( size_scale > 0 ,
+ "\t void resize_image()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t size_scale: " << size_scale
+ );
+
+ image_type temp;
+ set_image_size(temp, std::round(size_scale*num_rows(img)), std::round(size_scale*num_columns(img)));
+ resize_image(img, temp);
+ swap(img, temp);
+ }
+
+// ----------------------------------------------------------------------------------------
+
    // Writes a horizontally mirrored copy of in_img into out_img.  Returns the
    // affine transform mapping points in in_img to their mirrored locations in
    // out_img, derived from the four corner correspondences of the image
    // rectangle.
    template <
        typename image_type1,
        typename image_type2
        >
    point_transform_affine flip_image_left_right (
        const image_type1& in_img,
        image_type2& out_img
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT( is_same_object(in_img, out_img) == false ,
            "\t void flip_image_left_right()"
            << "\n\t Invalid inputs were given to this function."
            << "\n\t is_same_object(in_img, out_img):  " << is_same_object(in_img, out_img)
            );

        assign_image(out_img, fliplr(mat(in_img)));
        // Each corner maps to its horizontal mirror image; fitting an affine
        // transform to these pairs recovers the left-right flip as a transform.
        std::vector<dlib::vector<double,2> > from, to;
        rectangle r = get_rect(in_img);
        from.push_back(r.tl_corner()); to.push_back(r.tr_corner());
        from.push_back(r.bl_corner()); to.push_back(r.br_corner());
        from.push_back(r.tr_corner()); to.push_back(r.tl_corner());
        from.push_back(r.br_corner()); to.push_back(r.bl_corner());
        return find_affine_transform(from,to);
    }
+
+// ----------------------------------------------------------------------------------------
+
+ // In-place variant: mirrors img horizontally and returns the same
+ // point mapping as the two-image overload.
+ template <
+ typename image_type
+ >
+ point_transform_affine flip_image_left_right (
+ image_type& img
+ )
+ {
+ image_type temp;
+ auto tform = flip_image_left_right(img, temp);
+ swap(temp,img);
+ return tform;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Mirrors in_img vertically into out_img. Unlike flip_image_left_right,
+ // no point transform is returned.
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ void flip_image_up_down (
+ const image_type1& in_img,
+ image_type2& out_img
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( is_same_object(in_img, out_img) == false ,
+ "\t void flip_image_up_down()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img)
+ );
+
+ assign_image(out_img, flipud(mat(in_img)));
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Internal helpers used by the dataset augmentation routines below.
+ namespace impl
+ {
+ // Mirrors rect horizontally within window: the distance from window's
+ // left edge to rect's left edge becomes the distance from window's right
+ // edge to rect's right edge. The vertical extent is unchanged.
+ inline rectangle flip_rect_left_right (
+ const rectangle& rect,
+ const rectangle& window
+ )
+ {
+ rectangle temp;
+ temp.top() = rect.top();
+ temp.bottom() = rect.bottom();
+
+ const long left_dist = rect.left()-window.left();
+
+ temp.right() = window.right()-left_dist;
+ temp.left() = temp.right()-rect.width()+1;
+ return temp;
+ }
+
+ // Maps a rectangle through an affine transform by moving its center and
+ // keeping its width/height. Note this re-centers the box rather than
+ // warping its corners, so the box is never rotated or rescaled.
+ inline rectangle tform_object (
+ const point_transform_affine& tran,
+ const rectangle& rect
+ )
+ {
+ return centered_rect(tran(center(rect)), rect.width(), rect.height());
+ }
+
+ // mmod_rect variant: transforms only the embedded rectangle; the other
+ // fields (label, ignore flag) are carried through by the value copy.
+ inline mmod_rect tform_object (
+ const point_transform_affine& tran,
+ mmod_rect rect
+ )
+ {
+ rect.rect = tform_object(tran, rect.rect);
+ return rect;
+ }
+
+ // full_object_detection variant: transforms every present part point and
+ // the bounding rectangle; missing parts remain OBJECT_PART_NOT_PRESENT.
+ inline full_object_detection tform_object(
+ const point_transform_affine& tran,
+ const full_object_detection& obj
+ )
+ {
+ std::vector<point> parts;
+ parts.reserve(obj.num_parts());
+ for (unsigned long i = 0; i < obj.num_parts(); ++i)
+ {
+ if (obj.part(i) != OBJECT_PART_NOT_PRESENT)
+ parts.push_back(tran(obj.part(i)));
+ else
+ parts.push_back(OBJECT_PART_NOT_PRESENT);
+ }
+ return full_object_detection(tform_object(tran,obj.get_rect()), parts);
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Doubles the dataset by appending a left-right mirrored copy of every image
+ // together with correspondingly mirrored object annotations. Note that the
+ // original size is captured in `num` before the loop, so the freshly appended
+ // copies are not flipped a second time.
+ template <
+ typename image_array_type,
+ typename T
+ >
+ void add_image_left_right_flips (
+ image_array_type& images,
+ std::vector<std::vector<T> >& objects
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( images.size() == objects.size(),
+ "\t void add_image_left_right_flips()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t images.size(): " << images.size()
+ << "\n\t objects.size(): " << objects.size()
+ );
+
+ typename image_array_type::value_type temp;
+ std::vector<T> rects;
+
+ const unsigned long num = images.size();
+ for (unsigned long j = 0; j < num; ++j)
+ {
+ const point_transform_affine tran = flip_image_left_right(images[j], temp);
+
+ rects.clear();
+ for (unsigned long i = 0; i < objects[j].size(); ++i)
+ rects.push_back(impl::tform_object(tran, objects[j][i]));
+
+ images.push_back(std::move(temp));
+ objects.push_back(rects);
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Same as the two-argument overload but also mirrors a second, parallel set
+ // of annotations (objects2). All three containers grow to twice their size.
+ template <
+ typename image_array_type,
+ typename T,
+ typename U
+ >
+ void add_image_left_right_flips (
+ image_array_type& images,
+ std::vector<std::vector<T> >& objects,
+ std::vector<std::vector<U> >& objects2
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( images.size() == objects.size() &&
+ images.size() == objects2.size(),
+ "\t void add_image_left_right_flips()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t images.size(): " << images.size()
+ << "\n\t objects.size(): " << objects.size()
+ << "\n\t objects2.size(): " << objects2.size()
+ );
+
+ typename image_array_type::value_type temp;
+ std::vector<T> rects;
+ std::vector<U> rects2;
+
+ const unsigned long num = images.size();
+ for (unsigned long j = 0; j < num; ++j)
+ {
+ const point_transform_affine tran = flip_image_left_right(images[j], temp);
+ images.push_back(std::move(temp));
+
+ rects.clear();
+ for (unsigned long i = 0; i < objects[j].size(); ++i)
+ rects.push_back(impl::tform_object(tran, objects[j][i]));
+ objects.push_back(rects);
+
+ rects2.clear();
+ for (unsigned long i = 0; i < objects2[j].size(); ++i)
+ rects2.push_back(impl::tform_object(tran, objects2[j][i]));
+ objects2.push_back(rects2);
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Flips every image in the dataset left-right IN PLACE (unlike
+ // add_image_left_right_flips, which appends copies) and mirrors each
+ // bounding box within its image rectangle.
+ template <typename image_array_type>
+ void flip_image_dataset_left_right (
+ image_array_type& images,
+ std::vector<std::vector<rectangle> >& objects
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( images.size() == objects.size(),
+ "\t void flip_image_dataset_left_right()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t images.size(): " << images.size()
+ << "\n\t objects.size(): " << objects.size()
+ );
+
+ typename image_array_type::value_type temp;
+ for (unsigned long i = 0; i < images.size(); ++i)
+ {
+ flip_image_left_right(images[i], temp);
+ swap(temp,images[i]);
+ for (unsigned long j = 0; j < objects[i].size(); ++j)
+ {
+ objects[i][j] = impl::flip_rect_left_right(objects[i][j], get_rect(images[i]));
+ }
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Same in-place flip as the two-argument overload, but also mirrors a
+ // second, parallel set of annotations (objects2).
+ template <typename image_array_type>
+ void flip_image_dataset_left_right (
+ image_array_type& images,
+ std::vector<std::vector<rectangle> >& objects,
+ std::vector<std::vector<rectangle> >& objects2
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( images.size() == objects.size() &&
+ images.size() == objects2.size(),
+ "\t void flip_image_dataset_left_right()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t images.size(): " << images.size()
+ << "\n\t objects.size(): " << objects.size()
+ << "\n\t objects2.size(): " << objects2.size()
+ );
+
+ typename image_array_type::value_type temp;
+ for (unsigned long i = 0; i < images.size(); ++i)
+ {
+ flip_image_left_right(images[i], temp);
+ swap(temp, images[i]);
+ for (unsigned long j = 0; j < objects[i].size(); ++j)
+ {
+ objects[i][j] = impl::flip_rect_left_right(objects[i][j], get_rect(images[i]));
+ }
+ for (unsigned long j = 0; j < objects2[i].size(); ++j)
+ {
+ objects2[i][j] = impl::flip_rect_left_right(objects2[i][j], get_rect(images[i]));
+ }
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Upsamples every image in place using the given pyramid type (one pyramid_up
+ // step), scaling each object box via pyr.rect_up(). Images whose pixel count
+ // already exceeds max_image_size are left untouched, along with their boxes.
+ template <
+ typename pyramid_type,
+ typename image_array_type
+ >
+ void upsample_image_dataset (
+ image_array_type& images,
+ std::vector<std::vector<rectangle> >& objects,
+ unsigned long max_image_size = std::numeric_limits<unsigned long>::max()
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( images.size() == objects.size(),
+ "\t void upsample_image_dataset()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t images.size(): " << images.size()
+ << "\n\t objects.size(): " << objects.size()
+ );
+
+ typename image_array_type::value_type temp;
+ pyramid_type pyr;
+ for (unsigned long i = 0; i < images.size(); ++i)
+ {
+ const unsigned long img_size = num_rows(images[i])*num_columns(images[i]);
+ if (img_size <= max_image_size)
+ {
+ pyramid_up(images[i], temp, pyr);
+ swap(temp, images[i]);
+ for (unsigned long j = 0; j < objects[i].size(); ++j)
+ {
+ objects[i][j] = pyr.rect_up(objects[i][j]);
+ }
+ }
+ }
+ }
+
+ // Overload for mmod_rect annotations: identical to the rectangle version
+ // except only the embedded .rect is scaled (labels/flags are untouched).
+ template <
+ typename pyramid_type,
+ typename image_array_type
+ >
+ void upsample_image_dataset (
+ image_array_type& images,
+ std::vector<std::vector<mmod_rect>>& objects,
+ unsigned long max_image_size = std::numeric_limits<unsigned long>::max()
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( images.size() == objects.size(),
+ "\t void upsample_image_dataset()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t images.size(): " << images.size()
+ << "\n\t objects.size(): " << objects.size()
+ );
+
+ typename image_array_type::value_type temp;
+ pyramid_type pyr;
+ for (unsigned long i = 0; i < images.size(); ++i)
+ {
+ const unsigned long img_size = num_rows(images[i])*num_columns(images[i]);
+ if (img_size <= max_image_size)
+ {
+ pyramid_up(images[i], temp, pyr);
+ swap(temp, images[i]);
+ for (unsigned long j = 0; j < objects[i].size(); ++j)
+ {
+ objects[i][j].rect = pyr.rect_up(objects[i][j].rect);
+ }
+ }
+ }
+ }
+
+ // Overload that additionally scales a second, parallel set of rectangle
+ // annotations (objects2) alongside the images.
+ template <
+ typename pyramid_type,
+ typename image_array_type
+ >
+ void upsample_image_dataset (
+ image_array_type& images,
+ std::vector<std::vector<rectangle> >& objects,
+ std::vector<std::vector<rectangle> >& objects2,
+ unsigned long max_image_size = std::numeric_limits<unsigned long>::max()
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( images.size() == objects.size() &&
+ images.size() == objects2.size(),
+ "\t void upsample_image_dataset()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t images.size(): " << images.size()
+ << "\n\t objects.size(): " << objects.size()
+ << "\n\t objects2.size(): " << objects2.size()
+ );
+
+ typename image_array_type::value_type temp;
+ pyramid_type pyr;
+ for (unsigned long i = 0; i < images.size(); ++i)
+ {
+ const unsigned long img_size = num_rows(images[i])*num_columns(images[i]);
+ if (img_size <= max_image_size)
+ {
+ pyramid_up(images[i], temp, pyr);
+ swap(temp, images[i]);
+ for (unsigned long j = 0; j < objects[i].size(); ++j)
+ {
+ objects[i][j] = pyr.rect_up(objects[i][j]);
+ }
+ for (unsigned long j = 0; j < objects2[i].size(); ++j)
+ {
+ objects2[i][j] = pyr.rect_up(objects2[i][j]);
+ }
+ }
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Rotates every image in place by `angle` (passed through to rotate_image)
+ // and re-centers each object box at its rotated center. Box width/height
+ // are preserved, i.e. boxes are moved, not rotated.
+ template <typename image_array_type>
+ void rotate_image_dataset (
+ double angle,
+ image_array_type& images,
+ std::vector<std::vector<rectangle> >& objects
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( images.size() == objects.size(),
+ "\t void rotate_image_dataset()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t images.size(): " << images.size()
+ << "\n\t objects.size(): " << objects.size()
+ );
+
+ typename image_array_type::value_type temp;
+ for (unsigned long i = 0; i < images.size(); ++i)
+ {
+ const point_transform_affine tran = rotate_image(images[i], temp, angle);
+ swap(temp, images[i]);
+ for (unsigned long j = 0; j < objects[i].size(); ++j)
+ {
+ const rectangle rect = objects[i][j];
+ objects[i][j] = centered_rect(tran(center(rect)), rect.width(), rect.height());
+ }
+ }
+ }
+
+ // Same in-place rotation as the overload above, but also updates a second,
+ // parallel set of rectangle annotations (objects2).
+ template <typename image_array_type>
+ void rotate_image_dataset (
+ double angle,
+ image_array_type& images,
+ std::vector<std::vector<rectangle> >& objects,
+ std::vector<std::vector<rectangle> >& objects2
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( images.size() == objects.size() &&
+ images.size() == objects2.size(),
+ "\t void rotate_image_dataset()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t images.size(): " << images.size()
+ << "\n\t objects.size(): " << objects.size()
+ << "\n\t objects2.size(): " << objects2.size()
+ );
+
+ typename image_array_type::value_type temp;
+ for (unsigned long i = 0; i < images.size(); ++i)
+ {
+ const point_transform_affine tran = rotate_image(images[i], temp, angle);
+ swap(temp, images[i]);
+ for (unsigned long j = 0; j < objects[i].size(); ++j)
+ {
+ const rectangle rect = objects[i][j];
+ objects[i][j] = centered_rect(tran(center(rect)), rect.width(), rect.height());
+ }
+ for (unsigned long j = 0; j < objects2[i].size(); ++j)
+ {
+ const rectangle rect = objects2[i][j];
+ objects2[i][j] = centered_rect(tran(center(rect)), rect.width(), rect.height());
+ }
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Replaces the dataset with rotated copies: for every angle in `angles` and
+ // every input image, a rotated image plus transformed annotations is produced.
+ // Note the originals are NOT kept (the new containers are swapped in at the
+ // end), so the result has angles.size()*images.size() entries.
+ template <
+ typename image_array_type,
+ typename EXP,
+ typename T,
+ typename U
+ >
+ void add_image_rotations (
+ const matrix_exp<EXP>& angles,
+ image_array_type& images,
+ std::vector<std::vector<T> >& objects,
+ std::vector<std::vector<U> >& objects2
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( is_vector(angles) && angles.size() > 0 &&
+ images.size() == objects.size() &&
+ images.size() == objects2.size(),
+ "\t void add_image_rotations()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t is_vector(angles): " << is_vector(angles)
+ << "\n\t angles.size(): " << angles.size()
+ << "\n\t images.size(): " << images.size()
+ << "\n\t objects.size(): " << objects.size()
+ << "\n\t objects2.size(): " << objects2.size()
+ );
+
+ image_array_type new_images;
+ std::vector<std::vector<T> > new_objects;
+ std::vector<std::vector<U> > new_objects2;
+
+ using namespace impl;
+
+ std::vector<T> objtemp;
+ std::vector<U> objtemp2;
+ typename image_array_type::value_type temp;
+ for (long i = 0; i < angles.size(); ++i)
+ {
+ for (unsigned long j = 0; j < images.size(); ++j)
+ {
+ const point_transform_affine tran = rotate_image(images[j], temp, angles(i));
+ new_images.push_back(std::move(temp));
+
+ objtemp.clear();
+ for (unsigned long k = 0; k < objects[j].size(); ++k)
+ objtemp.push_back(tform_object(tran, objects[j][k]));
+ new_objects.push_back(objtemp);
+
+ objtemp2.clear();
+ for (unsigned long k = 0; k < objects2[j].size(); ++k)
+ objtemp2.push_back(tform_object(tran, objects2[j][k]));
+ new_objects2.push_back(objtemp2);
+ }
+ }
+
+ new_images.swap(images);
+ new_objects.swap(objects);
+ new_objects2.swap(objects2);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Single-annotation-set convenience overload: delegates to the two-set
+ // version with an empty (but correctly sized) dummy second set.
+ template <
+ typename image_array_type,
+ typename EXP,
+ typename T
+ >
+ void add_image_rotations (
+ const matrix_exp<EXP>& angles,
+ image_array_type& images,
+ std::vector<std::vector<T> >& objects
+ )
+ {
+ std::vector<std::vector<T> > objects2(objects.size());
+ add_image_rotations(angles, images, objects, objects2);
+ }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ // Upsamples in_img into out_img by one level of the given pyramid, using the
+ // supplied interpolator. The output size is taken from pyr.rect_up() applied
+ // to the input rectangle. An empty input (or an empty upsampled rectangle)
+ // yields an empty output image.
+ template <
+ typename image_type1,
+ typename image_type2,
+ typename pyramid_type,
+ typename interpolation_type
+ >
+ void pyramid_up (
+ const image_type1& in_img,
+ image_type2& out_img,
+ const pyramid_type& pyr,
+ const interpolation_type& interp
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( is_same_object(in_img, out_img) == false ,
+ "\t void pyramid_up()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img)
+ );
+
+ if (image_size(in_img) == 0)
+ {
+ set_image_size(out_img, 0, 0);
+ return;
+ }
+
+ rectangle rect = get_rect(in_img);
+ rectangle uprect = pyr.rect_up(rect);
+ if (uprect.is_empty())
+ {
+ set_image_size(out_img, 0, 0);
+ return;
+ }
+ set_image_size(out_img, uprect.bottom()+1, uprect.right()+1);
+
+ resize_image(in_img, out_img, interp);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Convenience overload: pyramid_up with bilinear interpolation.
+ template <
+ typename image_type1,
+ typename image_type2,
+ typename pyramid_type
+ >
+ void pyramid_up (
+ const image_type1& in_img,
+ image_type2& out_img,
+ const pyramid_type& pyr
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT( is_same_object(in_img, out_img) == false ,
+ "\t void pyramid_up()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img)
+ );
+
+ pyramid_up(in_img, out_img, pyr, interpolate_bilinear());
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // In-place variant: upsamples img by one pyramid level via a temporary.
+ template <
+ typename image_type,
+ typename pyramid_type
+ >
+ void pyramid_up (
+ image_type& img,
+ const pyramid_type& pyr
+ )
+ {
+ image_type temp;
+ pyramid_up(img, temp, pyr);
+ swap(temp, img);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // In-place variant defaulting to pyramid_down<2>, i.e. a 2x upsample.
+ template <
+ typename image_type
+ >
+ void pyramid_up (
+ image_type& img
+ )
+ {
+ pyramid_down<2> pyr;
+ pyramid_up(img, pyr);
+ }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ // Simple aggregate giving the row/column dimensions of an extracted chip.
+ struct chip_dims
+ {
+ chip_dims (
+ unsigned long rows_,
+ unsigned long cols_
+ ) : rows(rows_), cols(cols_) { }
+
+ unsigned long rows;
+ unsigned long cols;
+ };
+
+ // Describes a sub-window ("chip") to extract from an image:
+ // - rect: extraction rectangle, in the source image's coordinate system.
+ // - angle: rotation (radians) applied about rect's center during extraction.
+ // - rows/cols: dimensions of the output chip the rectangle is resampled to.
+ struct chip_details
+ {
+ chip_details() : angle(0), rows(0), cols(0) {}
+ chip_details(const rectangle& rect_) : rect(rect_),angle(0), rows(rect_.height()), cols(rect_.width()) {}
+ chip_details(const drectangle& rect_) : rect(rect_),angle(0),
+ rows((unsigned long)(rect_.height()+0.5)), cols((unsigned long)(rect_.width()+0.5)) {}
+ chip_details(const drectangle& rect_, unsigned long size) : rect(rect_),angle(0)
+ { compute_dims_from_size(size); }
+ chip_details(const drectangle& rect_, unsigned long size, double angle_) : rect(rect_),angle(angle_)
+ { compute_dims_from_size(size); }
+
+ chip_details(const drectangle& rect_, const chip_dims& dims) :
+ rect(rect_),angle(0),rows(dims.rows), cols(dims.cols) {}
+ chip_details(const drectangle& rect_, const chip_dims& dims, double angle_) :
+ rect(rect_),angle(angle_),rows(dims.rows), cols(dims.cols) {}
+
+ // Builds the chip description from point correspondences: chip_points are
+ // locations in the (dims-sized) output chip and img_points their matching
+ // locations in the source image. A similarity transform is fit and then
+ // decomposed into the angle/rect representation used by this struct.
+ template <typename T>
+ chip_details(
+ const std::vector<dlib::vector<T,2> >& chip_points,
+ const std::vector<dlib::vector<T,2> >& img_points,
+ const chip_dims& dims
+ ) :
+ rows(dims.rows), cols(dims.cols)
+ {
+ DLIB_CASSERT( chip_points.size() == img_points.size() && chip_points.size() >= 2,
+ "\t chip_details::chip_details(chip_points,img_points,dims)"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t chip_points.size(): " << chip_points.size()
+ << "\n\t img_points.size(): " << img_points.size()
+ );
+
+ const point_transform_affine tform = find_similarity_transform(chip_points,img_points);
+ // Applying the linear part of the transform to the unit x-vector exposes
+ // the rotation (direction of the result) and scale (its length).
+ dlib::vector<double,2> p(1,0);
+ p = tform.get_m()*p;
+
+ // There are only 3 things happening in a similarity transform. There is a
+ // rescaling, a rotation, and a translation. So here we pick out the scale and
+ // rotation parameters.
+ angle = std::atan2(p.y(),p.x());
+ // Note that the translation and scale part are represented by the extraction
+ // rectangle. So here we build the appropriate rectangle.
+ const double scale = length(p);
+ rect = centered_drect(tform(point(dims.cols,dims.rows)/2.0),
+ dims.cols*scale,
+ dims.rows*scale);
+ }
+
+
+ drectangle rect;
+ double angle;
+ unsigned long rows;
+ unsigned long cols;
+
+ // Total number of pixels in the output chip.
+ inline unsigned long size() const
+ {
+ return rows*cols;
+ }
+
+ private:
+ // Chooses rows/cols so the chip has approximately `size` pixels while
+ // matching rect's aspect ratio. Both dimensions are clamped to >= 1.
+ void compute_dims_from_size (
+ unsigned long size
+ )
+ {
+ const double relative_size = std::sqrt(size/(double)rect.area());
+ rows = static_cast<unsigned long>(rect.height()*relative_size + 0.5);
+ cols = static_cast<unsigned long>(size/(double)rows + 0.5);
+ rows = std::max(1ul,rows);
+ cols = std::max(1ul,cols);
+ }
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ // Returns the affine transform mapping source-image coordinates to chip
+ // coordinates for the given chip description. It is fit from three corner
+ // correspondences: the (rotated) corners of details.rect map to the
+ // top-left, top-right, and bottom-right corners of the output chip.
+ inline point_transform_affine get_mapping_to_chip (
+ const chip_details& details
+ )
+ {
+ std::vector<dlib::vector<double,2> > from, to;
+ point p1(0,0);
+ point p2(details.cols-1,0);
+ point p3(details.cols-1, details.rows-1);
+ to.push_back(p1);
+ from.push_back(rotate_point<double>(center(details.rect),details.rect.tl_corner(),details.angle));
+ to.push_back(p2);
+ from.push_back(rotate_point<double>(center(details.rect),details.rect.tr_corner(),details.angle));
+ to.push_back(p3);
+ from.push_back(rotate_point<double>(center(details.rect),details.rect.br_corner(),details.angle));
+ return find_affine_transform(from, to);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Maps a detection from source-image coordinates into the coordinate system
+ // of the chip described by details: every present part is transformed
+ // (missing parts stay missing), and the main rectangle becomes the axis
+ // aligned bounding box of its four transformed corners.
+ inline full_object_detection map_det_to_chip(
+ const full_object_detection& det,
+ const chip_details& details
+ )
+ {
+ point_transform_affine tform = get_mapping_to_chip(details);
+ full_object_detection res(det);
+ // map the parts
+ for (unsigned long l = 0; l < det.num_parts(); ++l)
+ {
+ if (det.part(l) != OBJECT_PART_NOT_PRESENT)
+ res.part(l) = tform(det.part(l));
+ else
+ res.part(l) = OBJECT_PART_NOT_PRESENT;
+ }
+ // map the main rectangle
+ rectangle rect;
+ rect += tform(det.get_rect().tl_corner());
+ rect += tform(det.get_rect().tr_corner());
+ rect += tform(det.get_rect().bl_corner());
+ rect += tform(det.get_rect().br_corner());
+ res.get_rect() = rect;
+ return res;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ namespace impl
+ {
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ void basic_extract_image_chip (
+ const image_type1& img,
+ const rectangle& location,
+ image_type2& chip
+ )
+ /*!
+ ensures
+ - This function doesn't do any scaling or rotating. It just pulls out the
+ chip in the given rectangle. This also means the output image has the
+ same dimensions as the location rectangle.
+ !*/
+ {
+ const_image_view<image_type1> vimg(img);
+ image_view<image_type2> vchip(chip);
+
+ vchip.set_size(location.height(), location.width());
+
+ // location might go outside img so clip it
+ rectangle area = location.intersect(get_rect(img));
+
+ // find the part of the chip that corresponds to area in img.
+ rectangle chip_area = translate_rect(area, -location.tl_corner());
+
+ // Pixels of the chip that fall outside img are zeroed rather than left
+ // uninitialized.
+ zero_border_pixels(chip, chip_area);
+ // now pull out the contents of area/chip_area.
+ // (assign_pixel performs any needed color space conversion per pixel.)
+ for (long r = chip_area.top(), rr = area.top(); r <= chip_area.bottom(); ++r,++rr)
+ {
+ for (long c = chip_area.left(), cc = area.left(); c <= chip_area.right(); ++c,++cc)
+ {
+ assign_pixel(vchip[r][c], vimg[rr][cc]);
+ }
+ }
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Extracts all chips described by chip_locations from img into chips, using
+ // `interp` for resampling. Heavy downscales are routed through an internal
+ // image pyramid so the interpolation quality doesn't degrade to nearest
+ // neighbor; axis-aligned, unscaled chips take a fast copy path instead.
+ template <
+ typename image_type1,
+ typename image_type2,
+ typename interpolation_type
+ >
+ void extract_image_chips (
+ const image_type1& img,
+ const std::vector<chip_details>& chip_locations,
+ dlib::array<image_type2>& chips,
+ const interpolation_type& interp
+ )
+ {
+ // make sure requires clause is not broken
+#ifdef ENABLE_ASSERTS
+ for (unsigned long i = 0; i < chip_locations.size(); ++i)
+ {
+ DLIB_CASSERT(chip_locations[i].size() != 0 &&
+ chip_locations[i].rect.is_empty() == false,
+ "\t void extract_image_chips()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t chip_locations["<<i<<"].size(): " << chip_locations[i].size()
+ << "\n\t chip_locations["<<i<<"].rect.is_empty(): " << chip_locations[i].rect.is_empty()
+ );
+ }
+#endif
+
+ pyramid_down<2> pyr;
+ long max_depth = 0;
+ // If the chip is supposed to be much smaller than the source subwindow then you
+ // can't just extract it using bilinear interpolation since at a high enough
+ // downsampling amount it would effectively turn into nearest neighbor
+ // interpolation. So we use an image pyramid to make sure the interpolation is
+ // fast but also high quality. The first thing we do is figure out how deep the
+ // image pyramid needs to be.
+ rectangle bounding_box;
+ for (unsigned long i = 0; i < chip_locations.size(); ++i)
+ {
+ long depth = 0;
+ double grow = 2;
+ drectangle rect = pyr.rect_down(chip_locations[i].rect);
+ while (rect.area() > chip_locations[i].size())
+ {
+ rect = pyr.rect_down(rect);
+ ++depth;
+ // We drop the image size by a factor of 2 each iteration and then assume a
+ // border of 2 pixels is needed to avoid any border effects of the crop.
+ grow = grow*2 + 2;
+ }
+ // Accumulate the rotated chip footprint (plus the border margin) into the
+ // overall bounding box so the pyramid only covers the region actually used.
+ drectangle rot_rect;
+ const vector<double,2> cent = center(chip_locations[i].rect);
+ rot_rect += rotate_point<double>(cent,chip_locations[i].rect.tl_corner(),chip_locations[i].angle);
+ rot_rect += rotate_point<double>(cent,chip_locations[i].rect.tr_corner(),chip_locations[i].angle);
+ rot_rect += rotate_point<double>(cent,chip_locations[i].rect.bl_corner(),chip_locations[i].angle);
+ rot_rect += rotate_point<double>(cent,chip_locations[i].rect.br_corner(),chip_locations[i].angle);
+ bounding_box += grow_rect(rot_rect, grow).intersect(get_rect(img));
+ max_depth = std::max(depth,max_depth);
+ }
+ //std::cout << "max_depth: " << max_depth << std::endl;
+ //std::cout << "crop amount: " << bounding_box.area()/(double)get_rect(img).area() << std::endl;
+
+ // now make an image pyramid
+ dlib::array<array2d<typename image_traits<image_type1>::pixel_type> > levels(max_depth);
+ if (levels.size() != 0)
+ pyr(sub_image(img,bounding_box),levels[0]);
+ for (unsigned long i = 1; i < levels.size(); ++i)
+ pyr(levels[i-1],levels[i]);
+
+ std::vector<dlib::vector<double,2> > from, to;
+
+ // now pull out the chips
+ chips.resize(chip_locations.size());
+ for (unsigned long i = 0; i < chips.size(); ++i)
+ {
+ // If the chip doesn't have any rotation or scaling then use the basic version
+ // of chip extraction that just does a fast copy.
+ if (chip_locations[i].angle == 0 &&
+ chip_locations[i].rows == chip_locations[i].rect.height() &&
+ chip_locations[i].cols == chip_locations[i].rect.width())
+ {
+ impl::basic_extract_image_chip(img, chip_locations[i].rect, chips[i]);
+ }
+ else
+ {
+ set_image_size(chips[i], chip_locations[i].rows, chip_locations[i].cols);
+
+ // figure out which level in the pyramid to use to extract the chip
+ // (level == -1 means the original-resolution crop is good enough).
+ int level = -1;
+ drectangle rect = translate_rect(chip_locations[i].rect, -bounding_box.tl_corner());
+ while (pyr.rect_down(rect).area() > chip_locations[i].size())
+ {
+ ++level;
+ rect = pyr.rect_down(rect);
+ }
+
+ // find the appropriate transformation that maps from the chip to the input
+ // image
+ from.clear();
+ to.clear();
+ from.push_back(get_rect(chips[i]).tl_corner()); to.push_back(rotate_point<double>(center(rect),rect.tl_corner(),chip_locations[i].angle));
+ from.push_back(get_rect(chips[i]).tr_corner()); to.push_back(rotate_point<double>(center(rect),rect.tr_corner(),chip_locations[i].angle));
+ from.push_back(get_rect(chips[i]).bl_corner()); to.push_back(rotate_point<double>(center(rect),rect.bl_corner(),chip_locations[i].angle));
+ point_transform_affine trns = find_affine_transform(from,to);
+
+ // now extract the actual chip
+ if (level == -1)
+ transform_image(sub_image(img,bounding_box),chips[i],interp,trns);
+ else
+ transform_image(levels[level],chips[i],interp,trns);
+ }
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Convenience overload: extract chips using bilinear interpolation.
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ void extract_image_chips(
+ const image_type1& img,
+ const std::vector<chip_details>& chip_locations,
+ dlib::array<image_type2>& chips
+ )
+ {
+ extract_image_chips(img, chip_locations, chips, interpolate_bilinear());
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Extracts a single chip. Takes the fast copy path when the chip is
+ // axis-aligned and unscaled; otherwise delegates to the batch routine
+ // (pyramid-based) with a one-element chip list.
+ template <
+ typename image_type1,
+ typename image_type2,
+ typename interpolation_type
+ >
+ void extract_image_chip (
+ const image_type1& img,
+ const chip_details& location,
+ image_type2& chip,
+ const interpolation_type& interp
+ )
+ {
+ // If the chip doesn't have any rotation or scaling then use the basic version of
+ // chip extraction that just does a fast copy.
+ if (location.angle == 0 &&
+ location.rows == location.rect.height() &&
+ location.cols == location.rect.width())
+ {
+ impl::basic_extract_image_chip(img, location.rect, chip);
+ }
+ else
+ {
+ std::vector<chip_details> chip_locations(1,location);
+ dlib::array<image_type2> chips;
+ extract_image_chips(img, chip_locations, chips, interp);
+ swap(chips[0], chip);
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Convenience overload: extract a single chip with bilinear interpolation.
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ void extract_image_chip (
+ const image_type1& img,
+ const chip_details& location,
+ image_type2& chip
+ )
+ {
+ extract_image_chip(img, location, chip, interpolate_bilinear());
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Builds a chip_details that extracts an aligned, size x size face chip from
+ // an image given a 5-point or 68-point landmark detection. `padding` grows
+ // the crop around the face (0 = tight crop). Alignment comes from matching
+ // the detected landmarks to fixed canonical landmark positions (normalized
+ // to [0,1] and rescaled by the padding) via chip_details' similarity fit.
+ inline chip_details get_face_chip_details (
+ const full_object_detection& det,
+ const unsigned long size = 200,
+ const double padding = 0.2
+ )
+ {
+ DLIB_CASSERT(det.num_parts() == 68 || det.num_parts() == 5,
+ "\t chip_details get_face_chip_details()"
+ << "\n\t You have to give either a 5 point or 68 point face landmarking output to this function. "
+ << "\n\t det.num_parts(): " << det.num_parts()
+ );
+ DLIB_CASSERT(padding >= 0 && size > 0,
+ "\t chip_details get_face_chip_details()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t padding: " << padding
+ );
+
+
+ std::vector<dpoint> from_points, to_points;
+ if (det.num_parts() == 5)
+ {
+ // Canonical positions (in a unit square) for the 5-point landmark model.
+ dpoint p0(0.8595674595992, 0.2134981538014);
+ dpoint p1(0.6460604764104, 0.2289674387677);
+ dpoint p2(0.1205750620789, 0.2137274526848);
+ dpoint p3(0.3340850613712, 0.2290642403242);
+ dpoint p4(0.4901123135679, 0.6277975316475);
+
+
+ // Shrink the canonical points toward the chip center to leave `padding`
+ // worth of border around the face.
+ p0 = (padding+p0)/(2*padding+1);
+ p1 = (padding+p1)/(2*padding+1);
+ p2 = (padding+p2)/(2*padding+1);
+ p3 = (padding+p3)/(2*padding+1);
+ p4 = (padding+p4)/(2*padding+1);
+
+ from_points.push_back(p0*size);
+ to_points.push_back(det.part(0));
+
+ from_points.push_back(p1*size);
+ to_points.push_back(det.part(1));
+
+ from_points.push_back(p2*size);
+ to_points.push_back(det.part(2));
+
+ from_points.push_back(p3*size);
+ to_points.push_back(det.part(3));
+
+ from_points.push_back(p4*size);
+ to_points.push_back(det.part(4));
+ }
+ else
+ {
+ // Average positions of face points 17-67
+ const double mean_face_shape_x[] = {
+ 0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124,
+ 0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036,
+ 0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918,
+ 0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149,
+ 0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721,
+ 0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874,
+ 0.553364, 0.490127, 0.42689
+ };
+ const double mean_face_shape_y[] = {
+ 0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891,
+ 0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326,
+ 0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733,
+ 0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099,
+ 0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805,
+ 0.902192, 0.909281, 0.902192, 0.864805, 0.784792, 0.778746, 0.785343, 0.778746,
+ 0.784792, 0.824182, 0.831803, 0.824182
+ };
+
+ // The tables must hold one entry per landmark index in [17,68).
+ COMPILE_TIME_ASSERT(sizeof(mean_face_shape_x)/sizeof(double) == 68-17);
+
+ for (unsigned long i = 17; i < det.num_parts(); ++i)
+ {
+ // Ignore the lower lip
+ if ((55 <= i && i <= 59) || (65 <= i && i <= 67))
+ continue;
+ // Ignore the eyebrows
+ if (17 <= i && i <= 26)
+ continue;
+
+ dpoint p;
+ p.x() = (padding+mean_face_shape_x[i-17])/(2*padding+1);
+ p.y() = (padding+mean_face_shape_y[i-17])/(2*padding+1);
+ from_points.push_back(p*size);
+ to_points.push_back(det.part(i));
+ }
+ }
+
+ return chip_details(from_points, to_points, chip_dims(size,size));
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Batch convenience: one chip_details per detection, same size/padding.
+ inline std::vector<chip_details> get_face_chip_details (
+ const std::vector<full_object_detection>& dets,
+ const unsigned long size = 200,
+ const double padding = 0.2
+ )
+ {
+ std::vector<chip_details> res;
+ res.reserve(dets.size());
+ for (unsigned long i = 0; i < dets.size(); ++i)
+ res.push_back(get_face_chip_details(dets[i], size, padding));
+ return res;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Returns a randomly jittered copy of a square, non-empty image: a crop that
+ // is randomly translated (up to 2% of the object size), randomly scaled
+ // (object height 97%..~100% of the crop), randomly rotated (up to +/-3
+ // degrees), and horizontally flipped with probability 0.5. Output has the
+ // same dimensions as the input.
+ template <
+ typename image_type
+ >
+ image_type jitter_image(
+ const image_type& img,
+ dlib::rand& rnd
+ )
+ {
+ DLIB_CASSERT(num_rows(img)*num_columns(img) != 0);
+ DLIB_CASSERT(num_rows(img)==num_columns(img));
+
+ const double max_rotation_degrees = 3;
+ const double min_object_height = 0.97;
+ const double max_object_height = 0.99999;
+ const double translate_amount = 0.02;
+
+
+ // Work with a rectangle shrunk by 3 pixels on every side; the crop is
+ // jittered around this inner box.
+ const auto rect = shrink_rect(get_rect(img),3);
+
+ // perturb the location of the crop by a small fraction of the object's size.
+ const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*rect.width(),
+ rnd.get_double_in_range(-translate_amount,translate_amount)*rect.height());
+
+ // perturb the scale of the crop by a fraction of the object's size
+ const double rand_scale_perturb = rnd.get_double_in_range(min_object_height, max_object_height);
+
+ const long box_size = rect.height()/rand_scale_perturb;
+ const auto crop_rect = centered_rect(center(rect)+rand_translate, box_size, box_size);
+ // angle is converted from degrees to radians here.
+ const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180;
+ image_type crop;
+ extract_image_chip(img, chip_details(crop_rect, chip_dims(img.nr(),img.nc()), angle), crop);
+ if (rnd.get_random_double() > 0.5)
+ flip_image_left_right(crop);
+
+ return crop;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_INTERPOlATIONh_
+
diff --git a/ml/dlib/dlib/image_transforms/interpolation_abstract.h b/ml/dlib/dlib/image_transforms/interpolation_abstract.h
new file mode 100644
index 000000000..f2da2fb02
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/interpolation_abstract.h
@@ -0,0 +1,1480 @@
+// Copyright (C) 2012 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_INTERPOlATION_ABSTRACT_
+#ifdef DLIB_INTERPOlATION_ABSTRACT_
+
+#include "../pixel.h"
+#include "../image_processing/full_object_detection_abstract.h"
+#include "../image_processing/generic_image.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ class interpolate_nearest_neighbor
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object is a tool for performing nearest neighbor interpolation
+ on an image.
+ !*/
+
+ public:
+
+ template <
+ typename image_view_type,
+ typename pixel_type
+ >
+ bool operator() (
+ const image_view_type& img,
+ const dlib::point& p,
+ pixel_type& result
+ ) const;
+ /*!
+ requires
+ - image_view_type == an image_view or const_image_view object.
+ - pixel_traits<typename image_view_type::pixel_type>::has_alpha == false
+ - pixel_traits<pixel_type> is defined
+ ensures
+ - if (p is located inside img) then
+ - #result == img[p.y()][p.x()]
+ (This assignment is done using assign_pixel(#result, img[p.y()][p.x()]),
+ therefore any necessary color space conversion will be performed)
+ - returns true
+ - else
+ - returns false
+ !*/
+
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ class interpolate_bilinear
+ {
+
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object is a tool for performing bilinear interpolation
+ on an image. This is performed by looking at the 4 pixels
+ nearest to a point and deriving an interpolated value from them.
+ !*/
+
+ public:
+
+ template <
+ typename T,
+ typename image_view_type,
+ typename pixel_type
+ >
+ bool operator() (
+ const image_view_type& img,
+ const dlib::vector<T,2>& p,
+ pixel_type& result
+ ) const;
+ /*!
+ requires
+ - image_view_type == an image_view or const_image_view object
+ - pixel_traits<typename image_view_type::pixel_type>::has_alpha == false
+ - pixel_traits<pixel_type> is defined
+ ensures
+ - if (there is an interpolatable image location at point p in img) then
+ - #result == the interpolated pixel value from img at point p.
+ - assign_pixel() will be used to write to #result, therefore any
+ necessary color space conversion will be performed.
+ - returns true
+ - if img contains RGB pixels then the interpolation will be in color.
+ Otherwise, the interpolation will be performed in a grayscale mode.
+ - else
+ - returns false
+ !*/
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ class interpolate_quadratic
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object is a tool for performing quadratic interpolation
+ on an image. This is performed by looking at the 9 pixels
+ nearest to a point and deriving an interpolated value from them.
+ !*/
+
+ public:
+
+ template <
+ typename T,
+ typename image_view_type,
+ typename pixel_type
+ >
+ bool operator() (
+ const image_view_type& img,
+ const dlib::vector<T,2>& p,
+ pixel_type& result
+ ) const;
+ /*!
+ requires
+ - image_view_type == an image_view or const_image_view object.
+ - pixel_traits<typename image_view_type::pixel_type>::has_alpha == false
+ - pixel_traits<pixel_type> is defined
+ ensures
+ - if (there is an interpolatable image location at point p in img) then
+ - #result == the interpolated pixel value from img at point p
+ - assign_pixel() will be used to write to #result, therefore any
+ necessary color space conversion will be performed.
+ - returns true
+ - if img contains RGB pixels then the interpolation will be in color.
+ Otherwise, the interpolation will be performed in a grayscale mode.
+ - else
+ - returns false
+ !*/
+ };
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ class black_background
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is a function object which simply sets a pixel
+ to have a black value.
+ !*/
+
+ public:
+ template <typename pixel_type>
+ void operator() ( pixel_type& p) const { assign_pixel(p, 0); }
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ class white_background
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is a function object which simply sets a pixel
+ to have a white value.
+ !*/
+
+ public:
+ template <typename pixel_type>
+ void operator() ( pixel_type& p) const { assign_pixel(p, 255); }
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ class no_background
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is a function object which does nothing. It is useful
+ when used with the transform_image() routine defined below
+ if no modification of uninterpolated output pixels is desired.
+ !*/
+ public:
+ template <typename pixel_type>
+ void operator() ( pixel_type& ) const { }
+ };
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2,
+ typename interpolation_type,
+ typename point_mapping_type,
+ typename background_type
+ >
+ void transform_image (
+ const image_type1& in_img,
+ image_type2& out_img,
+ const interpolation_type& interp,
+ const point_mapping_type& map_point,
+ const background_type& set_background,
+ const rectangle& area
+ );
+ /*!
+ requires
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear,
+ interpolate_quadratic, or a type with a compatible interface.
+ - map_point should be a function which takes dlib::vector<T,2> objects and
+ returns dlib::vector<T,2> objects. An example is point_transform_affine.
+ - set_background should be a function which can take a single argument of
+ type image_traits<image_type2>::pixel_type. Examples are black_background,
+ white_background, and no_background.
+ - get_rect(out_img).contains(area) == true
+ - is_same_object(in_img, out_img) == false
+ ensures
+ - The map_point function defines a mapping from pixels in out_img to pixels
+ in in_img. transform_image() uses this mapping, along with the supplied
+ interpolation routine interp, to fill the region of out_img defined by
+ area with an interpolated copy of in_img.
+ - This function does not change the size of out_img.
+ - Only pixels inside the region defined by area in out_img are modified.
+ - For all locations r and c such that area.contains(c,r) but have no corresponding
+ locations in in_img:
+ - set_background(out_img[r][c]) is invoked
+ (i.e. some parts of out_img might correspond to areas outside in_img and
+ therefore can't supply interpolated values. In these cases, these
+ pixels can be assigned a value by the supplied set_background() routine)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2,
+ typename interpolation_type,
+ typename point_mapping_type,
+ typename background_type
+ >
+ void transform_image (
+ const image_type1& in_img,
+ image_type2& out_img,
+ const interpolation_type& interp,
+ const point_mapping_type& map_point,
+ const background_type& set_background
+ );
+ /*!
+ requires
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear,
+ interpolate_quadratic, or a type with a compatible interface.
+ - map_point should be a function which takes dlib::vector<T,2> objects and
+ returns dlib::vector<T,2> objects. An example is point_transform_affine.
+ - set_background should be a function which can take a single argument of
+ type image_traits<image_type2>::pixel_type. Examples are black_background, white_background,
+ and no_background.
+ - is_same_object(in_img, out_img) == false
+ ensures
+ - performs:
+ transform_image(in_img, out_img, interp, map_point, set_background, get_rect(out_img));
+ (i.e. runs transform_image() on the entire out_img)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2,
+ typename interpolation_type,
+ typename point_mapping_type
+ >
+ void transform_image (
+ const image_type1& in_img,
+ image_type2& out_img,
+ const interpolation_type& interp,
+ const point_mapping_type& map_point
+ );
+ /*!
+ requires
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear,
+ interpolate_quadratic, or a type with a compatible interface.
+ - map_point should be a function which takes dlib::vector<T,2> objects and
+ returns dlib::vector<T,2> objects. An example is point_transform_affine.
+ - is_same_object(in_img, out_img) == false
+ ensures
+ - performs:
+ transform_image(in_img, out_img, interp, map_point, black_background(), get_rect(out_img));
+ (i.e. runs transform_image() on the entire out_img and sets non-interpolated
+ pixels to black)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2,
+ typename interpolation_type
+ >
+ point_transform_affine rotate_image (
+ const image_type1& in_img,
+ image_type2& out_img,
+ double angle,
+ const interpolation_type& interp
+ );
+ /*!
+ requires
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear,
+ interpolate_quadratic, or a type with a compatible interface.
+ - is_same_object(in_img, out_img) == false
+ ensures
+ - #out_img == a copy of in_img which has been rotated angle radians counter clockwise.
+ The rotation is performed with respect to the center of the image.
+ - Parts of #out_img which have no corresponding locations in in_img are set to black.
+ - uses the supplied interpolation routine interp to perform the necessary
+ pixel interpolation.
+ - returns a transformation object that maps points in in_img into their corresponding
+ location in #out_img.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ point_transform_affine rotate_image (
+ const image_type1& in_img,
+ image_type2& out_img,
+ double angle
+ );
+ /*!
+ requires
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - pixel_traits<typename image_traits<image_type1>::pixel_type>::has_alpha == false
+ - is_same_object(in_img, out_img) == false
+ ensures
+ - #out_img == a copy of in_img which has been rotated angle radians counter clockwise.
+ The rotation is performed with respect to the center of the image.
+ - Parts of #out_img which have no corresponding locations in in_img are set to black.
+ - uses the interpolate_quadratic object to perform the necessary pixel interpolation.
+ - returns a transformation object that maps points in in_img into their corresponding
+ location in #out_img.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2,
+ typename interpolation_type
+ >
+ void resize_image (
+ const image_type1& in_img,
+ image_type2& out_img,
+ const interpolation_type& interp
+ );
+ /*!
+ requires
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear,
+ interpolate_quadratic, or a type with a compatible interface.
+ - is_same_object(in_img, out_img) == false
+ ensures
+ - #out_img == A copy of in_img which has been stretched so that it
+ fits exactly into out_img.
+ - The size of out_img is not modified. I.e.
+ - #out_img.nr() == out_img.nr()
+ - #out_img.nc() == out_img.nc()
+ - uses the supplied interpolation routine interp to perform the necessary
+ pixel interpolation.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ void resize_image (
+ const image_type1& in_img,
+ image_type2& out_img
+ );
+ /*!
+ requires
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - pixel_traits<typename image_traits<image_type1>::pixel_type>::has_alpha == false
+ - is_same_object(in_img, out_img) == false
+ ensures
+ - #out_img == A copy of in_img which has been stretched so that it
+ fits exactly into out_img.
+ - The size of out_img is not modified. I.e.
+ - #out_img.nr() == out_img.nr()
+ - #out_img.nc() == out_img.nc()
+ - Uses the bilinear interpolation to perform the necessary pixel interpolation.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ void resize_image (
+ double size_scale,
+ image_type& img
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false
+ ensures
+            - Resizes img so that each of its dimensions is size_scale times larger than before.
+ In particular, we will have:
+ - #img.nr() == std::round(size_scale*img.nr())
+ - #img.nc() == std::round(size_scale*img.nc())
+ - #img == a bilinearly interpolated copy of the input image.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ point_transform_affine flip_image_left_right (
+ const image_type1& in_img,
+ image_type2& out_img
+ );
+ /*!
+ requires
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - is_same_object(in_img, out_img) == false
+ ensures
+ - #out_img.nr() == in_img.nr()
+ - #out_img.nc() == in_img.nc()
+ - #out_img == a copy of in_img which has been flipped from left to right.
+ (i.e. it is flipped as if viewed though a mirror)
+ - returns a transformation object that maps points in in_img into their
+ corresponding location in #out_img.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ point_transform_affine flip_image_left_right (
+ image_type& img
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ ensures
+ - This function is identical to the above version of flip_image_left_right()
+ except that it operates in-place.
+ - #img.nr() == img.nr()
+ - #img.nc() == img.nc()
+ - #img == a copy of img which has been flipped from left to right.
+ (i.e. it is flipped as if viewed though a mirror)
+ - returns a transformation object that maps points in img into their
+ corresponding location in #img.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_array_type,
+ typename T
+ >
+ void add_image_left_right_flips (
+ image_array_type& images,
+ std::vector<std::vector<T> >& objects
+ );
+ /*!
+ requires
+ - image_array_type == a dlib::array or std::vector of image objects that each
+ implement the interface defined in dlib/image_processing/generic_image.h
+ - T == rectangle, full_object_detection, or mmod_rect
+ - images.size() == objects.size()
+ ensures
+ - This function computes all the left/right flips of the contents of images and
+ then appends them onto the end of the images array. It also finds the
+ left/right flips of the rectangles in objects and similarly appends them into
+ objects. That is, we assume objects[i] is the set of bounding boxes in
+ images[i] and we flip the bounding boxes so that they still bound the same
+ objects in the new flipped images.
+ - #images.size() == images.size()*2
+ - #objects.size() == objects.size()*2
+ - All the original elements of images and objects are left unmodified. That
+ is, this function only appends new elements to each of these containers.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_array_type,
+ typename T,
+ typename U
+ >
+ void add_image_left_right_flips (
+ image_array_type& images,
+ std::vector<std::vector<T> >& objects,
+ std::vector<std::vector<U> >& objects2
+ );
+ /*!
+ requires
+ - image_array_type == a dlib::array or std::vector of image objects that each
+ implement the interface defined in dlib/image_processing/generic_image.h
+ - images.size() == objects.size()
+ - images.size() == objects2.size()
+ - T == rectangle, full_object_detection, or mmod_rect
+ - U == rectangle, full_object_detection, or mmod_rect
+ ensures
+ - This function computes all the left/right flips of the contents of images and
+ then appends them onto the end of the images array. It also finds the
+ left/right flips of the rectangles in objects and objects2 and similarly
+ appends them into objects and objects2 respectively. That is, we assume
+ objects[i] is the set of bounding boxes in images[i] and we flip the bounding
+ boxes so that they still bound the same objects in the new flipped images.
+ We similarly flip the boxes in objects2.
+ - #images.size() == images.size()*2
+ - #objects.size() == objects.size()*2
+ - #objects2.size() == objects2.size()*2
+ - All the original elements of images, objects, and objects2 are left unmodified.
+ That is, this function only appends new elements to each of these containers.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_array_type,
+ typename EXP,
+ typename T,
+ typename U
+ >
+ void add_image_rotations (
+ const matrix_exp<EXP>& angles,
+ image_array_type& images,
+ std::vector<std::vector<T> >& objects,
+ std::vector<std::vector<U> >& objects2
+ );
+ /*!
+ requires
+ - image_array_type == a dlib::array or std::vector of image objects that each
+ implement the interface defined in dlib/image_processing/generic_image.h
+ - is_vector(angles) == true
+ - angles.size() > 0
+ - images.size() == objects.size()
+ - images.size() == objects2.size()
+ - T == rectangle, full_object_detection, or mmod_rect
+ - U == rectangle, full_object_detection, or mmod_rect
+ ensures
+ - This function computes angles.size() different rotations of all the given
+ images and then replaces the contents of images with those rotations of the
+ input dataset. We will also adjust the rectangles inside objects and
+ objects2 so that they still bound the same objects in the new rotated images.
+ That is, we assume objects[i] and objects2[i] are bounding boxes for things
+ in images[i]. So we will adjust the positions of the boxes in objects and
+ objects2 accordingly.
+ - The elements of angles are interpreted as angles in radians and we will
+ rotate the images around their center using the values in angles. Moreover,
+ the rotation is done counter clockwise.
+ - #images.size() == images.size()*angles.size()
+ - #objects.size() == objects.size()*angles.size()
+ - #objects2.size() == objects2.size()*angles.size()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_array_type,
+ typename EXP,
+ typename T
+ >
+ void add_image_rotations (
+ const matrix_exp<EXP>& angles,
+ image_array_type& images,
+ std::vector<std::vector<T> >& objects
+ );
+ /*!
+ requires
+ - image_array_type == a dlib::array or std::vector of image objects that each
+ implement the interface defined in dlib/image_processing/generic_image.h
+ - is_vector(angles) == true
+ - angles.size() > 0
+ - images.size() == objects.size()
+ - T == rectangle, full_object_detection, or mmod_rect
+ ensures
+ - This function is identical to the add_image_rotations() define above except
+ that it doesn't have objects2 as an argument.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_array_type
+ >
+ void flip_image_dataset_left_right (
+ image_array_type& images,
+ std::vector<std::vector<rectangle> >& objects
+ );
+ /*!
+ requires
+ - image_array_type == a dlib::array or std::vector of image objects that each
+ implement the interface defined in dlib/image_processing/generic_image.h
+ - images.size() == objects.size()
+ ensures
+ - This function replaces each image in images with the left/right flipped
+ version of the image. Therefore, #images[i] will contain the left/right
+ flipped version of images[i]. It also flips all the rectangles in objects so
+ that they still bound the same visual objects in each image.
+            - #images.size() == images.size()
+ - #objects.size() == objects.size()
+ - for all valid i:
+ #objects[i].size() == objects[i].size()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_array_type
+ >
+ void flip_image_dataset_left_right (
+ image_array_type& images,
+ std::vector<std::vector<rectangle> >& objects,
+ std::vector<std::vector<rectangle> >& objects2
+ );
+ /*!
+ requires
+ - image_array_type == a dlib::array or std::vector of image objects that each
+ implement the interface defined in dlib/image_processing/generic_image.h
+ - images.size() == objects.size()
+ - images.size() == objects2.size()
+ ensures
+ - This function replaces each image in images with the left/right flipped
+ version of the image. Therefore, #images[i] will contain the left/right
+ flipped version of images[i]. It also flips all the rectangles in objects
+ and objects2 so that they still bound the same visual objects in each image.
+            - #images.size() == images.size()
+ - #objects.size() == objects.size()
+ - #objects2.size() == objects2.size()
+ - for all valid i:
+ #objects[i].size() == objects[i].size()
+ - for all valid i:
+ #objects2[i].size() == objects2[i].size()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename pyramid_type,
+ typename image_array_type
+ >
+ void upsample_image_dataset (
+ image_array_type& images,
+ std::vector<std::vector<rectangle> >& objects,
+ unsigned long max_image_size = std::numeric_limits<unsigned long>::max()
+ );
+ /*!
+ requires
+ - image_array_type == a dlib::array or std::vector of image objects that each
+ implement the interface defined in dlib/image_processing/generic_image.h
+ - images.size() == objects.size()
+ ensures
+ - This function replaces each image in images with an upsampled version of that
+ image. Each image is upsampled using pyramid_up() and the given
+ pyramid_type. Therefore, #images[i] will contain the larger upsampled
+ version of images[i]. It also adjusts all the rectangles in objects so that
+ they still bound the same visual objects in each image.
+ - Input images already containing more than max_image_size pixels are not upsampled.
+            - #images.size() == images.size()
+ - #objects.size() == objects.size()
+ - for all valid i:
+ #objects[i].size() == objects[i].size()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename pyramid_type,
+ typename image_array_type
+ >
+ void upsample_image_dataset (
+ image_array_type& images,
+ std::vector<std::vector<mmod_rect>>& objects,
+ unsigned long max_image_size = std::numeric_limits<unsigned long>::max()
+ );
+ /*!
+ requires
+ - image_array_type == a dlib::array or std::vector of image objects that each
+ implement the interface defined in dlib/image_processing/generic_image.h
+ - images.size() == objects.size()
+ ensures
+ - This function replaces each image in images with an upsampled version of that
+ image. Each image is upsampled using pyramid_up() and the given
+ pyramid_type. Therefore, #images[i] will contain the larger upsampled
+ version of images[i]. It also adjusts all the rectangles in objects so that
+ they still bound the same visual objects in each image.
+ - Input images already containing more than max_image_size pixels are not upsampled.
+            - #images.size() == images.size()
+ - #objects.size() == objects.size()
+ - for all valid i:
+ #objects[i].size() == objects[i].size()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename pyramid_type,
+        typename image_array_type
+    >
+    void upsample_image_dataset (
+        image_array_type& images,
+        std::vector<std::vector<rectangle> >& objects,
+        std::vector<std::vector<rectangle> >& objects2,
+        unsigned long max_image_size = std::numeric_limits<unsigned long>::max()
+    );
+    /*!
+        requires
+            - image_array_type == a dlib::array or std::vector of image objects that each
+              implement the interface defined in dlib/image_processing/generic_image.h
+            - images.size() == objects.size()
+            - images.size() == objects2.size()
+        ensures
+            - This function replaces each image in images with an upsampled version of that
+              image.  Each image is upsampled using pyramid_up() and the given
+              pyramid_type.  Therefore, #images[i] will contain the larger upsampled
+              version of images[i].  It also adjusts all the rectangles in objects and
+              objects2 so that they still bound the same visual objects in each image.
+            - Input images already containing more than max_image_size pixels are not upsampled.
+            - #images.size() == images.size()
+            - #objects.size() == objects.size()
+            - #objects2.size() == objects2.size()
+            - for all valid i:
+                #objects[i].size() == objects[i].size()
+            - for all valid i:
+                #objects2[i].size() == objects2[i].size()
+    !*/
+// ----------------------------------------------------------------------------------------
+
+ template <typename image_array_type>
+ void rotate_image_dataset (
+ double angle,
+ image_array_type& images,
+ std::vector<std::vector<rectangle> >& objects
+ );
+ /*!
+ requires
+ - image_array_type == a dlib::array or std::vector of image objects that each
+ implement the interface defined in dlib/image_processing/generic_image.h
+ - images.size() == objects.size()
+ ensures
+ - This function replaces each image in images with a rotated version of that
+ image. In particular, each image is rotated using
+ rotate_image(original,rotated,angle). Therefore, the images are rotated
+ angle radians counter clockwise around their centers. That is, #images[i]
+ will contain the rotated version of images[i]. It also adjusts all
+ the rectangles in objects so that they still bound the same visual objects in
+ each image.
+ - All the rectangles will still have the same sizes and aspect ratios after
+ rotation. They will simply have had their positions adjusted so they still
+ fall on the same objects.
+            - #images.size() == images.size()
+ - #objects.size() == objects.size()
+ - for all valid i:
+ #objects[i].size() == objects[i].size()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename image_array_type>
+ void rotate_image_dataset (
+ double angle,
+ image_array_type& images,
+ std::vector<std::vector<rectangle> >& objects,
+ std::vector<std::vector<rectangle> >& objects2
+ );
+ /*!
+ requires
+ - image_array_type == a dlib::array or std::vector of image objects that each
+ implement the interface defined in dlib/image_processing/generic_image.h
+ - images.size() == objects.size()
+ - images.size() == objects2.size()
+ ensures
+ - This function replaces each image in images with a rotated version of that
+ image. In particular, each image is rotated using
+ rotate_image(original,rotated,angle). Therefore, the images are rotated
+ angle radians counter clockwise around their centers. That is, #images[i]
+ will contain the rotated version of images[i]. It also adjusts all
+ the rectangles in objects and objects2 so that they still bound the same
+ visual objects in each image.
+ - All the rectangles will still have the same sizes and aspect ratios after
+ rotation. They will simply have had their positions adjusted so they still
+ fall on the same objects.
+            - #images.size() == images.size()
+ - #objects.size() == objects.size()
+ - #objects2.size() == objects2.size()
+ - for all valid i:
+ #objects[i].size() == objects[i].size()
+ - for all valid i:
+ #objects2[i].size() == objects2[i].size()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ void flip_image_up_down (
+ const image_type1& in_img,
+ image_type2& out_img
+ );
+ /*!
+ requires
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - is_same_object(in_img, out_img) == false
+ ensures
+ - #out_img.nr() == in_img.nr()
+ - #out_img.nc() == in_img.nc()
+ - #out_img == a copy of in_img which has been flipped upside down.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2,
+ typename pyramid_type,
+ typename interpolation_type
+ >
+ void pyramid_up (
+ const image_type1& in_img,
+ image_type2& out_img,
+ const pyramid_type& pyr,
+ const interpolation_type& interp
+ );
+ /*!
+ requires
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - pyramid_type == a type compatible with the image pyramid objects defined
+ in dlib/image_transforms/image_pyramid_abstract.h
+ - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear,
+ interpolate_quadratic, or a type with a compatible interface.
+ - is_same_object(in_img, out_img) == false
+ ensures
+ - This function inverts the downsampling transformation performed by pyr().
+ In particular, it attempts to make an image, out_img, which would result
+ in in_img when downsampled with pyr().
+ - #out_img == An upsampled copy of in_img. In particular, downsampling
+ #out_img 1 time with pyr() should result in a final image which looks like
+ in_img.
+ - Uses the supplied interpolation routine interp to perform the necessary
+ pixel interpolation.
+ - Note that downsampling an image with pyr() and then upsampling it with
+ pyramid_up() will not necessarily result in a final image which is
+ the same size as the original. This is because the exact size of the
+ original image cannot be determined based on the downsampled image.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2,
+ typename pyramid_type
+ >
+ void pyramid_up (
+ const image_type1& in_img,
+ image_type2& out_img,
+ const pyramid_type& pyr
+ );
+ /*!
+ requires
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - pyramid_type == a type compatible with the image pyramid objects defined
+ in dlib/image_transforms/image_pyramid_abstract.h
+ - is_same_object(in_img, out_img) == false
+ ensures
+ - performs: pyramid_up(in_img, out_img, pyr, interpolate_bilinear());
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename pyramid_type
+ >
+ void pyramid_up (
+ image_type& img,
+ const pyramid_type& pyr
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - pyramid_type == a type compatible with the image pyramid objects defined
+ in dlib/image_transforms/image_pyramid_abstract.h
+ ensures
+ - Performs an in-place version of pyramid_up() on the given image. In
+ particular, this function is equivalent to:
+ pyramid_up(img, temp, pyr);
+ temp.swap(img);
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ void pyramid_up (
+ image_type& img
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ ensures
+ - performs: pyramid_up(img, pyramid_down<2>());
+ (i.e. it upsamples the given image and doubles it in size.)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ struct chip_dims
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is a simple tool for passing in a pair of row and column values to the
+ chip_details constructor.
+ !*/
+
+ chip_dims (
+ unsigned long rows_,
+ unsigned long cols_
+ ) : rows(rows_), cols(cols_) { }
+
+ unsigned long rows;
+ unsigned long cols;
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ struct chip_details
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object describes where an image chip is to be extracted from within
+ another image. In particular, it specifies that the image chip is
+ contained within the rectangle this->rect and that prior to extraction the
+ image should be rotated counter-clockwise by this->angle radians. Finally,
+ the extracted chip should have this->rows rows and this->cols columns in it
+ regardless of the shape of this->rect. This means that the extracted chip
+ will be stretched to fit via bilinear interpolation when necessary.
+ !*/
+
+ chip_details(
+ );
+ /*!
+ ensures
+ - #rect.is_empty() == true
+ - #size() == 0
+ - #angle == 0
+ - #rows == 0
+ - #cols == 0
+ !*/
+
+ chip_details(
+ const drectangle& rect_
+ );
+ /*!
+ ensures
+ - #rect == rect_
+ - #size() == rect_.area()
+ - #angle == 0
+ - #rows == rect_.height()
+ - #cols == rect_.width()
+ !*/
+
+ chip_details(
+ const rectangle& rect_
+ );
+ /*!
+ ensures
+ - #rect == rect_
+ - #size() == rect_.area()
+ - #angle == 0
+ - #rows == rect_.height()
+ - #cols == rect_.width()
+ !*/
+
+ chip_details(
+ const drectangle& rect_,
+ unsigned long size_
+ );
+ /*!
+ ensures
+ - #rect == rect_
+ - #size() == size_
+ - #angle == 0
+ - #rows and #cols is set such that the total size of the chip is as close
+ to size_ as possible but still matches the aspect ratio of rect_.
+            - As long as size_ and the aspect ratio of rect_ stay constant then
+ #rows and #cols will always have the same values. This means that, for
+ example, if you want all your chips to have the same dimensions then
+ ensure that size_ is always the same and also that rect_ always has the
+ same aspect ratio. Otherwise the calculated values of #rows and #cols
+ may be different for different chips. Alternatively, you can use the
+ chip_details constructor below that lets you specify the exact values for
+ rows and cols.
+ !*/
+
+ chip_details(
+ const drectangle& rect_,
+ unsigned long size_,
+ double angle_
+ );
+ /*!
+ ensures
+ - #rect == rect_
+ - #size() == size_
+ - #angle == angle_
+ - #rows and #cols is set such that the total size of the chip is as close
+ to size_ as possible but still matches the aspect ratio of rect_.
+            - As long as size_ and the aspect ratio of rect_ stay constant then
+ #rows and #cols will always have the same values. This means that, for
+ example, if you want all your chips to have the same dimensions then
+ ensure that size_ is always the same and also that rect_ always has the
+ same aspect ratio. Otherwise the calculated values of #rows and #cols
+ may be different for different chips. Alternatively, you can use the
+ chip_details constructor below that lets you specify the exact values for
+ rows and cols.
+ !*/
+
+ chip_details(
+ const drectangle& rect_,
+ const chip_dims& dims
+ );
+ /*!
+ ensures
+ - #rect == rect_
+ - #size() == dims.rows*dims.cols
+ - #angle == 0
+ - #rows == dims.rows
+ - #cols == dims.cols
+ !*/
+
+ chip_details(
+ const drectangle& rect_,
+ const chip_dims& dims,
+ double angle_
+ );
+ /*!
+ ensures
+ - #rect == rect_
+ - #size() == dims.rows*dims.cols
+ - #angle == angle_
+ - #rows == dims.rows
+ - #cols == dims.cols
+ !*/
+
+ template <typename T>
+ chip_details(
+ const std::vector<dlib::vector<T,2> >& chip_points,
+ const std::vector<dlib::vector<T,2> >& img_points,
+ const chip_dims& dims
+ );
+ /*!
+ requires
+ - chip_points.size() == img_points.size()
+ - chip_points.size() >= 2
+ ensures
+ - The chip will be extracted such that the pixel locations chip_points[i]
+ in the chip are mapped to img_points[i] in the original image by a
+              similarity transform.  That is, if you know the pixelwise mapping you
+              want between the chip and the original image then you can use this
+              chip_details constructor to define the mapping.
+ - #rows == dims.rows
+ - #cols == dims.cols
+ - #size() == dims.rows*dims.cols
+ - #rect and #angle are computed based on the given size of the output chip
+ (specified by dims) and the similarity transform between the chip and
+ image (specified by chip_points and img_points).
+ !*/
+
+ inline unsigned long size() const { return rows*cols; }
+ /*!
+ ensures
+ - returns the number of pixels in this chip. This is just rows*cols.
+ !*/
+
+ drectangle rect;
+ double angle;
+ unsigned long rows;
+ unsigned long cols;
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ point_transform_affine get_mapping_to_chip (
+ const chip_details& details
+ );
+ /*!
+ ensures
+ - returns a transformation that maps from the pixels in the original image
+ to the pixels in the cropped image defined by the given details object.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ full_object_detection map_det_to_chip (
+ const full_object_detection& det,
+ const chip_details& details
+ );
+ /*!
+ ensures
+ - Maps the given detection into the pixel space of the image chip defined by
+ the given details object. That is, this function returns an object D such
+ that:
+ - D.get_rect() == a box that bounds the same thing in the image chip as
+ det.get_rect() bounds in the original image the chip is extracted from.
+ - for all valid i:
+ - D.part(i) == the location in the image chip corresponding to
+ det.part(i) in the original image.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2,
+ typename interpolation_type
+ >
+ void extract_image_chips (
+ const image_type1& img,
+ const std::vector<chip_details>& chip_locations,
+ dlib::array<image_type2>& chips,
+ const interpolation_type& interp
+ );
+ /*!
+ requires
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - pixel_traits<typename image_traits<image_type1>::pixel_type>::has_alpha == false
+ - for all valid i:
+ - chip_locations[i].rect.is_empty() == false
+ - chip_locations[i].size() != 0
+ - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear,
+ interpolate_quadratic, or a type with a compatible interface.
+ ensures
+ - This function extracts "chips" from an image. That is, it takes a list of
+ rectangular sub-windows (i.e. chips) within an image and extracts those
+ sub-windows, storing each into its own image. It also scales and rotates the
+ image chips according to the instructions inside each chip_details object.
+ It uses the interpolation method supplied as a parameter.
+ - #chips == the extracted image chips
+ - #chips.size() == chip_locations.size()
+ - for all valid i:
+ - #chips[i] == The image chip extracted from the position
+ chip_locations[i].rect in img.
+ - #chips[i].nr() == chip_locations[i].rows
+ - #chips[i].nc() == chip_locations[i].cols
+ - The image will have been rotated counter-clockwise by
+ chip_locations[i].angle radians, around the center of
+ chip_locations[i].rect, before the chip was extracted.
+ - Any pixels in an image chip that go outside img are set to 0 (i.e. black).
+ !*/
+
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ void extract_image_chips (
+ const image_type1& img,
+ const std::vector<chip_details>& chip_locations,
+ dlib::array<image_type2>& chips
+ );
+ /*!
+ ensures
+ - This function is a simple convenience / compatibility wrapper that calls the
+ above-defined extract_image_chips() function using bilinear interpolation.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2,
+ typename interpolation_type
+ >
+ void extract_image_chip (
+ const image_type1& img,
+ const chip_details& chip_location,
+ image_type2& chip,
+ const interpolation_type& interp
+ );
+ /*!
+ ensures
+ - This function simply calls extract_image_chips() with a single chip location
+ and stores the single output chip into #chip. It uses the provided
+ interpolation method.
+ !*/
+
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ void extract_image_chip (
+ const image_type1& img,
+ const chip_details& chip_location,
+ image_type2& chip
+ );
+ /*!
+ ensures
+ - This function is a simple convenience / compatibility wrapper that calls the
+ above-defined extract_image_chip() function using bilinear interpolation.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ struct sub_image_proxy
+ {
+ /*!
+ REQUIREMENTS ON image_type
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+
+ WHAT THIS OBJECT REPRESENTS
+ This is a lightweight image object for referencing a subwindow of an image.
+ It implements the generic image interface and can therefore be used with
+ any function that expects a generic image, excepting that you cannot change
+ the size of a sub_image_proxy.
+
+ Note that it only stores a pointer to the image data given to its
+ constructor and therefore does not perform a copy. Moreover, this means
+ that an instance of this object becomes invalid after the underlying image
+ data it references is destroyed.
+ !*/
+ sub_image_proxy (
+            image_type& img,
+ const rectangle& rect
+ );
+ /*!
+ ensures
+ - This object is an image that represents the part of img contained within
+ rect. If rect is larger than img then rect is cropped so that it does
+ not go outside img.
+ !*/
+ };
+
+ template <
+ typename image_type
+ >
+ sub_image_proxy<image_type> sub_image (
+ image_type& img,
+ const rectangle& rect
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ ensures
+ - returns sub_image_proxy<image_type>(img,rect)
+ !*/
+
+ template <typename T>
+ sub_image_proxy<some_appropriate_type> sub_image (
+ T* img,
+ long nr,
+ long nc,
+ long row_stride
+ );
+ /*!
+ requires
+ - img == a pointer to at least nr*row_stride T objects
+ - nr >= 0
+ - nc >= 0
+ - row_stride >= 0
+ ensures
+ - This function returns an image that is just a thin wrapper around the given
+ pointer. It will have the dimensions defined by the supplied longs. To be
+ precise, this function returns an image object IMG such that:
+ - image_data(IMG) == img
+ - num_rows(IMG) == nr
+ - num_columns(IMG) == nc
+ - width_step(IMG) == row_stride*sizeof(T)
+ - IMG contains pixels of type T.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ struct const_sub_image_proxy
+ {
+ /*!
+ REQUIREMENTS ON image_type
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+
+ WHAT THIS OBJECT REPRESENTS
+ This object is just like sub_image_proxy except that it does not allow the
+ pixel data to be modified.
+ !*/
+ const_sub_image_proxy (
+            const image_type& img,
+ const rectangle& rect
+ );
+ /*!
+ ensures
+ - This object is an image that represents the part of img contained within
+ rect. If rect is larger than img then rect is cropped so that it does
+ not go outside img.
+ !*/
+ };
+
+ template <
+ typename image_type
+ >
+ const const_sub_image_proxy<image_type> sub_image (
+ const image_type& img,
+ const rectangle& rect
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ ensures
+ - returns const_sub_image_proxy<image_type>(img,rect)
+ !*/
+
+ template <typename T>
+ const const_sub_image_proxy<some_appropriate_type> sub_image (
+ const T* img,
+ long nr,
+ long nc,
+ long row_stride
+ );
+ /*!
+ requires
+ - img == a pointer to at least nr*row_stride T objects
+ - nr >= 0
+ - nc >= 0
+ - row_stride >= 0
+ ensures
+ - This function returns an image that is just a thin wrapper around the given
+ pointer. It will have the dimensions defined by the supplied longs. To be
+ precise, this function returns an image object IMG such that:
+ - image_data(IMG) == img
+ - num_rows(IMG) == nr
+ - num_columns(IMG) == nc
+ - width_step(IMG) == row_stride*sizeof(T)
+ - IMG contains pixels of type T.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ chip_details get_face_chip_details (
+ const full_object_detection& det,
+ const unsigned long size = 200,
+ const double padding = 0.2
+ );
+ /*!
+ requires
+ - det.num_parts() == 68 || det.num_parts() == 5
+ - size > 0
+ - padding >= 0
+ ensures
+ - This function assumes det contains a human face detection with face parts
+ annotated using the annotation scheme from the iBUG 300-W face landmark
+ dataset or a 5 point face annotation. Given these assumptions, it creates a
+ chip_details object that will extract a copy of the face that has been
+ rotated upright, centered, and scaled to a standard size when given to
+ extract_image_chip().
+ - This function is specifically calibrated to work with one of these models:
+ - http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2
+ - http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
+ - The extracted chips will have size rows and columns in them.
+ - if padding == 0 then the chip will be closely cropped around the face.
+          Setting larger padding values will result in a looser cropping.  In particular,
+ a padding of 0.5 would double the width of the cropped area, a value of 1
+ would triple it, and so forth.
+ - The 5 point face annotation scheme is assumed to be:
+ - det part 0 == left eye corner, outside part of eye.
+ - det part 1 == left eye corner, inside part of eye.
+ - det part 2 == right eye corner, outside part of eye.
+ - det part 3 == right eye corner, inside part of eye.
+ - det part 4 == immediately under the nose, right at the top of the philtrum.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ std::vector<chip_details> get_face_chip_details (
+ const std::vector<full_object_detection>& dets,
+ const unsigned long size = 200,
+ const double padding = 0.2
+ );
+ /*!
+ requires
+ - for all valid i:
+            - dets[i].num_parts() == 68 || dets[i].num_parts() == 5
+ - size > 0
+ - padding >= 0
+ ensures
+ - This function is identical to the version of get_face_chip_details() defined
+ above except that it creates and returns an array of chip_details objects,
+ one for each input full_object_detection.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ image_type jitter_image(
+ const image_type& img,
+ dlib::rand& rnd
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false
+ - img.size() > 0
+ - img.nr() == img.nc()
+ ensures
+ - Randomly jitters the image a little bit and returns this new jittered image.
+ To be specific, the returned image has the same size as img and will look
+ generally similar. The difference is that the returned image will have been
+ slightly rotated, zoomed, and translated. There is also a 50% chance it will
+ be mirrored left to right.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_INTERPOlATION_ABSTRACT_
+
diff --git a/ml/dlib/dlib/image_transforms/label_connected_blobs.h b/ml/dlib/dlib/image_transforms/label_connected_blobs.h
new file mode 100644
index 000000000..c25346c76
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/label_connected_blobs.h
@@ -0,0 +1,188 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_LABEL_CONNeCTED_BLOBS_H_
+#define DLIB_LABEL_CONNeCTED_BLOBS_H_
+
+#include "label_connected_blobs_abstract.h"
+#include "../geometry.h"
+#include <stack>
+#include <vector>
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    struct neighbors_8 // neighborhood functor: generates the 8-connected neighbors of a pixel
+    {
+        void operator() (
+            const point& p,
+            std::vector<point>& neighbors // appended to; not cleared here (caller clears)
+        ) const
+        {
+            neighbors.push_back(point(p.x()+1,p.y()+1)); // column to the right of p
+            neighbors.push_back(point(p.x()+1,p.y()  ));
+            neighbors.push_back(point(p.x()+1,p.y()-1));
+
+            neighbors.push_back(point(p.x(),p.y()+1)); // directly below and above p
+            neighbors.push_back(point(p.x(),p.y()-1));
+
+            neighbors.push_back(point(p.x()-1,p.y()+1)); // column to the left of p
+            neighbors.push_back(point(p.x()-1,p.y()  ));
+            neighbors.push_back(point(p.x()-1,p.y()-1));
+        }
+    };
+
+    struct neighbors_4 // neighborhood functor: generates the 4-connected (edge-adjacent) neighbors of a pixel
+    {
+        void operator() (
+            const point& p,
+            std::vector<point>& neighbors // appended to; not cleared here (caller clears)
+        ) const
+        {
+            neighbors.push_back(point(p.x()+1,p.y())); // right
+            neighbors.push_back(point(p.x()-1,p.y())); // left
+            neighbors.push_back(point(p.x(),p.y()+1)); // below
+            neighbors.push_back(point(p.x(),p.y()-1)); // above
+        }
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    struct connected_if_both_not_zero // connection functor: pixels a and b are connected iff both are non-zero
+    {
+        template <typename image_type>
+        bool operator() (
+            const image_type& img,
+            const point& a,
+            const point& b
+        ) const
+        {
+            return (img[a.y()][a.x()] != 0 && img[b.y()][b.x()] != 0); // img is indexed [row][column]
+        }
+    };
+
+    struct connected_if_equal // connection functor: pixels a and b are connected iff they hold the same value
+    {
+        template <typename image_type>
+        bool operator() (
+            const image_type& img,
+            const point& a,
+            const point& b
+        ) const
+        {
+            return (img[a.y()][a.x()] == img[b.y()][b.x()]); // img is indexed [row][column]
+        }
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    struct zero_pixels_are_background // background functor: a pixel is background iff its value is 0
+    {
+        template <typename image_type>
+        bool operator() (
+            const image_type& img,
+            const point& p
+        ) const
+        {
+            return img[p.y()][p.x()] == 0; // img is indexed [row][column]
+        }
+
+    };
+
+    struct nothing_is_background // background functor: no pixel is ever background (every pixel gets a blob label)
+    {
+        template <typename image_type>
+        bool operator() (
+            const image_type&,
+            const point&
+        ) const
+        {
+            return false;
+        }
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_type,
+        typename label_image_type,
+        typename background_functor_type,
+        typename neighbors_functor_type,
+        typename connected_functor_type
+        >
+    unsigned long label_connected_blobs (
+        const image_type& img_,
+        const background_functor_type& is_background, // predicate: is_background(img, point) -> bool
+        const neighbors_functor_type&  get_neighbors, // generator: get_neighbors(point, std::vector<point>&)
+        const connected_functor_type&  is_connected,  // predicate: is_connected(img, point, point) -> bool
+        label_image_type& label_img_                  // output: per-pixel blob labels (0 == background)
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(is_same_object(img_, label_img_) == false,
+            "\t unsigned long label_connected_blobs()"
+            << "\n\t The input image and output label image can't be the same object."
+            );
+
+        const_image_view<image_type> img(img_);
+        image_view<label_image_type> label_img(label_img_);
+
+        std::stack<point> neighbors; // work stack of pixels whose neighbors still need visiting (iterative flood fill)
+        label_img.set_size(img.nr(), img.nc());
+        assign_all_pixels(label_img, 0); // label 0 is reserved for background pixels
+        unsigned long next = 1; // next unused blob label
+
+        if (img.size() == 0)
+            return 0;
+
+        const rectangle area = get_rect(img); // image bounds, used to discard out-of-image neighbors
+
+        std::vector<point> window; // scratch buffer reused for each pixel's neighbor list
+
+        for (long r = 0; r < img.nr(); ++r)
+        {
+            for (long c = 0; c < img.nc(); ++c)
+            {
+                // skip already labeled pixels or background pixels
+                if (label_img[r][c] != 0 || is_background(img,point(c,r)))
+                    continue;
+
+                label_img[r][c] = next; // found an unlabeled foreground pixel: seed a new blob
+
+                // label all the neighbors of this point
+                neighbors.push(point(c,r));
+                while (neighbors.size() > 0)
+                {
+                    const point p = neighbors.top();
+                    neighbors.pop();
+
+                    window.clear();
+                    get_neighbors(p, window);
+
+                    for (unsigned long i = 0; i < window.size(); ++i)
+                    {
+                        if (area.contains(window[i]) &&                    // point in image.
+                            !is_background(img,window[i]) &&               // isn't background.
+                            label_img[window[i].y()][window[i].x()] == 0 && // haven't already labeled it.
+                            is_connected(img, p, window[i]))               // it's connected.
+                        {
+                            label_img[window[i].y()][window[i].x()] = next;
+                            neighbors.push(window[i]);
+                        }
+                    }
+                }
+
+                ++next; // this blob is fully labeled; advance to a fresh label
+            }
+        }
+
+        return next; // == max label + 1 == number of blobs found (including the background blob)
+    }
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_LABEL_CONNeCTED_BLOBS_H_
+
diff --git a/ml/dlib/dlib/image_transforms/label_connected_blobs_abstract.h b/ml/dlib/dlib/image_transforms/label_connected_blobs_abstract.h
new file mode 100644
index 000000000..5dc984000
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/label_connected_blobs_abstract.h
@@ -0,0 +1,199 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_LABEL_CONNeCTED_BLOBS_ABSTRACT_H_
+#ifdef DLIB_LABEL_CONNeCTED_BLOBS_ABSTRACT_H_
+
+#include "../geometry.h"
+#include <vector>
+#include "../image_processing/generic_image.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ struct neighbors_8
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object is a pixel neighborhood generating functor for
+ use with the label_connected_blobs() routine defined below.
+ !*/
+
+ void operator() (
+ const point& p,
+ std::vector<point>& neighbors
+ ) const;
+ /*!
+ ensures
+ - adds the 8 neighboring pixels surrounding p into neighbors
+ !*/
+ };
+
+ struct neighbors_4
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object is a pixel neighborhood generating functor for
+ use with the label_connected_blobs() routine defined below.
+ !*/
+
+ void operator() (
+ const point& p,
+ std::vector<point>& neighbors
+ ) const;
+ /*!
+ ensures
+ - adds the 4 neighboring pixels of p into neighbors. These
+ are the ones immediately to the left, top, right, and bottom.
+ !*/
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ struct connected_if_both_not_zero
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object is a pixel connection testing functor for use
+ with the label_connected_blobs() routine defined below.
+ !*/
+
+ template <typename image_view_type>
+ bool operator() (
+ const image_view_type& img,
+ const point& a,
+ const point& b
+ ) const
+ {
+ return (img[a.y()][a.x()] != 0 && img[b.y()][b.x()] != 0);
+ }
+ };
+
+ struct connected_if_equal
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object is a pixel connection testing functor for use
+ with the label_connected_blobs() routine defined below.
+ !*/
+
+ template <typename image_view_type>
+ bool operator() (
+ const image_view_type& img,
+ const point& a,
+ const point& b
+ ) const
+ {
+ return (img[a.y()][a.x()] == img[b.y()][b.x()]);
+ }
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ struct zero_pixels_are_background
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object is a background testing functor for use
+ with the label_connected_blobs() routine defined below.
+ !*/
+
+ template <typename image_view_type>
+ bool operator() (
+ const image_view_type& img,
+ const point& p
+ ) const
+ {
+ return img[p.y()][p.x()] == 0;
+ }
+
+ };
+
+ struct nothing_is_background
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object is a background testing functor for use
+ with the label_connected_blobs() routine defined below.
+ !*/
+
+ template <typename image_view_type>
+ bool operator() (
+ const image_view_type&,
+ const point&
+ ) const
+ {
+ return false;
+ }
+
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename label_image_type,
+ typename background_functor_type,
+ typename neighbors_functor_type,
+ typename connected_functor_type
+ >
+ unsigned long label_connected_blobs (
+ const image_type& img,
+ const background_functor_type& is_background,
+ const neighbors_functor_type& get_neighbors,
+ const connected_functor_type& is_connected,
+ label_image_type& label_img
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - label_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h and it must contain integer pixels.
+ - is_background(img, point(c,r)) is a legal expression that evaluates to a bool.
+ - is_connected(img, point(c,r), point(c2,r2)) is a legal expression that
+ evaluates to a bool.
+ - get_neighbors(point(c,r), neighbors) is a legal expression where neighbors
+ is of type std::vector<point>.
+ - is_same_object(img, label_img) == false
+ ensures
+ - This function labels each of the connected blobs in img with a unique integer
+ label.
+ - An image can be thought of as a graph where pixels A and B are connected if
+ and only if the following two statements are satisfied:
+ - is_connected(img,A,B) == true
+ - get_neighbors(A, neighbors) results in neighbors containing B or
+ get_neighbors(B, neighbors) results in neighbors containing A.
+ Then this function can be understood as labeling all the connected components
+ of this pixel graph such that all pixels in a component get the same label while
+ pixels in different components get different labels. Note that there is a
+ special "background" component determined by is_background(). Any pixels which
+ are "background" always get a blob id of 0 regardless of any other considerations.
+ - #label_img.nr() == img.nr()
+ - #label_img.nc() == img.nc()
+ - for all valid r and c:
+ - #label_img[r][c] == the blob label number for pixel img[r][c].
+ - #label_img[r][c] >= 0
+ - if (is_background(img, point(c,r))) then
+ - #label_img[r][c] == 0
+ - else
+ - #label_img[r][c] != 0
+ - if (img.size() != 0) then
+ - returns max(mat(#label_img))+1
+ (i.e. returns a number one greater than the maximum blob id number,
+ this is the number of blobs found.)
+ - else
+ - returns 0
+ - blob labels are contiguous, therefore, the number returned by this function is
+ the number of blobs in the image (including the background blob).
+ - It is guaranteed that is_connected() and is_background() will never be
+ called with points outside the image.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_LABEL_CONNeCTED_BLOBS_ABSTRACT_H_
+
diff --git a/ml/dlib/dlib/image_transforms/lbp.h b/ml/dlib/dlib/image_transforms/lbp.h
new file mode 100644
index 000000000..b6bbac9cf
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/lbp.h
@@ -0,0 +1,307 @@
+// Copyright (C) 2014 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_LBP_Hh_
+#define DLIB_LBP_Hh_
+
+#include "lbp_abstract.h"
+#include "../image_processing/generic_image.h"
+#include "assign_image.h"
+#include "../pixel.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename image_type2
+ >
+ void make_uniform_lbp_image (
+ const image_type& img_,
+ image_type2& lbp_
+ )
+ {
+ const static unsigned char uniform_lbps[] = {
+ 0, 1, 2, 3, 4, 58, 5, 6, 7, 58, 58, 58, 8, 58, 9, 10, 11, 58, 58, 58, 58, 58,
+ 58, 58, 12, 58, 58, 58, 13, 58, 14, 15, 16, 58, 58, 58, 58, 58, 58, 58, 58, 58,
+ 58, 58, 58, 58, 58, 58, 17, 58, 58, 58, 58, 58, 58, 58, 18, 58, 58, 58, 19, 58,
+ 20, 21, 22, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
+ 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 23, 58, 58, 58, 58, 58,
+ 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 24, 58, 58, 58, 58, 58, 58, 58, 25, 58,
+ 58, 58, 26, 58, 27, 28, 29, 30, 58, 31, 58, 58, 58, 32, 58, 58, 58, 58, 58, 58,
+ 58, 33, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 34, 58, 58,
+ 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
+ 58, 58, 58, 58, 58, 58, 58, 58, 58, 35, 36, 37, 58, 38, 58, 58, 58, 39, 58, 58,
+ 58, 58, 58, 58, 58, 40, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
+ 58, 41, 42, 43, 58, 44, 58, 58, 58, 45, 58, 58, 58, 58, 58, 58, 58, 46, 47, 48,
+ 58, 49, 58, 58, 58, 50, 51, 52, 58, 53, 54, 55, 56, 57
+ };
+
+ COMPILE_TIME_ASSERT(sizeof(uniform_lbps) == 256);
+
+ const_image_view<image_type> img(img_);
+ image_view<image_type2> lbp(lbp_);
+
+ lbp.set_size(img.nr(), img.nc());
+
+ // set all the border pixels to the "non-uniform LBP value".
+ assign_border_pixels(lbp, 1, 1, 58);
+
+ typedef typename image_traits<image_type>::pixel_type pixel_type;
+ typedef typename pixel_traits<pixel_type>::basic_pixel_type basic_pixel_type;
+
+ for (long r = 1; r+1 < img.nr(); ++r)
+ {
+ for (long c = 1; c+1 < img.nc(); ++c)
+ {
+ const basic_pixel_type pix = get_pixel_intensity(img[r][c]);
+ unsigned char b1 = 0;
+ unsigned char b2 = 0;
+ unsigned char b3 = 0;
+ unsigned char b4 = 0;
+ unsigned char b5 = 0;
+ unsigned char b6 = 0;
+ unsigned char b7 = 0;
+ unsigned char b8 = 0;
+
+ unsigned char x = 0;
+ if (get_pixel_intensity(img[r-1][c-1]) > pix) b1 = 0x80;
+ if (get_pixel_intensity(img[r-1][c ]) > pix) b2 = 0x40;
+ if (get_pixel_intensity(img[r-1][c+1]) > pix) b3 = 0x20;
+ x |= b1;
+ if (get_pixel_intensity(img[r ][c-1]) > pix) b4 = 0x10;
+ x |= b2;
+ if (get_pixel_intensity(img[r ][c+1]) > pix) b5 = 0x08;
+ x |= b3;
+ if (get_pixel_intensity(img[r+1][c-1]) > pix) b6 = 0x04;
+ x |= b4;
+ if (get_pixel_intensity(img[r+1][c ]) > pix) b7 = 0x02;
+ x |= b5;
+ if (get_pixel_intensity(img[r+1][c+1]) > pix) b8 = 0x01;
+
+ x |= b6;
+ x |= b7;
+ x |= b8;
+
+ lbp[r][c] = uniform_lbps[x];
+ }
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename T
+ >
+ void extract_histogram_descriptors (
+ const image_type& img_,
+ const point& loc,
+ std::vector<T>& histograms,
+ const unsigned int cell_size = 10,
+ const unsigned int block_size = 4,
+ const unsigned int max_val = 58
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(cell_size >= 1 && block_size >= 1 && max_val < 256 &&
+ (unsigned int)max(mat(img_)) <= max_val,
+ "\t void extract_histogram_descriptors()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t cell_size: " << cell_size
+ << "\n\t block_size: " << block_size
+ << "\n\t max_val: " << max_val
+ << "\n\t max(mat(img_)): " << max(mat(img_))
+ );
+
+ typedef typename image_traits<image_type>::pixel_type pixel_type;
+ COMPILE_TIME_ASSERT((is_same_type<pixel_type, unsigned char>::value));
+
+ const_image_view<image_type> img(img_);
+
+ const rectangle area = get_rect(img);
+ const rectangle window = centered_rect(loc, block_size*cell_size, block_size*cell_size);
+ unsigned int cell_top = window.top();
+ for (unsigned int br = 0; br < block_size; ++br)
+ {
+ unsigned int cell_left = window.left();
+ for (unsigned int bc = 0; bc < block_size; ++bc)
+ {
+ // figure out the cell boundaries
+ rectangle cell(cell_left, cell_top, cell_left+cell_size-1, cell_top+cell_size-1);
+ cell = cell.intersect(area);
+
+ // make the actual histogram for this cell
+ unsigned int hist[256] = {0};
+ for (long r = cell.top(); r <= cell.bottom(); ++r)
+ {
+ for (long c = cell.left(); c <= cell.right(); ++c)
+ {
+ hist[img[r][c]]++;
+ }
+ }
+
+ // copy histogram into the output.
+ histograms.insert(histograms.end(), hist, hist + max_val+1);
+
+ cell_left += cell_size;
+ }
+ cell_top += cell_size;
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename T
+ >
+ void extract_uniform_lbp_descriptors (
+ const image_type& img,
+ std::vector<T>& feats,
+ const unsigned int cell_size = 10
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(cell_size >= 1,
+ "\t void extract_uniform_lbp_descriptors()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t cell_size: " << cell_size
+ );
+
+ feats.clear();
+ array2d<unsigned char> lbp;
+ make_uniform_lbp_image(img, lbp);
+ for (long r = 0; r < lbp.nr(); r+=cell_size)
+ {
+ for (long c = 0; c < lbp.nc(); c+=cell_size)
+ {
+ const rectangle cell = rectangle(c,r,c+cell_size-1,r+cell_size-1).intersect(get_rect(lbp));
+ // make the actual histogram for this cell
+ unsigned int hist[59] = {0};
+ for (long r = cell.top(); r <= cell.bottom(); ++r)
+ {
+ for (long c = cell.left(); c <= cell.right(); ++c)
+ {
+ hist[lbp[r][c]]++;
+ }
+ }
+
+ // copy histogram into the output.
+ feats.insert(feats.end(), hist, hist + 59);
+ }
+ }
+
+ for (unsigned long i = 0; i < feats.size(); ++i)
+ feats[i] = std::sqrt(feats[i]);
+ }
+
+// ----------------------------------------------------------------------------------------
+
    template <
        typename image_type,
        typename T
        >
    void extract_highdim_face_lbp_descriptors (
        const image_type& img,
        const full_object_detection& det,
        std::vector<T>& feats
    )
    {
        // Builds a high-dimensional LBP face descriptor: LBP histograms are
        // extracted around 21 facial landmark points at 5 image scales and
        // concatenated into feats.  det must contain the 68 landmarks of the
        // iBUG 300-W annotation scheme.  See lbp_abstract.h for the contract.

        // make sure requires clause is not broken
        DLIB_CASSERT(det.num_parts() == 68,
            "\t void extract_highdim_face_lbp_descriptors()"
            << "\n\t Invalid inputs were given to this function."
            << "\n\t det.num_parts(): " << det.num_parts()
            );

        const unsigned long num_scales = 5;
        feats.clear();
        dlib::vector<double,2> l, r;
        double cnt = 0;
        // Find the center of the left eye by averaging the points around
        // the eye (landmarks 36..41 in the iBUG 300-W scheme).
        for (unsigned long i = 36; i <= 41; ++i)
        {
            l += det.part(i);
            ++cnt;
        }
        l /= cnt;

        // Find the center of the right eye by averaging the points around
        // the eye (landmarks 42..47).
        cnt = 0;
        for (unsigned long i = 42; i <= 47; ++i)
        {
            r += det.part(i);
            ++cnt;
        }
        r /= cnt;

        // We only do feature extraction from these face parts. These are things like the
        // corners of the eyes and mouth and stuff like that.
        std::vector<point> parts;
        parts.reserve(30);
        parts.push_back(l);
        parts.push_back(r);
        parts.push_back(det.part(17));
        parts.push_back(det.part(21));
        parts.push_back(det.part(22));
        parts.push_back(det.part(26));
        parts.push_back(det.part(36));
        parts.push_back(det.part(39));
        parts.push_back(det.part(42));
        parts.push_back(det.part(45));
        parts.push_back(det.part(27));
        parts.push_back(det.part(28));
        parts.push_back(det.part(29));
        parts.push_back(det.part(30));
        parts.push_back(det.part(31));
        parts.push_back(det.part(35));
        parts.push_back(det.part(33));
        parts.push_back(det.part(48));
        parts.push_back(det.part(54));
        parts.push_back(det.part(51));
        parts.push_back(det.part(57));

        // Extract descriptors around every landmark at the original scale.
        array2d<unsigned char> lbp;
        make_uniform_lbp_image(img, lbp);
        for (unsigned long i = 0; i < parts.size(); ++i)
            extract_histogram_descriptors(lbp, parts[i], feats);

        if (num_scales > 1)
        {
            pyramid_down<4> pyr;
            image_type img_temp;
            pyr(img, img_temp);
            // Track how many times the pyramid has been applied so that the
            // landmark coordinates can be mapped down to each coarser level.
            unsigned long num_pyr_calls = 1;

            // now pull the features out at coarser scales
            for (unsigned long iter = 1; iter < num_scales; ++iter)
            {
                // now do the feature extraction
                make_uniform_lbp_image(img_temp, lbp);
                for (unsigned long i = 0; i < parts.size(); ++i)
                    extract_histogram_descriptors(lbp, pyr.point_down(parts[i],num_pyr_calls), feats);

                if (iter+1 < num_scales)
                {
                    pyr(img_temp);
                    ++num_pyr_calls;
                }
            }
        }

        // Take the square root of every histogram bin.
        for (unsigned long i = 0; i < feats.size(); ++i)
            feats[i] = std::sqrt(feats[i]);

        // 21 landmarks * 5 scales * 16 cells * 59 bins = 99120 values.
        DLIB_ASSERT(feats.size() == 99120, feats.size());
    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_LBP_Hh_
+
diff --git a/ml/dlib/dlib/image_transforms/lbp_abstract.h b/ml/dlib/dlib/image_transforms/lbp_abstract.h
new file mode 100644
index 000000000..1a20082a2
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/lbp_abstract.h
@@ -0,0 +1,139 @@
+// Copyright (C) 2014 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_LBP_ABSTRACT_Hh_
+#ifdef DLIB_LBP_ABSTRACT_Hh_
+
+#include "../image_processing/generic_image.h"
+#include "../pixel.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename image_type2
+ >
+ void make_uniform_lbp_image (
+ const image_type& img,
+ image_type2& lbp
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 should contain a grayscale pixel type such as unsigned char.
+ ensures
+ - #lbp.nr() == img.nr()
+ - #lbp.nc() == img.nc()
+ - This function extracts the uniform local-binary-pattern feature at every pixel
+ and stores it into #lbp. In particular, we have the following for all valid
+ r and c:
+ - #lbp[r][c] == the uniform LBP for the 3x3 pixel window centered on img[r][c].
+ In particular, this is a value in the range 0 to 58 inclusive.
+ - We use the idea of uniform LBPs from the paper:
+ Face Description with Local Binary Patterns: Application to Face Recognition
+ by Ahonen, Hadid, and Pietikainen.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename T
+ >
+ void extract_histogram_descriptors (
+ const image_type& img,
+ const point& loc,
+ std::vector<T>& histograms,
+ const unsigned int cell_size = 10,
+ const unsigned int block_size = 4,
+ const unsigned int max_val = 58
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type contains unsigned char valued pixels.
+ - T is some scalar type like int or double
+ - All pixel values in img are <= max_val
+ - cell_size >= 1
+ - block_size >= 1
+ - max_val < 256
+ ensures
+ - This function extracts histograms of pixel values from block_size*block_size
+ windows in the area in img immediately around img[loc.y()][loc.x()]. The
+ histograms are appended onto the end of #histograms. Each window is
+ cell_size pixels wide and tall. Moreover, the windows do not overlap.
+ - #histograms.size() == histograms.size() + block_size*block_size*(max_val+1)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename T
+ >
+ void extract_uniform_lbp_descriptors (
+ const image_type& img,
+ std::vector<T>& feats,
+ const unsigned int cell_size = 10
+ );
+ /*!
+ requires
+ - cell_size >= 1
+ - T is some scalar type like int or double
+ ensures
+ - Extracts histograms of uniform local-binary-patterns from img. The
+ histograms are from densely tiled windows that are cell_size pixels wide and
+ tall. The windows do not overlap and cover all of img.
+ - #feats.size() == 59*(number of windows that fit into img)
+ (i.e. #feats contains the LBP histograms)
+ - We will have taken the square root of all the histogram elements. That is,
+ #feats[i] is the square root of the number of LBPs that appeared in its
+ corresponding window.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type,
+ typename T
+ >
+ void extract_highdim_face_lbp_descriptors (
+ const image_type& img,
+ const full_object_detection& det,
+ std::vector<T>& feats
+ );
+ /*!
+ requires
+ - T is some scalar type like int or double
+ - det.num_parts() == 68
+ ensures
+ - This function extracts the high-dimensional LBP feature described in the
+ paper:
+ Blessing of Dimensionality: High-dimensional Feature and Its Efficient
+ Compression for Face Verification by Dong Chen, Xudong Cao, Fang Wen, and
+ Jian Sun
+ - #feats == the high-dimensional LBP descriptor. It is the concatenation of
+ many LBP histograms, each extracted from different scales and from different
+ windows around different face landmarks. We also take the square root of
+ each histogram element before storing it into #feats.
+ - #feats.size() == 99120
+ - This function assumes img has already been aligned and normalized to a
+ standard size.
+ - This function assumes det contains a human face detection with face parts
+ annotated using the annotation scheme from the iBUG 300-W face landmark
+ dataset. This means that det.part(i) gives the locations of different face
+ landmarks according to the iBUG 300-W annotation scheme.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_LBP_ABSTRACT_Hh_
+
diff --git a/ml/dlib/dlib/image_transforms/morphological_operations.h b/ml/dlib/dlib/image_transforms/morphological_operations.h
new file mode 100644
index 000000000..a659e4bdc
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/morphological_operations.h
@@ -0,0 +1,846 @@
+// Copyright (C) 2006 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_MORPHOLOGICAL_OPERATIONs_
+#define DLIB_MORPHOLOGICAL_OPERATIONs_
+
+#include "../pixel.h"
+#include "thresholding.h"
+#include "morphological_operations_abstract.h"
+#include "assign_image.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ namespace morphological_operations_helpers
+ {
+ template <typename image_type>
+ bool is_binary_image (
+ const image_type& img_
+ )
+ /*!
+ ensures
+ - returns true if img_ contains only on_pixel and off_pixel values.
+ - returns false otherwise
+ !*/
+ {
+ const_image_view<image_type> img(img_);
+ for (long r = 0; r < img.nr(); ++r)
+ {
+ for (long c = 0; c < img.nc(); ++c)
+ {
+ if (img[r][c] != on_pixel && img[r][c] != off_pixel)
+ {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ template <
+ long M,
+ long N
+ >
+ bool is_binary_image (
+ const unsigned char (&structuring_element)[M][N]
+ )
+ /*!
+ ensures
+ - returns true if structuring_element contains only on_pixel and off_pixel values.
+ - returns false otherwise
+ !*/
+ {
+ for (long m = 0; m < M; ++m)
+ {
+ for (long n = 0; n < N; ++n)
+ {
+ if (structuring_element[m][n] != on_pixel &&
+ structuring_element[m][n] != off_pixel)
+ {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ }
+
+// ----------------------------------------------------------------------------------------
+
    template <
        typename in_image_type,
        typename out_image_type,
        long M,
        long N
        >
    void binary_dilation (
        const in_image_type& in_img_,
        out_image_type& out_img_,
        const unsigned char (&structuring_element)[M][N]
    )
    {
        // Binary dilation: an output pixel is on_pixel whenever the structuring
        // element, centered on that pixel, covers at least one on_pixel of the
        // input.  M and N must be odd so the element has a well-defined center.
        // Pixels outside the image are treated as off_pixel.
        typedef typename image_traits<in_image_type>::pixel_type in_pixel_type;
        typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
        COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false );
        COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false );

        using namespace morphological_operations_helpers;
        COMPILE_TIME_ASSERT(M%2 == 1);
        COMPILE_TIME_ASSERT(N%2 == 1);
        DLIB_ASSERT(is_same_object(in_img_,out_img_) == false,
            "\tvoid binary_dilation()"
            << "\n\tYou must give two different image objects"
            );
        COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale);
        DLIB_ASSERT(is_binary_image(in_img_) ,
            "\tvoid binary_dilation()"
            << "\n\tin_img must be a binary image"
            );
        DLIB_ASSERT(is_binary_image(structuring_element) ,
            "\tvoid binary_dilation()"
            << "\n\tthe structuring_element must be a binary image"
            );


        const_image_view<in_image_type> in_img(in_img_);
        image_view<out_image_type> out_img(out_img_);

        // if there isn't any input image then don't do anything
        if (in_img.size() == 0)
        {
            out_img.clear();
            return;
        }

        out_img.set_size(in_img.nr(),in_img.nc());

        // apply the filter to the image
        for (long r = 0; r < in_img.nr(); ++r)
        {
            for (long c = 0; c < in_img.nc(); ++c)
            {
                // Scan the structuring element; both loops stop early as soon as
                // one hit turns the output pixel on.
                unsigned char out_pixel = off_pixel;
                for (long m = 0; m < M && out_pixel == off_pixel; ++m)
                {
                    for (long n = 0; n < N && out_pixel == off_pixel; ++n)
                    {
                        if (structuring_element[m][n] == on_pixel)
                        {
                            // if this pixel is inside the image then get it from the image
                            // but if it isn't just pretend it was an off_pixel value
                            if (r+m >= M/2 && c+n >= N/2 &&
                                r+m-M/2 < in_img.nr() && c+n-N/2 < in_img.nc())
                            {
                                out_pixel = in_img[r+m-M/2][c+n-N/2];
                            }
                        }
                    }
                }
                assign_pixel(out_img[r][c], out_pixel);
            }
        }
    }
+
+// ----------------------------------------------------------------------------------------
+
    template <
        typename in_image_type,
        typename out_image_type,
        long M,
        long N
        >
    void binary_erosion (
        const in_image_type& in_img_,
        out_image_type& out_img_,
        const unsigned char (&structuring_element)[M][N]
    )
    {
        // Binary erosion: an output pixel is on_pixel only when every on_pixel of
        // the structuring element, centered on that pixel, lies over an on_pixel
        // of the input.  Pixels outside the image count as off_pixel, so blobs
        // touching the image border are eroded there too.  M and N must be odd.
        typedef typename image_traits<in_image_type>::pixel_type in_pixel_type;
        typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
        COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false );
        COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false );

        using namespace morphological_operations_helpers;
        COMPILE_TIME_ASSERT(M%2 == 1);
        COMPILE_TIME_ASSERT(N%2 == 1);
        DLIB_ASSERT(is_same_object(in_img_,out_img_) == false,
            "\tvoid binary_erosion()"
            << "\n\tYou must give two different image objects"
            );
        COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale);
        DLIB_ASSERT(is_binary_image(in_img_) ,
            "\tvoid binary_erosion()"
            << "\n\tin_img must be a binary image"
            );
        DLIB_ASSERT(is_binary_image(structuring_element) ,
            "\tvoid binary_erosion()"
            << "\n\tthe structuring_element must be a binary image"
            );

        const_image_view<in_image_type> in_img(in_img_);
        image_view<out_image_type> out_img(out_img_);


        // if there isn't any input image then don't do anything
        if (in_img.size() == 0)
        {
            out_img.clear();
            return;
        }

        out_img.set_size(in_img.nr(),in_img.nc());

        // apply the filter to the image
        for (long r = 0; r < in_img.nr(); ++r)
        {
            for (long c = 0; c < in_img.nc(); ++c)
            {
                // Scan the structuring element; both loops stop early as soon as
                // one miss turns the output pixel off.
                unsigned char out_pixel = on_pixel;
                for (long m = 0; m < M && out_pixel == on_pixel; ++m)
                {
                    for (long n = 0; n < N && out_pixel == on_pixel; ++n)
                    {
                        if (structuring_element[m][n] == on_pixel)
                        {
                            // if this pixel is inside the image then get it from the image
                            // but if it isn't just pretend it was an off_pixel value
                            if (r+m >= M/2 && c+n >= N/2 &&
                                r+m-M/2 < in_img.nr() && c+n-N/2 < in_img.nc())
                            {
                                out_pixel = in_img[r+m-M/2][c+n-N/2];
                            }
                            else
                            {
                                out_pixel = off_pixel;
                            }
                        }
                    }
                }
                assign_pixel(out_img[r][c], out_pixel);
            }
        }
    }
+
+// ----------------------------------------------------------------------------------------
+
    template <
        typename in_image_type,
        typename out_image_type,
        long M,
        long N
        >
    void binary_open (
        const in_image_type& in_img,
        out_image_type& out_img,
        const unsigned char (&structuring_element)[M][N],
        const unsigned long iter = 1
    )
    {
        // Morphological opening: performs iter binary erosions followed by iter
        // binary dilations with the same structuring element.  iter == 0 simply
        // copies the input to the output.
        typedef typename image_traits<in_image_type>::pixel_type in_pixel_type;
        typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
        COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false );
        COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false );

        using namespace morphological_operations_helpers;
        COMPILE_TIME_ASSERT(M%2 == 1);
        COMPILE_TIME_ASSERT(N%2 == 1);
        DLIB_ASSERT(is_same_object(in_img,out_img) == false,
            "\tvoid binary_open()"
            << "\n\tYou must give two different image objects"
            );
        COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale);
        DLIB_ASSERT(is_binary_image(in_img) ,
            "\tvoid binary_open()"
            << "\n\tin_img must be a binary image"
            );
        DLIB_ASSERT(is_binary_image(structuring_element) ,
            "\tvoid binary_open()"
            << "\n\tthe structuring_element must be a binary image"
            );


        // if there isn't any input image then don't do anything
        if (num_rows(in_img)*num_columns(in_img) == 0)
        {
            set_image_size(out_img, 0,0);
            return;
        }

        set_image_size(out_img, num_rows(in_img), num_columns(in_img));

        if (iter == 0)
        {
            // just copy the image over
            assign_image(out_img, in_img);
        }
        else if (iter == 1)
        {
            in_image_type temp;
            binary_erosion(in_img,temp,structuring_element);
            binary_dilation(temp,out_img,structuring_element);
        }
        else
        {
            // Ping-pong between two temporaries: after each swap, temp2 holds the
            // latest result and temp1 receives the next operation's output.
            in_image_type temp1, temp2;
            binary_erosion(in_img,temp1,structuring_element);

            // do the extra erosions
            for (unsigned long i = 1; i < iter; ++i)
            {
                swap(temp1, temp2);
                binary_erosion(temp2,temp1,structuring_element);
            }

            // do the extra dilations
            for (unsigned long i = 1; i < iter; ++i)
            {
                swap(temp1, temp2);
                binary_dilation(temp2,temp1,structuring_element);
            }

            binary_dilation(temp1,out_img,structuring_element);
        }
    }
+
+// ----------------------------------------------------------------------------------------
+
    template <
        typename in_image_type,
        typename out_image_type,
        long M,
        long N
        >
    void binary_close (
        const in_image_type& in_img,
        out_image_type& out_img,
        const unsigned char (&structuring_element)[M][N],
        const unsigned long iter = 1
    )
    {
        // Morphological closing: performs iter binary dilations followed by iter
        // binary erosions with the same structuring element (the dual of
        // binary_open).  iter == 0 simply copies the input to the output.
        typedef typename image_traits<in_image_type>::pixel_type in_pixel_type;
        typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
        COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false );
        COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false );


        using namespace morphological_operations_helpers;
        COMPILE_TIME_ASSERT(M%2 == 1);
        COMPILE_TIME_ASSERT(N%2 == 1);
        DLIB_ASSERT(is_same_object(in_img,out_img) == false,
            "\tvoid binary_close()"
            << "\n\tYou must give two different image objects"
            );
        COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale);
        DLIB_ASSERT(is_binary_image(in_img) ,
            "\tvoid binary_close()"
            << "\n\tin_img must be a binary image"
            );
        DLIB_ASSERT(is_binary_image(structuring_element) ,
            "\tvoid binary_close()"
            << "\n\tthe structuring_element must be a binary image"
            );


        // if there isn't any input image then don't do anything
        if (num_rows(in_img)*num_columns(in_img) == 0)
        {
            set_image_size(out_img, 0,0);
            return;
        }

        set_image_size(out_img, num_rows(in_img), num_columns(in_img));

        if (iter == 0)
        {
            // just copy the image over
            assign_image(out_img, in_img);
        }
        else if (iter == 1)
        {
            in_image_type temp;
            binary_dilation(in_img,temp,structuring_element);
            binary_erosion(temp,out_img,structuring_element);
        }
        else
        {
            // Ping-pong between two temporaries: after each swap, temp2 holds the
            // latest result and temp1 receives the next operation's output.
            in_image_type temp1, temp2;
            binary_dilation(in_img,temp1,structuring_element);

            // do the extra dilations
            for (unsigned long i = 1; i < iter; ++i)
            {
                swap(temp1, temp2);
                binary_dilation(temp2,temp1,structuring_element);
            }

            // do the extra erosions
            for (unsigned long i = 1; i < iter; ++i)
            {
                swap(temp1, temp2);
                binary_erosion(temp2,temp1,structuring_element);
            }

            binary_erosion(temp1,out_img,structuring_element);
        }
    }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type1,
+ typename in_image_type2,
+ typename out_image_type
+ >
+ void binary_intersection (
+ const in_image_type1& in_img1_,
+ const in_image_type2& in_img2_,
+ out_image_type& out_img_
+ )
+ {
+ typedef typename image_traits<in_image_type1>::pixel_type in_pixel_type1;
+ typedef typename image_traits<in_image_type2>::pixel_type in_pixel_type2;
+ typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
+ COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type1>::has_alpha == false );
+ COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type2>::has_alpha == false );
+ COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false );
+
+ using namespace morphological_operations_helpers;
+ COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type1>::grayscale);
+ COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type2>::grayscale);
+ DLIB_ASSERT(is_binary_image(in_img1_) ,
+ "\tvoid binary_intersection()"
+ << "\n\tin_img1 must be a binary image"
+ );
+ DLIB_ASSERT(is_binary_image(in_img2_) ,
+ "\tvoid binary_intersection()"
+ << "\n\tin_img2 must be a binary image"
+ );
+
+ const_image_view<in_image_type1> in_img1(in_img1_);
+ const_image_view<in_image_type2> in_img2(in_img2_);
+ image_view<out_image_type> out_img(out_img_);
+
+ DLIB_ASSERT(in_img1.nc() == in_img2.nc(),
+ "\tvoid binary_intersection()"
+ << "\n\tin_img1 and in_img2 must have the same ncs."
+ << "\n\tin_img1.nc(): " << in_img1.nc()
+ << "\n\tin_img2.nc(): " << in_img2.nc()
+ );
+ DLIB_ASSERT(in_img1.nr() == in_img2.nr(),
+ "\tvoid binary_intersection()"
+ << "\n\tin_img1 and in_img2 must have the same nrs."
+ << "\n\tin_img1.nr(): " << in_img1.nr()
+ << "\n\tin_img2.nr(): " << in_img2.nr()
+ );
+
+
+
+ // if there isn't any input image then don't do anything
+ if (in_img1.size() == 0)
+ {
+ out_img.clear();
+ return;
+ }
+
+ out_img.set_size(in_img1.nr(),in_img1.nc());
+
+ for (long r = 0; r < in_img1.nr(); ++r)
+ {
+ for (long c = 0; c < in_img1.nc(); ++c)
+ {
+ if (in_img1[r][c] == on_pixel && in_img2[r][c] == on_pixel)
+ assign_pixel(out_img[r][c], on_pixel);
+ else
+ assign_pixel(out_img[r][c], off_pixel);
+ }
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type1,
+ typename in_image_type2,
+ typename out_image_type
+ >
+ void binary_union (
+ const in_image_type1& in_img1_,
+ const in_image_type2& in_img2_,
+ out_image_type& out_img_
+ )
+ {
+ typedef typename image_traits<in_image_type1>::pixel_type in_pixel_type1;
+ typedef typename image_traits<in_image_type2>::pixel_type in_pixel_type2;
+ typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
+ COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type1>::has_alpha == false );
+ COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type2>::has_alpha == false );
+ COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false );
+
+
+ using namespace morphological_operations_helpers;
+ COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type1>::grayscale);
+ COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type2>::grayscale);
+ DLIB_ASSERT(is_binary_image(in_img1_) ,
+ "\tvoid binary_intersection()"
+ << "\n\tin_img1 must be a binary image"
+ );
+ DLIB_ASSERT(is_binary_image(in_img2_) ,
+ "\tvoid binary_intersection()"
+ << "\n\tin_img2 must be a binary image"
+ );
+
+ const_image_view<in_image_type1> in_img1(in_img1_);
+ const_image_view<in_image_type2> in_img2(in_img2_);
+ image_view<out_image_type> out_img(out_img_);
+
+ DLIB_ASSERT(in_img1.nc() == in_img2.nc(),
+ "\tvoid binary_intersection()"
+ << "\n\tin_img1 and in_img2 must have the same ncs."
+ << "\n\tin_img1.nc(): " << in_img1.nc()
+ << "\n\tin_img2.nc(): " << in_img2.nc()
+ );
+ DLIB_ASSERT(in_img1.nr() == in_img2.nr(),
+ "\tvoid binary_intersection()"
+ << "\n\tin_img1 and in_img2 must have the same nrs."
+ << "\n\tin_img1.nr(): " << in_img1.nr()
+ << "\n\tin_img2.nr(): " << in_img2.nr()
+ );
+
+
+
+ // if there isn't any input image then don't do anything
+ if (in_img1.size() == 0)
+ {
+ out_img.clear();
+ return;
+ }
+
+ out_img.set_size(in_img1.nr(),in_img1.nc());
+
+ for (long r = 0; r < in_img1.nr(); ++r)
+ {
+ for (long c = 0; c < in_img1.nc(); ++c)
+ {
+ if (in_img1[r][c] == on_pixel || in_img2[r][c] == on_pixel)
+ assign_pixel(out_img[r][c], on_pixel);
+ else
+ assign_pixel(out_img[r][c], off_pixel);
+ }
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type1,
+ typename in_image_type2,
+ typename out_image_type
+ >
+ void binary_difference (
+ const in_image_type1& in_img1_,
+ const in_image_type2& in_img2_,
+ out_image_type& out_img_
+ )
+ {
+ typedef typename image_traits<in_image_type1>::pixel_type in_pixel_type1;
+ typedef typename image_traits<in_image_type2>::pixel_type in_pixel_type2;
+ typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
+ COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type1>::has_alpha == false );
+ COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type2>::has_alpha == false );
+ COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false );
+
+ using namespace morphological_operations_helpers;
+ COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type1>::grayscale);
+ COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type2>::grayscale);
+ DLIB_ASSERT(is_binary_image(in_img1_) ,
+ "\tvoid binary_difference()"
+ << "\n\tin_img1 must be a binary image"
+ );
+ DLIB_ASSERT(is_binary_image(in_img2_) ,
+ "\tvoid binary_difference()"
+ << "\n\tin_img2 must be a binary image"
+ );
+
+ const_image_view<in_image_type1> in_img1(in_img1_);
+ const_image_view<in_image_type2> in_img2(in_img2_);
+ image_view<out_image_type> out_img(out_img_);
+
+ DLIB_ASSERT(in_img1.nc() == in_img2.nc(),
+ "\tvoid binary_difference()"
+ << "\n\tin_img1 and in_img2 must have the same ncs."
+ << "\n\tin_img1.nc(): " << in_img1.nc()
+ << "\n\tin_img2.nc(): " << in_img2.nc()
+ );
+ DLIB_ASSERT(in_img1.nr() == in_img2.nr(),
+ "\tvoid binary_difference()"
+ << "\n\tin_img1 and in_img2 must have the same nrs."
+ << "\n\tin_img1.nr(): " << in_img1.nr()
+ << "\n\tin_img2.nr(): " << in_img2.nr()
+ );
+
+
+
+ // if there isn't any input image then don't do anything
+ if (in_img1.size() == 0)
+ {
+ out_img.clear();
+ return;
+ }
+
+ out_img.set_size(in_img1.nr(),in_img1.nc());
+
+ for (long r = 0; r < in_img1.nr(); ++r)
+ {
+ for (long c = 0; c < in_img1.nc(); ++c)
+ {
+ if (in_img1[r][c] == on_pixel && in_img2[r][c] == off_pixel)
+ assign_pixel(out_img[r][c], on_pixel);
+ else
+ assign_pixel(out_img[r][c], off_pixel);
+ }
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
    template <
        typename in_image_type,
        typename out_image_type
        >
    void binary_complement (
        const in_image_type& in_img_,
        out_image_type& out_img_
    )
    {
        // Binary complement: every on_pixel of the input becomes off_pixel in the
        // output and vice versa.
        typedef typename image_traits<in_image_type>::pixel_type in_pixel_type;
        typedef typename image_traits<out_image_type>::pixel_type out_pixel_type;
        COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false );
        COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false );


        using namespace morphological_operations_helpers;
        COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale);
        DLIB_ASSERT(is_binary_image(in_img_) ,
            "\tvoid binary_complement()"
            << "\n\tin_img must be a binary image"
            );

        const_image_view<in_image_type> in_img(in_img_);
        image_view<out_image_type> out_img(out_img_);

        // if there isn't any input image then don't do anything
        if (in_img.size() == 0)
        {
            out_img.clear();
            return;
        }

        out_img.set_size(in_img.nr(),in_img.nc());

        for (long r = 0; r < in_img.nr(); ++r)
        {
            for (long c = 0; c < in_img.nc(); ++c)
            {
                if (in_img[r][c] == on_pixel)
                    assign_pixel(out_img[r][c], off_pixel);
                else
                    assign_pixel(out_img[r][c], on_pixel);
            }
        }
    }
+
+    template <
+        typename image_type
+        >
+    void binary_complement (
+        image_type& img
+    )
+    {
+        // In-place overload: complements img by writing the result back into it.
+        binary_complement(img,img);
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ namespace impl
+ {
+ template <typename image_type>
+ inline bool should_remove_pixel (
+ const image_type& img,
+ long r,
+ long c,
+ int iter
+ )
+ {
+ unsigned int p2 = img[r-1][c];
+ unsigned int p3 = img[r-1][c+1];
+ unsigned int p4 = img[r][c+1];
+ unsigned int p5 = img[r+1][c+1];
+ unsigned int p6 = img[r+1][c];
+ unsigned int p7 = img[r+1][c-1];
+ unsigned int p8 = img[r][c-1];
+ unsigned int p9 = img[r-1][c-1];
+
+ int A = (p2 == 0 && p3 == 255) + (p3 == 0 && p4 == 255) +
+ (p4 == 0 && p5 == 255) + (p5 == 0 && p6 == 255) +
+ (p6 == 0 && p7 == 255) + (p7 == 0 && p8 == 255) +
+ (p8 == 0 && p9 == 255) + (p9 == 0 && p2 == 255);
+ int B = p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9;
+ int m1 = iter == 0 ? (p2 * p4 * p6) : (p2 * p4 * p8);
+ int m2 = iter == 0 ? (p4 * p6 * p8) : (p2 * p6 * p8);
+ // Decide if we should remove the pixel img[r][c].
+ return (A == 1 && (B >= 2*255 && B <= 6*255) && m1 == 0 && m2 == 0);
+ }
+
+ template <typename image_type>
+ inline void add_to_remove (
+ std::vector<point>& to_remove,
+ array2d<unsigned char>& marker,
+ const image_type& img,
+ long r,
+ long c,
+ int iter
+ )
+ {
+ if (marker[r][c]&&should_remove_pixel(img,r,c,iter))
+ {
+ to_remove.push_back(point(c,r));
+ marker[r][c] = 0;
+ }
+ }
+
+ template <typename image_type>
+ inline bool is_bw_border_pixel(
+ const image_type& img,
+ long r,
+ long c
+ )
+ {
+ unsigned int p2 = img[r-1][c];
+ unsigned int p3 = img[r-1][c+1];
+ unsigned int p4 = img[r][c+1];
+ unsigned int p5 = img[r+1][c+1];
+ unsigned int p6 = img[r+1][c];
+ unsigned int p7 = img[r+1][c-1];
+ unsigned int p8 = img[r][c-1];
+ unsigned int p9 = img[r-1][c-1];
+
+ int B = p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9;
+ // If you are on but at least one of your neighbors isn't.
+ return B<8*255 && img[r][c];
+
+ }
+
+ inline void add_if(
+ std::vector<point>& to_check2,
+ const array2d<unsigned char>& marker,
+ long c,
+ long r
+ )
+ {
+ if (marker[r][c])
+ to_check2.push_back(point(c,r));
+ }
+
+ } // end namespace impl
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_type
+        >
+    void skeleton(
+        image_type& img_
+    )
+    {
+        /*
+            Thins the binary blobs in img_, in place, until only a
+            single-pixel-wide skeleton of each blob remains.
+
+            The implementation of this function is based on the paper
+            "A fast parallel algorithm for thinning digital patterns" by T.Y. Zhang and C.Y. Suen.
+            and also the excellent discussion of it at:
+            http://opencv-code.com/quick-tips/implementation-of-thinning-algorithm-in-opencv/
+        */
+
+        typedef typename image_traits<image_type>::pixel_type pixel_type;
+
+        // This function only works on grayscale images
+        COMPILE_TIME_ASSERT(pixel_traits<pixel_type>::grayscale);
+
+        using namespace impl;
+        // Note that it's important to zero the border for 2 reasons. First, it allows
+        // thinning to begin at the border of the image. But more importantly, it causes
+        // the mask to have a border of 0 pixels as well which we use later to avoid
+        // indexing outside the image inside add_to_remove().
+        zero_border_pixels(img_,1,1);
+        image_view<image_type> img(img_);
+
+        // We use the marker to keep track of pixels we have committed to removing but
+        // haven't yet removed from img.
+        array2d<unsigned char> marker(img.nr(), img.nc());
+        assign_image(marker, img);
+
+
+        // Begin by making a list of the pixels on the borders of binary blobs.
+        std::vector<point> to_remove, to_check, to_check2;
+        for (int r = 1; r < img.nr()-1; r++)
+        {
+            for (int c = 1; c < img.nc()-1; c++)
+            {
+                if (is_bw_border_pixel(img, r, c))
+                {
+                    to_check.push_back(point(c,r));
+                }
+            }
+        }
+
+        // Now start iteratively looking at the border pixels and removing them.
+        // Each pass of the while loop runs the algorithm's two sub-iterations
+        // over the current set of candidate pixels.
+        while(to_check.size() != 0)
+        {
+            for (int iter = 0; iter <= 1; ++iter)
+            {
+                // Check which pixels we should remove
+                to_remove.clear();
+                for (unsigned long i = 0; i < to_check.size(); ++i)
+                {
+                    long r = to_check[i].y();
+                    long c = to_check[i].x();
+                    add_to_remove(to_remove, marker, img, r, c, iter);
+                }
+                for (unsigned long i = 0; i < to_check2.size(); ++i)
+                {
+                    long r = to_check2[i].y();
+                    long c = to_check2[i].x();
+                    add_to_remove(to_remove, marker, img, r, c, iter);
+                }
+                // Now remove those pixels. Also add their neighbors into the "to check"
+                // pixel list for the next iteration.
+                for (unsigned long i = 0; i < to_remove.size(); ++i)
+                {
+                    long r = to_remove[i].y();
+                    long c = to_remove[i].x();
+                    // remove the pixel
+                    img[r][c] = 0;
+                    add_if(to_check2, marker, c-1, r-1);
+                    add_if(to_check2, marker, c, r-1);
+                    add_if(to_check2, marker, c+1, r-1);
+                    add_if(to_check2, marker, c-1, r);
+                    add_if(to_check2, marker, c+1, r);
+                    add_if(to_check2, marker, c-1, r+1);
+                    add_if(to_check2, marker, c, r+1);
+                    add_if(to_check2, marker, c+1, r+1);
+                }
+            }
+            // The neighbors queued in to_check2 become the candidates for the
+            // next pass.
+            to_check.clear();
+            to_check.swap(to_check2);
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_MORPHOLOGICAL_OPERATIONs_
+
diff --git a/ml/dlib/dlib/image_transforms/morphological_operations_abstract.h b/ml/dlib/dlib/image_transforms/morphological_operations_abstract.h
new file mode 100644
index 000000000..c69bdd1ca
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/morphological_operations_abstract.h
@@ -0,0 +1,316 @@
+// Copyright (C) 2006 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_MORPHOLOGICAL_OPERATIONs_ABSTRACT_
+#ifdef DLIB_MORPHOLOGICAL_OPERATIONs_ABSTRACT_
+
+#include "../pixel.h"
+#include "thresholding_abstract.h"
+#include "../image_processing/generic_image.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ long M,
+ long N
+ >
+ void binary_dilation (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ const unsigned char (&structuring_element)[M][N]
+ );
+ /*!
+ requires
+ - in_image_type and out_image_type are image objects that implement the
+ interface defined in dlib/image_processing/generic_image.h
+ - in_img must contain a grayscale pixel type.
+ - both in_img and out_img must contain pixels with no alpha channel.
+ (i.e. pixel_traits::has_alpha==false for their pixels)
+ - is_same_object(in_img,out_img) == false
+ - M % 2 == 1 (i.e. M must be odd)
+ - N % 2 == 1 (i.e. N must be odd)
+ - all pixels in in_img are set to either on_pixel or off_pixel
+ (i.e. it must be a binary image)
+ - all pixels in structuring_element are set to either on_pixel or off_pixel
+ (i.e. it must be a binary image)
+ ensures
+ - Does a binary dilation of in_img using the given structuring element and
+ stores the result in out_img.
+ - #out_img.nc() == in_img.nc()
+ - #out_img.nr() == in_img.nr()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ long M,
+ long N
+ >
+ void binary_erosion (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ const unsigned char (&structuring_element)[M][N]
+ );
+ /*!
+ requires
+ - in_image_type and out_image_type are image objects that implement the
+ interface defined in dlib/image_processing/generic_image.h
+ - in_img must contain a grayscale pixel type.
+ - both in_img and out_img must contain pixels with no alpha channel.
+ (i.e. pixel_traits::has_alpha==false for their pixels)
+ - is_same_object(in_img,out_img) == false
+ - M % 2 == 1 (i.e. M must be odd)
+ - N % 2 == 1 (i.e. N must be odd)
+ - all pixels in in_img are set to either on_pixel or off_pixel
+ (i.e. it must be a binary image)
+ - all pixels in structuring_element are set to either on_pixel or off_pixel
+ (i.e. it must be a binary image)
+ ensures
+ - Does a binary erosion of in_img using the given structuring element and
+ stores the result in out_img.
+ - #out_img.nc() == in_img.nc()
+ - #out_img.nr() == in_img.nr()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ long M,
+ long N
+ >
+ void binary_open (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ const unsigned char (&structuring_element)[M][N],
+ const unsigned long iter = 1
+ );
+ /*!
+ requires
+ - in_image_type and out_image_type are image objects that implement the
+ interface defined in dlib/image_processing/generic_image.h
+ - in_img must contain a grayscale pixel type.
+ - both in_img and out_img must contain pixels with no alpha channel.
+ (i.e. pixel_traits::has_alpha==false for their pixels)
+ - is_same_object(in_img,out_img) == false
+ - M % 2 == 1 (i.e. M must be odd)
+ - N % 2 == 1 (i.e. N must be odd)
+ - all pixels in in_img are set to either on_pixel or off_pixel
+ (i.e. it must be a binary image)
+ - all pixels in structuring_element are set to either on_pixel or off_pixel
+ (i.e. it must be a binary image)
+ ensures
+ - Does a binary open of in_img using the given structuring element and
+ stores the result in out_img. Specifically, iter iterations of binary
+ erosion are applied and then iter iterations of binary dilation.
+ - #out_img.nc() == in_img.nc()
+ - #out_img.nr() == in_img.nr()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ long M,
+ long N
+ >
+ void binary_close (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ const unsigned char (&structuring_element)[M][N],
+ const unsigned long iter = 1
+ );
+ /*!
+ requires
+ - in_image_type and out_image_type are image objects that implement the
+ interface defined in dlib/image_processing/generic_image.h
+ - in_img must contain a grayscale pixel type.
+ - both in_img and out_img must contain pixels with no alpha channel.
+ (i.e. pixel_traits::has_alpha==false for their pixels)
+ - is_same_object(in_img,out_img) == false
+ - M % 2 == 1 (i.e. M must be odd)
+ - N % 2 == 1 (i.e. N must be odd)
+ - all pixels in in_img are set to either on_pixel or off_pixel
+ (i.e. it must be a binary image)
+ - all pixels in structuring_element are set to either on_pixel or off_pixel
+ (i.e. it must be a binary image)
+ ensures
+ - Does a binary close of in_img using the given structuring element and
+ stores the result in out_img. Specifically, iter iterations of binary
+ dilation are applied and then iter iterations of binary erosion.
+ - #out_img.nc() == in_img.nc()
+ - #out_img.nr() == in_img.nr()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type1,
+ typename in_image_type2,
+ typename out_image_type
+ >
+ void binary_intersection (
+ const in_image_type1& in_img1,
+ const in_image_type2& in_img2,
+ out_image_type& out_img
+ );
+ /*!
+ requires
+ - in_image_type1, in_image_type2, and out_image_type are image objects that
+ implement the interface defined in dlib/image_processing/generic_image.h
+ - in_img1 and in_img2 must contain grayscale pixel types.
+ - in_img1, in_img2, and out_img must contain pixels with no alpha channel.
+ (i.e. pixel_traits::has_alpha==false for their pixels)
+ - all pixels in in_img1 and in_img2 are set to either on_pixel or off_pixel
+ (i.e. they must be binary images)
+ - in_img1.nc() == in_img2.nc()
+ - in_img1.nr() == in_img2.nr()
+ ensures
+ - #out_img == the binary intersection of in_img1 and in_img2. (i.e. All
+ the pixels that are set to on_pixel in both in_img1 and in_img2 will be set
+ to on_pixel in #out_img. All other pixels will be set to off_pixel)
+            - #out_img.nc() == in_img1.nc()
+            - #out_img.nr() == in_img1.nr()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type1,
+ typename in_image_type2,
+ typename out_image_type
+ >
+ void binary_union (
+ const in_image_type1& in_img1,
+ const in_image_type2& in_img2,
+ out_image_type& out_img
+ );
+ /*!
+ requires
+ - in_image_type1, in_image_type2, and out_image_type are image objects that
+ implement the interface defined in dlib/image_processing/generic_image.h
+ - in_img1 and in_img2 must contain grayscale pixel types.
+ - in_img1, in_img2, and out_img must contain pixels with no alpha channel.
+ (i.e. pixel_traits::has_alpha==false for their pixels)
+ - all pixels in in_img1 and in_img2 are set to either on_pixel or off_pixel
+ (i.e. they must be binary images)
+ - in_img1.nc() == in_img2.nc()
+ - in_img1.nr() == in_img2.nr()
+ ensures
+ - #out_img == the binary union of in_img1 and in_img2. (i.e. All
+ the pixels that are set to on_pixel in in_img1 and/or in_img2 will be set
+ to on_pixel in #out_img. All other pixels will be set to off_pixel)
+            - #out_img.nc() == in_img1.nc()
+            - #out_img.nr() == in_img1.nr()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type1,
+ typename in_image_type2,
+ typename out_image_type
+ >
+ void binary_difference (
+ const in_image_type1& in_img1,
+ const in_image_type2& in_img2,
+ out_image_type& out_img
+ );
+ /*!
+ requires
+ - in_image_type1, in_image_type2, and out_image_type are image objects that
+ implement the interface defined in dlib/image_processing/generic_image.h
+ - in_img1 and in_img2 must contain grayscale pixel types.
+ - in_img1, in_img2, and out_img must contain pixels with no alpha channel.
+ (i.e. pixel_traits::has_alpha==false for their pixels)
+ - all pixels in in_img1 and in_img2 are set to either on_pixel or off_pixel
+ (i.e. they must be binary images)
+ - in_img1.nc() == in_img2.nc()
+ - in_img1.nr() == in_img2.nr()
+ ensures
+ - #out_img == the binary difference of in_img1 and in_img2. (i.e. #out_img
+ will be a copy of in_img1 except that any pixels in in_img2 that are set to
+ on_pixel will be set to off_pixel)
+            - #out_img.nc() == in_img1.nc()
+            - #out_img.nr() == in_img1.nr()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type
+ >
+ void binary_complement (
+ const in_image_type& in_img,
+ out_image_type& out_img
+ );
+ /*!
+ requires
+ - in_image_type and out_image_type are image objects that implement the
+ interface defined in dlib/image_processing/generic_image.h
+ - in_img must contain a grayscale pixel type.
+ - both in_img and out_img must contain pixels with no alpha channel.
+ (i.e. pixel_traits::has_alpha==false for their pixels)
+ - all pixels in in_img are set to either on_pixel or off_pixel
+ (i.e. it must be a binary image)
+ ensures
+ - #out_img == the binary complement of in_img. (i.e. For each pixel in
+ in_img, if it is on_pixel then it will be set to off_pixel in #out_img and
+ if it was off_pixel in in_img then it will be on_pixel in #out_img)
+ - #out_img.nc() == in_img.nc()
+ - #out_img.nr() == in_img.nr()
+ !*/
+
+ template <
+ typename image_type
+ >
+ void binary_complement (
+ image_type& img
+ );
+ /*!
+ requires
+ - it must be valid to call binary_complement(img,img);
+ ensures
+ - calls binary_complement(img,img);
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type
+ >
+ void skeleton(
+ image_type& img
+ );
+ /*!
+ requires
+            - image_type is an object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - img must contain a grayscale pixel type.
+ - all pixels in img are set to either on_pixel or off_pixel.
+ (i.e. it must be a binary image)
+ ensures
+ - This function computes the skeletonization of img and stores the result in
+ #img. That is, given a binary image, we progressively thin the binary blobs
+ (composed of on_pixel values) until only a single pixel wide skeleton of the
+ original blobs remains.
+ - #img.nc() == img.nc()
+ - #img.nr() == img.nr()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_MORPHOLOGICAL_OPERATIONs_ABSTRACT_
+
+
diff --git a/ml/dlib/dlib/image_transforms/random_color_transform.h b/ml/dlib/dlib/image_transforms/random_color_transform.h
new file mode 100644
index 000000000..7433da1f7
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/random_color_transform.h
@@ -0,0 +1,157 @@
+// Copyright (C) 2016 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_RANDOM_cOLOR_TRANSFORM_Hh_
+#define DLIB_RANDOM_cOLOR_TRANSFORM_Hh_
+
+#include "random_color_transform_abstract.h"
+#include "../image_processing/generic_image.h"
+#include "../pixel.h"
+#include "../rand.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    // Generates one random color transform (a gamma correction combined with
+    // a random per-channel rebalancing) at construction time and then applies
+    // that fixed transform to any number of rgb_pixel values via operator().
+    class random_color_transform
+    {
+    public:
+
+        random_color_transform (
+            dlib::rand& rnd,
+            const double gamma_magnitude = 0.5,
+            const double color_magnitude = 0.2
+        )
+        {
+            // pick a random gamma correction factor.
+            double gamma = std::max(0.0, 1 + gamma_magnitude*(rnd.get_random_double()-0.5));
+
+            // pick a random color balancing scheme.
+            double red_scale = 1-rnd.get_random_double()*color_magnitude;
+            double green_scale = 1-rnd.get_random_double()*color_magnitude;
+            double blue_scale = 1-rnd.get_random_double()*color_magnitude;
+            // Dividing by m folds a 1/255 normalization into the scales, so
+            // k*scale below lies in [0,1] before the gamma is applied and the
+            // largest of the three channel scales maps 255 back to 255.
+            const double m = 255*std::max(std::max(red_scale,green_scale),blue_scale);
+            red_scale /= m;
+            green_scale /= m;
+            blue_scale /= m;
+
+            // Now compute a lookup table for all the color channels. The table tells us
+            // what the transform does.  Layout: [0,256) red, [256,512) green,
+            // [512,768) blue; each entry is 255*(k*scale)^gamma rounded to the
+            // nearest integer.
+            table.resize(256*3);
+            unsigned long i = 0;
+            for (int k = 0; k < 256; ++k)
+            {
+                double v = 255*std::pow(k*red_scale, gamma);
+                table[i++] = (unsigned char)(v + 0.5);
+            }
+            for (int k = 0; k < 256; ++k)
+            {
+                double v = 255*std::pow(k*green_scale, gamma);
+                table[i++] = (unsigned char)(v + 0.5);
+            }
+            for (int k = 0; k < 256; ++k)
+            {
+                double v = 255*std::pow(k*blue_scale, gamma);
+                table[i++] = (unsigned char)(v + 0.5);
+            }
+        }
+
+        // Returns the color transformed version of p, via one table lookup
+        // per channel (green and blue use the offset table sections).
+        rgb_pixel operator()(rgb_pixel p) const
+        {
+            p.red = table[(unsigned int)p.red];
+            p.green = table[(unsigned int)p.green+256];
+            p.blue = table[(unsigned int)p.blue+512];
+            return p;
+        }
+
+    private:
+        std::vector<unsigned char> table;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    // Applies a single randomly generated color transform (see
+    // random_color_transform above) to every pixel of img_, in place.
+    // Consumes random numbers from rnd when building the transform.
+    template <typename image_type>
+    void disturb_colors (
+        image_type& img_,
+        dlib::rand& rnd,
+        const double gamma_magnitude = 0.5,
+        const double color_magnitude = 0.2
+    )
+    {
+        image_view<image_type> img(img_);
+        random_color_transform tform(rnd, gamma_magnitude, color_magnitude);
+        for (long r = 0; r < img.nr(); ++r)
+        {
+            for (long c = 0; c < img.nc(); ++c)
+            {
+                // Round-trip through rgb_pixel so the transform applies to any
+                // pixel type that assign_pixel() can convert to/from RGB.
+                rgb_pixel temp;
+                assign_pixel(temp, img[r][c]);
+                temp = tform(temp);
+                assign_pixel(img[r][c], temp);
+            }
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Adds one randomly drawn RGB offset vector to every pixel of img_, in
+    // place.  The offset is sampled as described in the comment below and the
+    // per-channel results are clamped to [0,255].
+    template <typename image_type>
+    void apply_random_color_offset (
+        image_type& img_,
+        dlib::rand& rnd
+    )
+    {
+        // Make a random color offset. This tform matrix came from looking at the
+        // covariance matrix of RGB values in a bunch of images. In particular, if you
+        // multiply Gaussian random vectors by tform it will result in vectors with the
+        // same covariance matrix as the original RGB data. Also, this color transform is
+        // what is suggested by the paper:
+        //     Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet
+        //     classification with deep convolutional neural networks." Advances in neural
+        //     information processing systems. 2012.
+        // Except that we used the square root of the eigenvalues (which I'm pretty sure is
+        // what the authors intended).
+        matrix<double,3,3> tform;
+        tform = -66.379,        25.094,   6.79698,
+                -68.0492,     -0.302309, -13.9539,
+                -68.4907,    -24.0199,    7.27653;
+        matrix<double,3,1> v;
+        v = rnd.get_random_gaussian(),rnd.get_random_gaussian(),rnd.get_random_gaussian();
+        // Scale the sampled offset down by 10 and round to whole pixel values.
+        v = round(tform*0.1*v);
+        const int roffset = v(0);
+        const int goffset = v(1);
+        const int boffset = v(2);
+
+        // Make up lookup tables that apply the color mapping so we don't have to put a
+        // bunch of complicated conditional branches in the loop below.
+        // put_in_range() clamps each shifted value to the valid [0,255] range.
+        unsigned char rtable[256];
+        unsigned char gtable[256];
+        unsigned char btable[256];
+        for (int i = 0; i < 256; ++i)
+        {
+            rtable[i] = put_in_range(0, 255, i+roffset);
+            gtable[i] = put_in_range(0, 255, i+goffset);
+            btable[i] = put_in_range(0, 255, i+boffset);
+        }
+
+        // now transform the image.
+        image_view<image_type> img(img_);
+        for (long r = 0; r < img.nr(); ++r)
+        {
+            for (long c = 0; c < img.nc(); ++c)
+            {
+                rgb_pixel temp;
+                assign_pixel(temp, img[r][c]);
+                temp.red = rtable[temp.red];
+                temp.green = gtable[temp.green];
+                temp.blue = btable[temp.blue];
+                assign_pixel(img[r][c], temp);
+            }
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_RANDOM_cOLOR_TRANSFORM_Hh_
+
diff --git a/ml/dlib/dlib/image_transforms/random_color_transform_abstract.h b/ml/dlib/dlib/image_transforms/random_color_transform_abstract.h
new file mode 100644
index 000000000..5826e16a6
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/random_color_transform_abstract.h
@@ -0,0 +1,94 @@
+// Copyright (C) 2016 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_RANDOM_cOLOR_TRANSFORM_ABSTRACT_Hh_
+#ifdef DLIB_RANDOM_cOLOR_TRANSFORM_ABSTRACT_Hh_
+
+#include "../image_processing/generic_image.h"
+#include "../pixel.h"
+#include "../rand.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ class random_color_transform
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object generates a random color balancing and gamma correction
+ transform. It then allows you to apply that specific transform to as many
+ rgb_pixel objects as you like.
+ !*/
+
+ public:
+
+ random_color_transform (
+ dlib::rand& rnd,
+ const double gamma_magnitude = 0.5,
+ const double color_magnitude = 0.2
+ );
+ /*!
+ requires
+ - 0 <= gamma_magnitude
+ - 0 <= color_magnitude <= 1
+ ensures
+ - This constructor generates a random color transform which can be applied
+ by calling this object's operator() method.
+ - The color transform is a gamma correction and color rebalancing. If
+ gamma_magnitude == 0 and color_magnitude == 0 then the transform doesn't
+ change any colors at all. However, the larger these parameters the more
+ noticeable the resulting transform.
+ !*/
+
+ rgb_pixel operator()(
+ rgb_pixel p
+ ) const;
+ /*!
+ ensures
+ - returns the color transformed version of p.
+ !*/
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename image_type>
+ void disturb_colors (
+ image_type& img,
+ dlib::rand& rnd,
+ const double gamma_magnitude = 0.5,
+ const double color_magnitude = 0.2
+ );
+ /*!
+ requires
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ ensures
+ - Applies a random color transform to the given image. This is done by
+ creating a random_color_transform with the given parameters and then
+ transforming each pixel in the image with the resulting transform.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename image_type>
+ void apply_random_color_offset (
+ image_type& img,
+ dlib::rand& rnd
+ );
+ /*!
+ ensures
+ - Picks a random color offset vector and adds it to the given image. The offset
+ vector is selected using the method described in the paper:
+ Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet
+ classification with deep convolutional neural networks." Advances in neural
+ information processing systems. 2012.
+ In particular, we sample an RGB value from the typical distribution of RGB
+ values, assuming it has a Gaussian distribution, and then divide it by 10.
+ This sampled RGB vector is added to each pixel of img.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+#endif // DLIB_RANDOM_cOLOR_TRANSFORM_ABSTRACT_Hh_
+
diff --git a/ml/dlib/dlib/image_transforms/random_cropper.h b/ml/dlib/dlib/image_transforms/random_cropper.h
new file mode 100644
index 000000000..2c754b608
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/random_cropper.h
@@ -0,0 +1,361 @@
+// Copyright (C) 2016 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_RaNDOM_CROPPER_H_
+#define DLIB_RaNDOM_CROPPER_H_
+
+#include "random_cropper_abstract.h"
+#include "../threads.h"
+#include <mutex>
+#include <vector>
+#include "interpolation.h"
+#include "../image_processing/full_object_detection.h"
+#include "../rand.h"
+
+namespace dlib
+{
+ class random_cropper
+ {
+ chip_dims dims = chip_dims(300,300);
+ bool randomly_flip = true;
+ double max_rotation_degrees = 30;
+ long min_object_length_long_dim = 75; // cropped object will be at least this many pixels along its longest edge.
+ long min_object_length_short_dim = 30; // cropped object will be at least this many pixels along its shortest edge.
+ double max_object_size = 0.7; // cropped object will be at most this fraction of the size of the image.
+ double background_crops_fraction = 0.5;
+ double translate_amount = 0.10;
+
+ std::mutex rnd_mutex;
+ dlib::rand rnd;
+ public:
+
+ void set_seed (
+ time_t seed
+ ) { rnd = dlib::rand(seed); }
+
+ double get_translate_amount (
+ ) const { return translate_amount; }
+
+ void set_translate_amount (
+ double value
+ )
+ {
+ DLIB_CASSERT(0 <= value);
+ translate_amount = value;
+ }
+
+ double get_background_crops_fraction (
+ ) const { return background_crops_fraction; }
+
+ void set_background_crops_fraction (
+ double value
+ )
+ {
+ DLIB_CASSERT(0 <= value && value <= 1);
+ background_crops_fraction = value;
+ }
+
+ const chip_dims& get_chip_dims(
+ ) const { return dims; }
+
+ void set_chip_dims (
+ const chip_dims& dims_
+ ) { dims = dims_; }
+
+ void set_chip_dims (
+ unsigned long rows,
+ unsigned long cols
+ ) { set_chip_dims(chip_dims(rows,cols)); }
+
+ bool get_randomly_flip (
+ ) const { return randomly_flip; }
+
+ void set_randomly_flip (
+ bool value
+ ) { randomly_flip = value; }
+
+ double get_max_rotation_degrees (
+ ) const { return max_rotation_degrees; }
+ void set_max_rotation_degrees (
+ double value
+ ) { max_rotation_degrees = std::abs(value); }
+
+ long get_min_object_length_long_dim (
+ ) const { return min_object_length_long_dim; }
+ long get_min_object_length_short_dim (
+ ) const { return min_object_length_short_dim; }
+
+ void set_min_object_size (
+ long long_dim,
+ long short_dim
+ )
+ {
+ DLIB_CASSERT(0 < short_dim && short_dim <= long_dim);
+ min_object_length_long_dim = long_dim;
+ min_object_length_short_dim = short_dim;
+ }
+
+ double get_max_object_size (
+ ) const { return max_object_size; }
+ void set_max_object_size (
+ double value
+ )
+ {
+ DLIB_CASSERT(0 < value);
+ max_object_size = value;
+ }
+
+ template <
+ typename array_type
+ >
+ void operator() (
+ size_t num_crops,
+ const array_type& images,
+ const std::vector<std::vector<mmod_rect>>& rects,
+ array_type& crops,
+ std::vector<std::vector<mmod_rect>>& crop_rects
+ )
+ {
+ DLIB_CASSERT(images.size() == rects.size());
+ crops.clear();
+ crop_rects.clear();
+ append(num_crops, images, rects, crops, crop_rects);
+ }
+
+ template <
+ typename array_type
+ >
+ void append (
+ size_t num_crops,
+ const array_type& images,
+ const std::vector<std::vector<mmod_rect>>& rects,
+ array_type& crops,
+ std::vector<std::vector<mmod_rect>>& crop_rects
+ )
+ {
+ DLIB_CASSERT(images.size() == rects.size());
+ DLIB_CASSERT(crops.size() == crop_rects.size());
+ auto original_size = crops.size();
+ crops.resize(crops.size()+num_crops);
+ crop_rects.resize(crop_rects.size()+num_crops);
+ parallel_for(original_size, original_size+num_crops, [&](long i) {
+ (*this)(images, rects, crops[i], crop_rects[i]);
+ });
+ }
+
+
+ template <
+ typename array_type,
+ typename image_type
+ >
+ void operator() (
+ const array_type& images,
+ const std::vector<std::vector<mmod_rect>>& rects,
+ image_type& crop,
+ std::vector<mmod_rect>& crop_rects
+ )
+ {
+ DLIB_CASSERT(images.size() == rects.size());
+ size_t idx;
+ { std::lock_guard<std::mutex> lock(rnd_mutex);
+ idx = rnd.get_integer(images.size());
+ }
+ (*this)(images[idx], rects[idx], crop, crop_rects);
+ }
+
+ template <
+ typename image_type1
+ >
+ image_type1 operator() (
+ const image_type1& img
+ )
+ {
+ image_type1 crop;
+ std::vector<mmod_rect> junk1, junk2;
+ (*this)(img, junk1, crop, junk2);
+ return crop;
+ }
+
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ void operator() (
+ const image_type1& img,
+ const std::vector<mmod_rect>& rects,
+ image_type2& crop,
+ std::vector<mmod_rect>& crop_rects
+ )
+ {
+ DLIB_CASSERT(num_rows(img)*num_columns(img) != 0);
+ chip_details crop_plan;
+ bool should_flip_crop;
+ make_crop_plan(img, rects, crop_plan, should_flip_crop);
+
+ extract_image_chip(img, crop_plan, crop);
+ const rectangle_transform tform = get_mapping_to_chip(crop_plan);
+
+ // copy rects into crop_rects and set ones that are outside the crop to ignore or
+ // drop entirely as appropriate.
+ crop_rects.clear();
+ for (auto rect : rects)
+ {
+ // map to crop
+ rect.rect = tform(rect.rect);
+
+ // if the rect is at least partly in the crop
+ if (get_rect(crop).intersect(rect.rect).area() != 0)
+ {
+ // set to ignore if not totally in the crop or if too small.
+ if (!get_rect(crop).contains(rect.rect) ||
+ ((long)rect.rect.height() < min_object_length_long_dim && (long)rect.rect.width() < min_object_length_long_dim) ||
+ ((long)rect.rect.height() < min_object_length_short_dim || (long)rect.rect.width() < min_object_length_short_dim))
+ {
+ rect.ignore = true;
+ }
+
+ crop_rects.push_back(rect);
+ }
+ }
+
+ // Also randomly flip the image
+ if (should_flip_crop)
+ {
+ image_type2 temp;
+ flip_image_left_right(crop, temp);
+ swap(crop,temp);
+ for (auto&& rect : crop_rects)
+ rect.rect = impl::flip_rect_left_right(rect.rect, get_rect(crop));
+ }
+ }
+
+ private:
+
// Decides how the next crop will be taken from img: either an object-centered
// crop (with random translation and scale jitter) or a pure background crop,
// plus a random rotation angle and a random mirror flag.  All random number
// generation happens while holding rnd_mutex, making this the synchronization
// point that keeps the cropper safe for concurrent operator() calls.
template <typename image_type1>
void make_crop_plan (
    const image_type1& img,
    const std::vector<mmod_rect>& rects,
    chip_details& crop_plan,     // out: where/how to extract the chip
    bool& should_flip_crop       // out: mirror the chip after extraction?
)
{
    std::lock_guard<std::mutex> lock(rnd_mutex);
    rectangle crop_rect;
    // With probability (1 - background_crops_fraction), and only when there is
    // at least one real (non-ignore) box, build an object-centered crop.
    if (has_non_ignored_box(rects) && rnd.get_random_double() >= background_crops_fraction)
    {
        auto rect = rects[randomly_pick_rect(rects)].rect;

        // perturb the location of the crop by a small fraction of the object's size.
        const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width()),
                                            rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width()));

        // We are going to grow rect into the cropping rect.  First, we grow it a
        // little so that it has the desired minimum border around it.
        drectangle drect = centered_drect(center(rect)+rand_translate, rect.width()/max_object_size, rect.height()/max_object_size);

        // Now make rect have the same aspect ratio as dims so that there won't be
        // any funny stretching when we crop it.  We do this by growing it along
        // whichever dimension is too short.
        const double target_aspect = dims.cols/(double)dims.rows;
        if (drect.width()/drect.height() < target_aspect)
            drect = centered_drect(drect, target_aspect*drect.height(), drect.height());
        else
            drect = centered_drect(drect, drect.width(), drect.width()/target_aspect);

        // Now perturb the scale of the crop.  We do this by shrinking it, but not
        // so much that it gets smaller than the min object sizes require.
        // current_width/current_height are the object's size as it would appear
        // in the output chip before any scale jitter.
        double current_width = dims.cols*rect.width()/drect.width();
        double current_height = dims.rows*rect.height()/drect.height();

        // never make any dimension smaller than the short dim.
        double min_scale1 = std::max(min_object_length_short_dim/current_width, min_object_length_short_dim/current_height);
        // at least one dimension needs to be longer than the long dim.
        double min_scale2 = std::min(min_object_length_long_dim/current_width, min_object_length_long_dim/current_height);
        double min_scale = std::max(min_scale1, min_scale2);

        // rand_scale_perturb >= 1, so the crop rect only grows, which shrinks
        // the object in the chip, but never below the min size constraints.
        const double rand_scale_perturb = 1.0/rnd.get_double_in_range(min_scale, 1);
        crop_rect = centered_drect(drect, drect.width()*rand_scale_perturb, drect.height()*rand_scale_perturb);

    }
    else
    {
        // Background crop: ignore the boxes and sample a random window.
        crop_rect = make_random_cropping_rect(img);
    }
    should_flip_crop = randomly_flip && rnd.get_random_double() > 0.5;
    const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180;
    crop_plan = chip_details(crop_rect, dims, angle);
}
+
+ bool has_non_ignored_box (
+ const std::vector<mmod_rect>& rects
+ ) const
+ {
+ for (auto&& b : rects)
+ {
+ if (!b.ignore)
+ return true;
+ }
+ return false;
+ }
+
+ size_t randomly_pick_rect (
+ const std::vector<mmod_rect>& rects
+ )
+ {
+ DLIB_CASSERT(has_non_ignored_box(rects));
+ size_t idx = rnd.get_integer(rects.size());
+ while(rects[idx].ignore)
+ idx = rnd.get_integer(rects.size());
+ return idx;
+ }
+
// Samples a random window inside img_ having the same aspect ratio as the
// output chip (this->dims).  Used for background crops and as the fallback
// when an image has no non-ignored boxes.
template <typename image_type>
rectangle make_random_cropping_rect(
    const image_type& img_
)
{
    const_image_view<image_type> img(img_);
    // Figure out what rectangle we want to crop from the image. We are going to
    // crop out an image of size this->dims, so we pick a random scale factor that
    // lets this random box be either as big as it can be while still fitting in
    // the image or as small as a 3x zoomed in box randomly somewhere in the image.
    double mins = 1.0/3.0, maxs = std::min(img.nr()/(double)dims.rows, img.nc()/(double)dims.cols);
    // If the image is smaller than 3x the chip, don't zoom in past what fits.
    mins = std::min(mins, maxs);
    auto scale = rnd.get_double_in_range(mins, maxs);
    rectangle rect(scale*dims.cols, scale*dims.rows);
    // randomly shift the box around (offsets chosen so rect stays in bounds)
    point offset(rnd.get_integer(1+img.nc()-rect.width()),
                 rnd.get_integer(1+img.nr()-rect.height()));
    return move_rect(rect, offset);
}
+
+
+
+ };
+
+// ----------------------------------------------------------------------------------------
+
// Pretty-prints every tunable parameter of a random_cropper, one per line.
// Handy for logging the cropper configuration used during training.
inline std::ostream& operator<< (
    std::ostream& out,
    const random_cropper& item
)
{
    using std::endl;
    out << "random_cropper details: " << endl;
    out << " chip_dims.rows: " << item.get_chip_dims().rows << endl;
    out << " chip_dims.cols: " << item.get_chip_dims().cols << endl;
    // NOTE(review): std::boolalpha is sticky; it remains set on out after this call.
    out << " randomly_flip: " << std::boolalpha << item.get_randomly_flip() << endl;
    out << " max_rotation_degrees: " << item.get_max_rotation_degrees() << endl;
    out << " min_object_length_long_dim: " << item.get_min_object_length_long_dim() << endl;
    out << " min_object_length_short_dim: " << item.get_min_object_length_short_dim() << endl;
    out << " max_object_size: " << item.get_max_object_size() << endl;
    out << " background_crops_fraction: " << item.get_background_crops_fraction() << endl;
    out << " translate_amount: " << item.get_translate_amount() << endl;
    return out;
}
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_RaNDOM_CROPPER_H_
+
diff --git a/ml/dlib/dlib/image_transforms/random_cropper_abstract.h b/ml/dlib/dlib/image_transforms/random_cropper_abstract.h
new file mode 100644
index 000000000..7603a1c47
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/random_cropper_abstract.h
@@ -0,0 +1,346 @@
+// Copyright (C) 2016 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_RaNDOM_CROPPER_ABSTRACT_H_
+#ifdef DLIB_RaNDOM_CROPPER_ABSTRACT_H_
+
+#include "../threads.h"
+#include <mutex>
+#include <vector>
+#include "interpolation.h"
+#include "../image_processing/full_object_detection.h"
+#include "../rand.h"
+
+namespace dlib
+{
+ class random_cropper
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object is a tool for extracting random crops of objects from a set of
+ images. The crops are randomly jittered in scale, translation, and
+ rotation but more or less centered on objects specified by mmod_rect
+ objects.
+
+ THREAD SAFETY
+ It is safe for multiple threads to make concurrent calls to this object's
+ operator() methods.
+ !*/
+
+ public:
+
+ random_cropper (
+ );
+ /*!
+ ensures
+ - #get_chip_dims() == chip_dims(300,300)
+ - #get_randomly_flip() == true
+ - #get_max_rotation_degrees() == 30
+ - #get_min_object_length_long_dim() == 70
+ - #get_min_object_length_short_dim() == 30
+ - #get_max_object_size() == 0.7
+ - #get_background_crops_fraction() == 0.5
+ - #get_translate_amount() == 0.1
+ !*/
+
+ void set_seed (
+ time_t seed
+ );
+ /*!
+ ensures
+ - Seeds the internal random number generator with the given seed.
+ !*/
+
+ double get_translate_amount (
+ ) const;
+ /*!
+ ensures
+ - When a box is cropped out, it will be randomly translated prior to
+ cropping by #get_translate_amount()*(the box's height) up or down and
+ #get_translate_amount()*(the box's width) left or right.
+ !*/
+
+ void set_translate_amount (
+ double value
+ );
+ /*!
+ requires
+ - value >= 0
+ ensures
+ - #get_translate_amount() == value
+ !*/
+
+ double get_background_crops_fraction (
+ ) const;
+ /*!
+ ensures
+ - When making random crops, get_background_crops_fraction() fraction of
+ them will be from random background rather than being centered on some
+ object in the dataset.
+ !*/
+
+ void set_background_crops_fraction (
+ double value
+ );
+ /*!
+ requires
+ - 0 <= value <= 1
+ ensures
+ - #get_background_crops_fraction() == value
+ !*/
+
+ const chip_dims& get_chip_dims(
+ ) const;
+ /*!
+ ensures
+ - returns the dimensions of image chips produced by this object.
+ !*/
+
+ void set_chip_dims (
+ const chip_dims& dims
+ );
+ /*!
+ ensures
+ - #get_chip_dims() == dims
+ !*/
+
+ void set_chip_dims (
+ unsigned long rows,
+ unsigned long cols
+ );
+ /*!
+ ensures
+ - #get_chip_dims() == chip_dims(rows,cols)
+ !*/
+
+ bool get_randomly_flip (
+ ) const;
+ /*!
+ ensures
+ - returns true if this object will randomly mirror chips left to right.
+ !*/
+
+ void set_randomly_flip (
+ bool value
+ );
+ /*!
+ ensures
+ - #get_randomly_flip() == value
+ !*/
+
+ double get_max_rotation_degrees (
+ ) const;
+ /*!
+ ensures
+ - When extracting an image chip, this object will pick a random rotation
+ in the range [-get_max_rotation_degrees(), get_max_rotation_degrees()]
+ and rotate the chip by that amount.
+ !*/
+
+ void set_max_rotation_degrees (
+ double value
+ );
+ /*!
+ ensures
+ - #get_max_rotation_degrees() == std::abs(value)
+ !*/
+
+ long get_min_object_length_long_dim (
+ ) const;
+ /*!
+ ensures
+ - When a chip is extracted around an object, the chip will be sized so that
+ the longest edge of the object (i.e. either its height or width,
+ whichever is longer) is at least #get_min_object_length_long_dim() pixels
+ in length. When we say "object" here we are referring specifically to
+ the rectangle in the mmod_rect output by the cropper.
+ !*/
+
+ long get_min_object_length_short_dim (
+ ) const;
+ /*!
+ ensures
+ - When a chip is extracted around an object, the chip will be sized so that
+ the shortest edge of the object (i.e. either its height or width,
+ whichever is shorter) is at least #get_min_object_length_short_dim()
+ pixels in length. When we say "object" here we are referring
+ specifically to the rectangle in the mmod_rect output by the cropper.
+ !*/
+
+ void set_min_object_size (
+ long long_dim,
+ long short_dim
+ );
+ /*!
+ requires
+ - 0 < short_dim <= long_dim
+ ensures
+ - #get_min_object_length_short_dim() == short_dim
+ - #get_min_object_length_long_dim() == long_dim
+ !*/
+
+ double get_max_object_size (
+ ) const;
+ /*!
+ ensures
+ - When a chip is extracted around an object, the chip will be sized so that
+ both the object's height and width are at most get_max_object_size() *
+ the chip's height and width, respectively. E.g. if the chip is 640x480
+ pixels in size then the object will be at most 480*get_max_object_size()
+ pixels tall and 640*get_max_object_size() pixels wide.
+ !*/
+
+ void set_max_object_size (
+ double value
+ );
+ /*!
+ requires
+ - 0 < value
+ ensures
+ - #get_max_object_size() == value
+ !*/
+
+ template <
+ typename array_type
+ >
+ void append (
+ size_t num_crops,
+ const array_type& images,
+ const std::vector<std::vector<mmod_rect>>& rects,
+ array_type& crops,
+ std::vector<std::vector<mmod_rect>>& crop_rects
+ );
+ /*!
+ requires
+ - images.size() == rects.size()
+ - crops.size() == crop_rects.size()
+ - for all valid i:
+ - images[i].size() != 0
+ - array_type is a type with an interface compatible with dlib::array or
+ std::vector and it must in turn contain image objects that implement the
+ interface defined in dlib/image_processing/generic_image.h
+ ensures
+ - Randomly extracts num_crops chips from images and appends them to the end
+ of crops. We also copy the object metadata for each extracted crop and
+ store it into #crop_rects. In particular, calling this function is the
+ same as making multiple calls to the version of operator() below that
+ outputs a single crop, except that append() will use multiple CPU cores
+ to do the processing and is therefore faster.
+ - #crops.size() == crops.size()+num_crops
+ - #crop_rects.size() == crop_rects.size()+num_crops
+ !*/
+
+ template <
+ typename array_type
+ >
+ void operator() (
+ size_t num_crops,
+ const array_type& images,
+ const std::vector<std::vector<mmod_rect>>& rects,
+ array_type& crops,
+ std::vector<std::vector<mmod_rect>>& crop_rects
+ );
+ /*!
+ requires
+ - images.size() == rects.size()
+ - for all valid i:
+ - images[i].size() != 0
+ - array_type is a type with an interface compatible with dlib::array or
+ std::vector and it must in turn contain image objects that implement the
+ interface defined in dlib/image_processing/generic_image.h
+ ensures
+ - Randomly extracts num_crops chips from images. We also copy the object
+ metadata for each extracted crop and store it into #crop_rects. In
+ particular, calling this function is the same as invoking the version of
+ operator() below multiple times, except that this version of operator()
+ will use multiple CPU cores to do the processing and is therefore faster.
+ - #crops.size() == num_crops
+ - #crop_rects.size() == num_crops
+ !*/
+
+ template <
+ typename array_type,
+ typename image_type
+ >
+ void operator() (
+ const array_type& images,
+ const std::vector<std::vector<mmod_rect>>& rects,
+ image_type& crop,
+ std::vector<mmod_rect>& crop_rects
+ );
+ /*!
+ requires
+ - images.size() == rects.size()
+ - for all valid i:
+ - images[i].size() != 0
+ - image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - array_type is a type with an interface compatible with dlib::array or
+ std::vector and it must in turn contain image objects that implement the
+ interface defined in dlib/image_processing/generic_image.h
+ ensures
+ - Selects a random image and creates a random crop from it. Specifically,
+ we pick a random index IDX < images.size() and then execute
+ (*this)(images[IDX],rects[IDX],crop,crop_rects)
+ !*/
+
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ void operator() (
+ const image_type1& img,
+ const std::vector<mmod_rect>& rects,
+ image_type2& crop,
+ std::vector<mmod_rect>& crop_rects
+ );
+ /*!
+ requires
+ - img.size() != 0
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ ensures
+ - Extracts a random crop from img and copies over the mmod_rect objects in
+ rects to #crop_rects if they are contained inside the crop. Moreover,
+ rectangles are marked as ignore if they aren't completely contained
+ inside the crop.
+ - #crop_rects.size() <= rects.size()
+ !*/
+
+ template <
+ typename image_type1
+ >
+ image_type1 operator() (
+ const image_type1& img
+ );
+ /*!
+ requires
+ - img.size() != 0
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ ensures
+ - This function simply calls (*this)(img, junk1, crop, junk2) and returns
+ crop. Therefore it is simply a convenience function for extracting a
+ random background patch.
+ !*/
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ std::ostream& operator<< (
+ std::ostream& out,
+ const random_cropper& item
+ );
+ /*!
+ ensures
+ - Prints the state of all the parameters of item to out.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_RaNDOM_CROPPER_ABSTRACT_H_
+
+
diff --git a/ml/dlib/dlib/image_transforms/segment_image.h b/ml/dlib/dlib/image_transforms/segment_image.h
new file mode 100644
index 000000000..3b57e4801
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/segment_image.h
@@ -0,0 +1,730 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_SEGMENT_ImAGE_Hh_
+#define DLIB_SEGMENT_ImAGE_Hh_
+
+#include "segment_image_abstract.h"
+#include "../algs.h"
+#include <vector>
+#include "../geometry.h"
+#include "../disjoint_subsets.h"
+#include "../set.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ namespace impl
+ {
// Absolute difference of two unsigned values, computed without ever forming
// a negative (wrapped-around) intermediate result.
template <typename T>
inline T edge_diff_uint(
    const T& a,
    const T& b
)
{
    return (a > b) ? (a - b) : (b - a);
}
+
+ // ----------------------------------------
+
// Primary template: measures pixel dissimilarity as the Euclidean distance
// between the pixels' channel vectors.  The unused "enabled" parameter exists
// so the specializations below can select themselves via enable_if.
template <typename T, typename enabled = void>
struct edge_diff_funct
{
    typedef double diff_type;

    template <typename pixel_type>
    double operator()(
        const pixel_type& a,
        const pixel_type& b
    ) const
    {
        return length(pixel_to_vector<double>(a) - pixel_to_vector<double>(b));
    }
};
+
// Specialization for 8-bit pixels: the difference is itself a uint8, which is
// what enables the radix-sort overload of get_pixel_edges() below.
template <>
struct edge_diff_funct<uint8,void>
{
    typedef uint8 diff_type;
    uint8 operator()( const uint8& a, const uint8& b) const { return edge_diff_uint(a,b); }
};
+
// Specialization for 16-bit pixels: difference stays a uint16 (radix sortable).
template <>
struct edge_diff_funct<uint16,void>
{
    typedef uint16 diff_type;
    uint16 operator()( const uint16& a, const uint16& b) const { return edge_diff_uint(a,b); }
};
+
// Specialization for 32-bit pixels: exact unsigned difference (no radix sort;
// the value range is too large for the counting buckets).
template <>
struct edge_diff_funct<uint32,void>
{
    typedef uint32 diff_type;
    uint32 operator()( const uint32& a, const uint32& b) const { return edge_diff_uint(a,b); }
};
+
// Specialization for double-valued pixels: plain absolute difference.
template <>
struct edge_diff_funct<double,void>
{
    typedef double diff_type;
    double operator()( const double& a, const double& b) const { return std::abs(a-b); }
};
+
// Specialization for pixels that are dlib matrix objects (e.g. multi-channel
// feature images): dissimilarity is the Euclidean norm of the difference.
template <typename T>
struct edge_diff_funct<T, typename enable_if<is_matrix<T> >::type>
{
    typedef double diff_type;
    double operator()(
        const T& a,
        const T& b
    ) const
    {
        return length(a-b);
    }
};
+
+ // ------------------------------------------------------------------------------------
+
// Per-component bookkeeping for graph-based segmentation.  T is the edge
// weight type produced by edge_diff_funct.
template <typename T>
struct graph_image_segmentation_data_T
{
    // How many pixels have been merged into this disjoint-set component.
    unsigned long component_size = 1;
    // Weight of the largest edge used to build this component so far.
    T internal_diff = 0;
};
+
+ // ------------------------------------------------------------------------------------
+
// One edge of the pixel-adjacency graph.  Endpoints are stored as row-major
// linear pixel indices (y*width + x, with width taken from the image rect)
// and diff holds the dissimilarity of the two pixels.
template <typename T>
struct segment_image_edge_data_T
{
    segment_image_edge_data_T (){}

    segment_image_edge_data_T (
        const rectangle& rect,   // whole-image rect; supplies the row width
        const point& p1,
        const point& p2,
        const T& diff_
    ) :
        idx1(p1.y()*rect.width() + p1.x()),
        idx2(p2.y()*rect.width() + p2.x()),
        diff(diff_)
    {}

    // Order edges by ascending dissimilarity so sorting yields cheapest-first.
    bool operator<(const segment_image_edge_data_T& item) const
    { return diff < item.diff; }

    unsigned long idx1;
    unsigned long idx2;
    T diff;
};
+
+ // ------------------------------------------------------------------------------------
+
// Compile-time predicate: true when the image's pixels are exactly uint8 or
// uint16.  Those types have a small enough value range that edge weights can
// be radix sorted (see the first get_pixel_edges() overload below).
template <typename image_view_type>
struct uint8_or_uint16_pixels
{
    typedef typename image_view_type::pixel_type pixel_type;
    const static bool value = is_same_type<pixel_type,uint8>::value ||
                              is_same_type<pixel_type,uint16>::value;
};
+
// This is an overload of get_pixel_edges() that is optimized to segment images
// with 8bit or 16bit pixels very quickly.  We do this by using a radix sort
// instead of quicksort: edge weights fit in [0, max(ptype)], so we can count
// occurrences of each weight, convert the histogram to prefix sums, and then
// write every edge directly into its final sorted slot.
template <typename in_image_type, typename T>
typename enable_if<uint8_or_uint16_pixels<in_image_type> >::type
get_pixel_edges (
    const in_image_type& in_img,
    std::vector<segment_image_edge_data_T<T> >& sorted_edges
)
{
    typedef typename in_image_type::pixel_type ptype;
    typedef T diff_type;
    // One counting bucket per possible edge weight (256 for uint8, 65536 for uint16).
    std::vector<unsigned long> counts(std::numeric_limits<ptype>::max()+1, 0);

    edge_diff_funct<ptype> edge_diff;

    border_enumerator be(get_rect(in_img), 1);
    // we are going to do a radix sort on the edge weights. So the first step
    // is to accumulate them into count.
    const rectangle area = get_rect(in_img);
    while (be.move_next())
    {
        // Border pixels: count each in-bounds 4-neighbor edge.  NOTE(review):
        // an edge between two border pixels is counted once from each side,
        // so it appears twice in the output (the downstream merging code
        // tolerates duplicate edges).
        const long r = be.element().y();
        const long c = be.element().x();
        const ptype pix = in_img[r][c];
        if (area.contains(c-1,r)) counts[edge_diff(pix, in_img[r ][c-1])] += 1;
        if (area.contains(c+1,r)) counts[edge_diff(pix, in_img[r ][c+1])] += 1;
        if (area.contains(c ,r-1)) counts[edge_diff(pix, in_img[r-1][c ])] += 1;
        if (area.contains(c ,r+1)) counts[edge_diff(pix, in_img[r+1][c ])] += 1;
    }
    // Interior pixels: pairing each pixel with only its E, NE, SE and S
    // neighbors emits each interior 8-connected edge exactly once.
    for (long r = 1; r+1 < in_img.nr(); ++r)
    {
        for (long c = 1; c+1 < in_img.nc(); ++c)
        {
            const ptype pix = in_img[r][c];
            counts[edge_diff(pix, in_img[r-1][c+1])] += 1;
            counts[edge_diff(pix, in_img[r ][c+1])] += 1;
            counts[edge_diff(pix, in_img[r+1][c ])] += 1;
            counts[edge_diff(pix, in_img[r+1][c+1])] += 1;
        }
    }

    // Total number of edges emitted above: 4 per interior pixel plus the
    // border contributions (with corner adjustments).
    const unsigned long num_edges = shrink_rect(area,1).area()*4 + in_img.nr()*2*3 - 4 + (in_img.nc()-2)*2*3;
    typedef segment_image_edge_data_T<T> segment_image_edge_data;
    sorted_edges.resize(num_edges);

    // integrate counts. The idea is to have sorted_edges[counts[i]] be the location that edges
    // with an edge_diff of i go. So counts[0] == 0, counts[1] == number of 0 edge diff edges, etc.
    // (This converts the histogram into exclusive prefix sums, in place.)
    unsigned long prev = counts[0];
    for (unsigned long i = 1; i < counts.size(); ++i)
    {
        const unsigned long temp = counts[i];
        counts[i] += counts[i-1];
        counts[i-1] -= prev;
        prev = temp;
    }
    counts[counts.size()-1] -= prev;


    // now build a sorted list of all the edges.  Each edge is written directly
    // into its radix slot and that weight's cursor is advanced (counts[diff]++).
    be.reset();
    while(be.move_next())
    {
        const point p = be.element();
        const long r = p.y();
        const long c = p.x();
        const ptype pix = in_img[r][c];
        if (area.contains(c-1,r))
        {
            const diff_type diff = edge_diff(pix, in_img[r ][c-1]);
            sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c-1,r),diff);
        }

        if (area.contains(c+1,r))
        {
            const diff_type diff = edge_diff(pix, in_img[r ][c+1]);
            sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r),diff);
        }

        if (area.contains(c ,r-1))
        {
            const diff_type diff = edge_diff(pix, in_img[r-1][c ]);
            sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r-1),diff);
        }

        if (area.contains(c ,r+1))
        {
            const diff_type diff = edge_diff(pix, in_img[r+1][c ]);
            sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r+1),diff);
        }
    }
    // same thing as the above loop but now we do it on the interior of the image and therefore
    // don't have to include the boundary checking if statements used above.
    for (long r = 1; r+1 < in_img.nr(); ++r)
    {
        for (long c = 1; c+1 < in_img.nc(); ++c)
        {
            const point p(c,r);
            const ptype pix = in_img[r][c];
            diff_type diff;

            diff = edge_diff(pix, in_img[r ][c+1]);
            sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r),diff);
            diff = edge_diff(pix, in_img[r-1][c+1]);
            sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r-1),diff);
            diff = edge_diff(pix, in_img[r+1][c+1]);
            sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r+1),diff);
            diff = edge_diff(pix, in_img[r+1][c ]);
            sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r+1),diff);
        }
    }
}
+
+ // ----------------------------------------------------------------------------------------
+
// This is the general purpose version of get_pixel_edges(). It handles all pixel types.
// It collects the same 8-connected edge set as the radix overload above, but
// gathers edges into a vector and then sorts with std::sort (O(E log E)).
template <typename in_image_type, typename T>
typename disable_if<uint8_or_uint16_pixels<in_image_type> >::type
get_pixel_edges (
    const in_image_type& in_img,
    std::vector<segment_image_edge_data_T<T> >& sorted_edges
)
{
    const rectangle area = get_rect(in_img);
    // Roughly 4 edges per pixel are emitted; reserve to avoid reallocations.
    sorted_edges.reserve(area.area()*4);

    typedef typename in_image_type::pixel_type ptype;
    edge_diff_funct<ptype> edge_diff;
    typedef T diff_type;
    typedef segment_image_edge_data_T<T> segment_image_edge_data;

    border_enumerator be(get_rect(in_img), 1);

    // now build a sorted list of all the edges
    be.reset();
    while(be.move_next())
    {
        // Border pixels: emit each in-bounds 4-neighbor edge.  NOTE(review):
        // edges between two border pixels are emitted from both endpoints, so
        // duplicates exist; downstream merging tolerates them.
        const point p = be.element();
        const long r = p.y();
        const long c = p.x();
        const ptype& pix = in_img[r][c];
        if (area.contains(c-1,r))
        {
            const diff_type diff = edge_diff(pix, in_img[r ][c-1]);
            sorted_edges.push_back(segment_image_edge_data(area,p,point(c-1,r),diff));
        }

        if (area.contains(c+1,r))
        {
            const diff_type diff = edge_diff(pix, in_img[r ][c+1]);
            sorted_edges.push_back(segment_image_edge_data(area,p,point(c+1,r),diff));
        }

        if (area.contains(c ,r-1))
        {
            const diff_type diff = edge_diff(pix, in_img[r-1][c ]);
            sorted_edges.push_back( segment_image_edge_data(area,p,point(c ,r-1),diff));
        }
        if (area.contains(c ,r+1))
        {
            const diff_type diff = edge_diff(pix, in_img[r+1][c ]);
            sorted_edges.push_back( segment_image_edge_data(area,p,point(c ,r+1),diff));
        }
    }
    // same thing as the above loop but now we do it on the interior of the image and therefore
    // don't have to include the boundary checking if statements used above.
    // Pairing with only E, SE, S, NE neighbors emits each interior edge once.
    for (long r = 1; r+1 < in_img.nr(); ++r)
    {
        for (long c = 1; c+1 < in_img.nc(); ++c)
        {
            const point p(c,r);
            const ptype& pix = in_img[r][c];
            diff_type diff;

            diff = edge_diff(pix, in_img[r ][c+1]);
            sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r),diff));
            diff = edge_diff(pix, in_img[r+1][c+1]);
            sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r+1),diff));
            diff = edge_diff(pix, in_img[r+1][c ]);
            sorted_edges.push_back( segment_image_edge_data(area,p,point(c ,r+1),diff));
            diff = edge_diff(pix, in_img[r-1][c+1]);
            sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r-1),diff));
        }
    }

    // Sort all edges by ascending dissimilarity.
    std::sort(sorted_edges.begin(), sorted_edges.end());

}
+
+ // ------------------------------------------------------------------------------------
+
+ } // end of namespace impl
+
+// ----------------------------------------------------------------------------------------
+
// Segments in_img_ into regions of similar pixels using graph-based
// segmentation in the style of Felzenszwalb-Huttenlocher: edges are processed
// cheapest first and two components merge when the edge weight is no larger
// than either component's internal variation plus an adaptive threshold
// k/component_size.  On return every pixel of out_img_ holds its blob's
// label; labels are disjoint-set root indices (unique per blob, but not
// contiguous integers).
template <
    typename in_image_type,
    typename out_image_type
>
void segment_image (
    const in_image_type& in_img_,
    out_image_type& out_img_,
    const double k = 200,               // larger k => easier merging => bigger blobs
    const unsigned long min_size = 10   // blobs smaller than this get merged away (0 disables)
)
{
    using namespace dlib::impl;
    typedef typename image_traits<in_image_type>::pixel_type ptype;
    typedef typename edge_diff_funct<ptype>::diff_type diff_type;

    // make sure requires clause is not broken
    DLIB_ASSERT(is_same_object(in_img_, out_img_) == false,
        "\t void segment_image()"
        << "\n\t The input images can't be the same object."
        );

    // Output pixels hold blob labels, so they must be an unsigned integer type.
    COMPILE_TIME_ASSERT(is_unsigned_type<typename image_traits<out_image_type>::pixel_type>::value);

    const_image_view<in_image_type> in_img(in_img_);
    image_view<out_image_type> out_img(out_img_);

    out_img.set_size(in_img.nr(), in_img.nc());
    // don't bother doing anything if the image is too small
    if (in_img.nr() < 2 || in_img.nc() < 2)
    {
        assign_all_pixels(out_img,0);
        return;
    }

    // One disjoint-set element per pixel.
    disjoint_subsets sets;
    sets.set_size(in_img.size());

    // All pixel-adjacency edges, sorted by ascending dissimilarity.
    std::vector<segment_image_edge_data_T<diff_type> > sorted_edges;
    get_pixel_edges(in_img, sorted_edges);

    std::vector<graph_image_segmentation_data_T<diff_type> > data(in_img.size());

    // now start connecting blobs together to make a minimum spanning tree.
    for (unsigned long i = 0; i < sorted_edges.size(); ++i)
    {
        const unsigned long idx1 = sorted_edges[i].idx1;
        const unsigned long idx2 = sorted_edges[i].idx2;

        unsigned long set1 = sets.find_set(idx1);
        unsigned long set2 = sets.find_set(idx2);
        if (set1 != set2)
        {
            const diff_type diff = sorted_edges[i].diff;
            // Adaptive thresholds: small components merge more readily.
            const diff_type tau1 = static_cast<diff_type>(k/data[set1].component_size);
            const diff_type tau2 = static_cast<diff_type>(k/data[set2].component_size);

            const diff_type mint = std::min(data[set1].internal_diff + tau1,
                                            data[set2].internal_diff + tau2);
            if (diff <= mint)
            {
                const unsigned long new_set = sets.merge_sets(set1, set2);
                data[new_set].component_size = data[set1].component_size + data[set2].component_size;
                // Edges arrive in ascending order, so diff is the largest
                // weight in the merged component's spanning tree.
                data[new_set].internal_diff = diff;
            }
        }
    }

    // now merge any really small blobs
    if (min_size != 0)
    {
        for (unsigned long i = 0; i < sorted_edges.size(); ++i)
        {
            const unsigned long idx1 = sorted_edges[i].idx1;
            const unsigned long idx2 = sorted_edges[i].idx2;

            unsigned long set1 = sets.find_set(idx1);
            unsigned long set2 = sets.find_set(idx2);
            if (set1 != set2 && (data[set1].component_size < min_size || data[set2].component_size < min_size))
            {
                const unsigned long new_set = sets.merge_sets(set1, set2);
                data[new_set].component_size = data[set1].component_size + data[set2].component_size;
                //data[new_set].internal_diff = sorted_edges[i].diff;
            }
        }
    }

    // Write each pixel's disjoint-set root id as its blob label.
    unsigned long idx = 0;
    for (long r = 0; r < out_img.nr(); ++r)
    {
        for (long c = 0; c < out_img.nc(); ++c)
        {
            out_img[r][c] = sets.find_set(idx++);
        }
    }
}
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// Candidate object location generation code.
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ namespace impl
+ {
// Connects two blobs found by the segmentation stage.  set1/set2 index into
// the out_rects array of find_basic_candidate_object_locations() and
// edge_diff scores how dissimilar the two blobs are.
struct edge_data
{
    double edge_diff;
    unsigned long set1;
    unsigned long set2;

    // Order by ascending dissimilarity so std::sort puts the most similar
    // blob pairs first.
    bool operator<(const edge_data& rhs) const
    {
        return edge_diff < rhs.edge_diff;
    }
};
+
// One pass of selective-search style candidate generation: runs graph-based
// segmentation on in_img with threshold k, then reports every resulting
// blob's bounding box in out_rects and the dissimilarity edges between
// neighboring blobs in edges (sorted ascending).  The edge list is what the
// caller uses to iteratively merge neighboring blobs into larger candidates.
template <
    typename in_image_type,
    typename diff_type
>
void find_basic_candidate_object_locations (
    const in_image_type& in_img,
    const std::vector<dlib::impl::segment_image_edge_data_T<diff_type> >& sorted_edges,
    std::vector<rectangle>& out_rects,  // out: one bounding box per blob
    std::vector<edge_data>& edges,      // out: inter-blob edges, sorted ascending
    const double k,
    const unsigned long min_size
)
{
    using namespace dlib::impl;

    // Edges that did NOT cause a merge; kept because they connect distinct
    // blobs and are needed for the small-blob merge and neighbor-edge stages.
    std::vector<dlib::impl::segment_image_edge_data_T<diff_type> > rejected_edges;
    rejected_edges.reserve(sorted_edges.size());

    out_rects.clear();
    edges.clear();

    // don't bother doing anything if the image is too small
    if (in_img.nr() < 2 || in_img.nc() < 2)
    {
        return;
    }

    disjoint_subsets sets;
    sets.set_size(in_img.size());


    std::vector<graph_image_segmentation_data_T<diff_type> > data(in_img.size());



    std::pair<unsigned long,unsigned long> last_blob_edge(std::numeric_limits<unsigned long>::max(),
                                                          std::numeric_limits<unsigned long>::max());;
    // now start connecting blobs together to make a minimum spanning tree.
    // Same merge rule as segment_image() (threshold k/component_size).
    for (unsigned long i = 0; i < sorted_edges.size(); ++i)
    {
        const unsigned long idx1 = sorted_edges[i].idx1;
        const unsigned long idx2 = sorted_edges[i].idx2;

        unsigned long set1 = sets.find_set(idx1);
        unsigned long set2 = sets.find_set(idx2);
        if (set1 != set2)
        {
            const diff_type diff = sorted_edges[i].diff;
            const diff_type tau1 = static_cast<diff_type>(k/data[set1].component_size);
            const diff_type tau2 = static_cast<diff_type>(k/data[set2].component_size);

            const diff_type mint = std::min(data[set1].internal_diff + tau1,
                                            data[set2].internal_diff + tau2);
            if (diff <= mint)
            {
                const unsigned long new_set = sets.merge_sets(set1, set2);
                data[new_set].component_size = data[set1].component_size + data[set2].component_size;
                data[new_set].internal_diff = diff;
            }
            else
            {
                // Don't bother keeping multiple edges from the same pair of blobs, we
                // only need one for what we will do later.  (Only immediately
                // repeated (set1,set2) pairs are suppressed here; full
                // de-duplication happens in the neighbors_final set below.)
                if (std::make_pair(set1,set2) != last_blob_edge)
                {
                    segment_image_edge_data_T<diff_type> temp = sorted_edges[i];
                    temp.idx1 = set1;
                    temp.idx2 = set2;
                    rejected_edges.push_back(temp);
                    last_blob_edge = std::make_pair(set1,set2);
                }
            }
        }
    }


    // merge small blobs
    for (unsigned long i = 0; i < rejected_edges.size(); ++i)
    {
        const unsigned long idx1 = rejected_edges[i].idx1;
        const unsigned long idx2 = rejected_edges[i].idx2;

        unsigned long set1 = sets.find_set(idx1);
        unsigned long set2 = sets.find_set(idx2);
        // Re-point the stored edge at the current roots for the next stage.
        rejected_edges[i].idx1 = set1;
        rejected_edges[i].idx2 = set2;
        if (set1 != set2 && (data[set1].component_size < min_size || data[set2].component_size < min_size))
        {
            const unsigned long new_set = sets.merge_sets(set1, set2);
            data[new_set].component_size = data[set1].component_size + data[set2].component_size;
            data[new_set].internal_diff = rejected_edges[i].diff;
        }
    }

    // find bounding boxes of each blob
    std::map<unsigned long, rectangle> boxes;          // blob root id -> bounding box
    std::map<unsigned long, unsigned long> box_id_map; // blob root id -> index in out_rects
    unsigned long idx = 0;
    for (long r = 0; r < in_img.nr(); ++r)
    {
        for (long c = 0; c < in_img.nc(); ++c)
        {
            const unsigned long id = sets.find_set(idx++);
            // Accumulate the current point into its box and if it is the first point
            // in the box then also record the id number for this box.
            if ((boxes[id] += point(c,r)).area() == 1)
                box_id_map[id] = boxes.size()-1;
        }
    }

    // copy boxes into out_rects
    out_rects.resize(boxes.size());
    for (std::map<unsigned long,rectangle>::iterator i = boxes.begin(); i != boxes.end(); ++i)
    {
        out_rects[box_id_map[i->first]] = i->second;
    }

    // Now find the edges between the boxes.  neighbors_final de-duplicates so
    // each surviving (set1,set2) pair contributes exactly one edge_data.
    typedef dlib::memory_manager<char>::kernel_2c mm_type;
    dlib::set<std::pair<unsigned long, unsigned long>, mm_type>::kernel_1a neighbors_final;
    for (unsigned long i = 0; i < rejected_edges.size(); ++i)
    {
        const unsigned long idx1 = rejected_edges[i].idx1;
        const unsigned long idx2 = rejected_edges[i].idx2;

        unsigned long set1 = sets.find_set(idx1);
        unsigned long set2 = sets.find_set(idx2);
        if (set1 != set2)
        {
            std::pair<unsigned long, unsigned long> p = std::make_pair(set1,set2);
            if (!neighbors_final.is_member(p))
            {
                neighbors_final.add(p);

                // Score the edge relative to how varied the blobs already are.
                edge_data temp;
                const diff_type mint = std::min(data[set1].internal_diff ,
                                                data[set2].internal_diff );
                temp.edge_diff = rejected_edges[i].diff - mint;
                temp.set1 = box_id_map[set1];
                temp.set2 = box_id_map[set2];
                edges.push_back(temp);
            }
        }
    }

    std::sort(edges.begin(), edges.end());
}
+ } // end namespace impl
+
+// ----------------------------------------------------------------------------------------
+
+ // Removes duplicate rectangles from rects in place so that every rectangle
+ // appears exactly once afterwards. Uses the standard sort-then-compact
+ // idiom, so overall cost is O(n log n) and the surviving elements end up
+ // in sorted order.
+ template <typename alloc>
+ void remove_duplicates (
+ std::vector<rectangle,alloc>& rects
+ )
+ {
+ std::sort(rects.begin(), rects.end(), std::less<rectangle>());
+ // After sorting, duplicates are adjacent: keep the first occurrence of
+ // each run and count how many unique entries were retained.
+ unsigned long num_unique = 1;
+ for (unsigned long i = 1; i < rects.size(); ++i)
+ {
+ if (rects[i] != rects[i-1])
+ {
+ rects[num_unique++] = rects[i];
+ }
+ }
+ // Guard the resize: num_unique starts at 1 even when rects is empty, so
+ // an unconditional resize would wrongly grow an empty vector.
+ if (rects.size() != 0)
+ rects.resize(num_unique);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Generates candidate object bounding boxes for in_img_ and appends them to
+ // rects (existing contents of rects are preserved; remove_duplicates() is
+ // run at the end so no duplicates are appended). For each k in kvals it
+ // runs a basic Felzenszwalb-style segmentation, records a box per segment,
+ // and then repeatedly merges neighboring segments (up to
+ // max_merging_iterations rounds), recording every merged box as well.
+ // Segments smaller than min_size pixels are filtered out by
+ // find_basic_candidate_object_locations.
+ template <
+ typename in_image_type,
+ typename EXP
+ >
+ void find_candidate_object_locations (
+ const in_image_type& in_img_,
+ std::vector<rectangle>& rects,
+ const matrix_exp<EXP>& kvals,
+ const unsigned long min_size = 20,
+ const unsigned long max_merging_iterations = 50
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(is_vector(kvals) && kvals.size() > 0,
+ "\t void find_candidate_object_locations()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t is_vector(kvals): " << is_vector(kvals)
+ << "\n\t kvals.size(): " << kvals.size()
+ );
+
+ typedef dlib::memory_manager<char>::kernel_2c mm_type;
+ typedef dlib::set<rectangle, mm_type>::kernel_1a set_of_rects;
+
+ using namespace dlib::impl;
+ typedef typename image_traits<in_image_type>::pixel_type ptype;
+ typedef typename edge_diff_funct<ptype>::diff_type diff_type;
+
+ const_image_view<in_image_type> in_img(in_img_);
+
+ // don't bother doing anything if the image is too small
+ if (in_img.nr() < 2 || in_img.nc() < 2)
+ {
+ return;
+ }
+
+ std::vector<edge_data> edges;
+ std::vector<rectangle> working_rects;
+ // The pixel-difference edge graph is computed once and reused for every
+ // value of k, since it does not depend on k.
+ std::vector<segment_image_edge_data_T<diff_type> > sorted_edges;
+ get_pixel_edges(in_img, sorted_edges);
+
+ disjoint_subsets sets;
+
+ for (long j = 0; j < kvals.size(); ++j)
+ {
+ const double k = kvals(j);
+
+ find_basic_candidate_object_locations(in_img, sorted_edges, working_rects, edges, k, min_size);
+ rects.insert(rects.end(), working_rects.begin(), working_rects.end());
+
+
+ // Now iteratively merge all the rectangles we have and record the results.
+ // Note that, unlike what is described in the paper
+ // "Segmentation as Selective Search for Object Recognition" by Koen E. A. van de Sande, et al.
+ // we don't use any kind of histogram/SIFT like thing to order the edges
+ // between the blobs. Here we simply order by the pixel difference value.
+ // Additionally, note that we keep progressively merging boxes in the outer
+ // loop rather than performing just a single iteration as indicated in the
+ // paper.
+ set_of_rects detected_rects;
+ bool did_merge = true;
+ for (unsigned long iter = 0; did_merge && iter < max_merging_iterations; ++iter)
+ {
+ did_merge = false;
+ sets.clear();
+ sets.set_size(working_rects.size());
+
+ // recursively merge neighboring blobs until we have merged everything
+ for (unsigned long i = 0; i < edges.size(); ++i)
+ {
+ edge_data temp = edges[i];
+
+ temp.set1 = sets.find_set(temp.set1);
+ temp.set2 = sets.find_set(temp.set2);
+ if (temp.set1 != temp.set2)
+ {
+ // rectangle operator+ yields the bounding box of both boxes.
+ rectangle merged_rect = working_rects[temp.set1] + working_rects[temp.set2];
+ // Skip merging this pair of blobs if it was merged in a previous
+ // iteration. Doing this lets us consider other possible blob
+ // merges.
+ if (!detected_rects.is_member(merged_rect))
+ {
+ const unsigned long new_set = sets.merge_sets(temp.set1, temp.set2);
+ rects.push_back(merged_rect);
+ working_rects[new_set] = merged_rect;
+ did_merge = true;
+ detected_rects.add(merged_rect);
+ }
+ }
+ }
+ }
+ }
+
+ // Multiple k values and merge rounds can produce the same box many times.
+ remove_duplicates(rects);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Convenience overload: runs find_candidate_object_locations() with the
+ // default k values linspace(50, 200, 3), i.e. k = 50, 125, and 200, and the
+ // default min_size/max_merging_iterations of the main overload.
+ template <
+ typename in_image_type
+ >
+ void find_candidate_object_locations (
+ const in_image_type& in_img,
+ std::vector<rectangle>& rects
+ )
+ {
+ find_candidate_object_locations(in_img, rects, linspace(50, 200, 3));
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SEGMENT_ImAGE_Hh_
+
diff --git a/ml/dlib/dlib/image_transforms/segment_image_abstract.h b/ml/dlib/dlib/image_transforms/segment_image_abstract.h
new file mode 100644
index 000000000..af1af46a1
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/segment_image_abstract.h
@@ -0,0 +1,126 @@
+// Copyright (C) 2011 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_SEGMENT_ImAGE_ABSTRACT_Hh_
+#ifdef DLIB_SEGMENT_ImAGE_ABSTRACT_Hh_
+
+#include <vector>
+#include "../matrix.h"
+#include "../image_processing/generic_image.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type
+ >
+ void segment_image (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ const double k = 200,
+ const unsigned long min_size = 10
+ );
+ /*!
+ requires
+ - in_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - out_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - in_image_type can contain any pixel type with a pixel_traits specialization
+ or a dlib matrix object representing a row or column vector.
+ - out_image_type must contain an unsigned integer pixel type.
+ - is_same_object(in_img, out_img) == false
+ ensures
+ - Attempts to segment in_img into regions which have some visual consistency to
+ them. In particular, this function implements the algorithm described in the
+ paper: Efficient Graph-Based Image Segmentation by Felzenszwalb and Huttenlocher.
+ - #out_img.nr() == in_img.nr()
+ - #out_img.nc() == in_img.nc()
+ - for all valid r and c:
+ - #out_img[r][c] == an integer value indicating the identity of the segment
+ containing the pixel in_img[r][c].
+ - The k parameter is a measure used to influence how large the segment regions
+ will be. Larger k generally results in larger segments being produced. For
+ a deeper discussion of the k parameter you should consult the above
+ referenced paper.
+ - min_size is a lower bound on the size of the output segments. That is, it is
+ guaranteed that all output segments will have at least min_size pixels in
+ them (unless the whole image contains fewer than min_size pixels, in this
+ case the entire image will be put into a single segment).
+ !*/
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename EXP
+ >
+ void find_candidate_object_locations (
+ const in_image_type& in_img,
+ std::vector<rectangle>& rects,
+ const matrix_exp<EXP>& kvals = linspace(50, 200, 3),
+ const unsigned long min_size = 20,
+ const unsigned long max_merging_iterations = 50
+ );
+ /*!
+ requires
+ - in_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - is_vector(kvals) == true
+ - kvals.size() > 0
+ ensures
+ - This function takes an input image and generates a set of candidate
+ rectangles which are expected to bound any objects in the image. It does
+ this by running a version of the segment_image() routine on the image and
+ then reports rectangles containing each of the segments as well as rectangles
+ containing unions of adjacent segments. The basic idea is described in the
+ paper:
+ Segmentation as Selective Search for Object Recognition by Koen E. A. van de Sande, et al.
+ Note that this function deviates from what is described in the paper slightly.
+ See the code for details.
+ - The basic segmentation is performed kvals.size() times, each time with the k
+ parameter (see segment_image() and the Felzenszwalb paper for details on k)
+ set to a different value from kvals.
+ - When doing the basic segmentations prior to any box merging, we discard all
+ rectangles that have an area < min_size. Therefore, all outputs and
+ subsequent merged rectangles are built out of rectangles that contain at
+ least min_size pixels. Note that setting min_size to a smaller value than
+ you might otherwise be interested in using can be useful since it allows a
+ larger number of possible merged boxes to be created.
+ - There are max_merging_iterations rounds of neighboring blob merging.
+ Therefore, this parameter has some effect on the number of output rectangles
+ you get, with larger values of the parameter giving more output rectangles.
+ - This function appends the output rectangles into #rects. This means that any
+ rectangles in rects before this function was called will still be in there
+ after it terminates. Note further that #rects will not contain any duplicate
+ rectangles. That is, for all valid i and j where i != j it will be true
+ that:
+ - #rects[i] != rects[j]
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename alloc
+ >
+ void remove_duplicates (
+ std::vector<rectangle,alloc>& rects
+ );
+ /*!
+ ensures
+ - This function finds any duplicate rectangles in rects and removes the extra
+ instances. This way, the result is that rects contains only unique rectangle
+ instances.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SEGMENT_ImAGE_ABSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/image_transforms/spatial_filtering.h b/ml/dlib/dlib/image_transforms/spatial_filtering.h
new file mode 100644
index 000000000..91dcae321
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/spatial_filtering.h
@@ -0,0 +1,1580 @@
+// Copyright (C) 2006 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_SPATIAL_FILTERINg_H_
+#define DLIB_SPATIAL_FILTERINg_H_
+
+#include "../pixel.h"
+#include "spatial_filtering_abstract.h"
+#include "../algs.h"
+#include "../assert.h"
+#include "../array2d.h"
+#include "../matrix.h"
+#include "../geometry/border_enumerator.h"
+#include "../simd.h"
+#include <limits>
+#include "assign_image.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ namespace impl
+ {
+ // Applies the 2D filter filter_ to the grayscale image in_img_ and writes
+ // the result into out_img_ (same dimensions as the input). Each output
+ // pixel is the filter-weighted sum of the corresponding input neighborhood,
+ // divided by scale; if use_abs is true negative results are negated, and if
+ // add_to is true results are accumulated into out_img_ instead of
+ // overwriting it. Returns the rectangle of pixels that received a full
+ // filter application (the non-border region).
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP,
+ typename T
+ >
+ rectangle grayscale_spatially_filter_image (
+ const in_image_type& in_img_,
+ out_image_type& out_img_,
+ const matrix_exp<EXP>& filter_,
+ T scale,
+ bool use_abs,
+ bool add_to
+ )
+ {
+ const_temp_matrix<EXP> filter(filter_);
+ COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false );
+ COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false );
+
+ DLIB_ASSERT(scale != 0 && filter.size() != 0,
+ "\trectangle spatially_filter_image()"
+ << "\n\t You can't give a scale of zero or an empty filter."
+ << "\n\t scale: "<< scale
+ << "\n\t filter.nr(): "<< filter.nr()
+ << "\n\t filter.nc(): "<< filter.nc()
+ );
+ DLIB_ASSERT(is_same_object(in_img_, out_img_) == false,
+ "\trectangle spatially_filter_image()"
+ << "\n\tYou must give two different image objects"
+ );
+
+
+ const_image_view<in_image_type> in_img(in_img_);
+ image_view<out_image_type> out_img(out_img_);
+
+ // if there isn't any input image then don't do anything
+ if (in_img.size() == 0)
+ {
+ out_img.clear();
+ return rectangle();
+ }
+
+ out_img.set_size(in_img.nr(),in_img.nc());
+
+
+ // figure out the range that we should apply the filter to
+ const long first_row = filter.nr()/2;
+ const long first_col = filter.nc()/2;
+ const long last_row = in_img.nr() - ((filter.nr()-1)/2);
+ const long last_col = in_img.nc() - ((filter.nc()-1)/2);
+
+ const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1);
+ // The border pixels never get a full filter window; zero them unless we
+ // are accumulating into an existing image.
+ if (!add_to)
+ zero_border_pixels(out_img_, non_border);
+
+ // apply the filter to the image
+ for (long r = first_row; r < last_row; ++r)
+ {
+ for (long c = first_col; c < last_col; ++c)
+ {
+ // Accumulate in the filter's element type to avoid premature
+ // truncation to the output pixel type.
+ typedef typename EXP::type ptype;
+ ptype p;
+ ptype temp = 0;
+ for (long m = 0; m < filter.nr(); ++m)
+ {
+ for (long n = 0; n < filter.nc(); ++n)
+ {
+ // pull out the current pixel and put it into p
+ p = get_pixel_intensity(in_img[r-first_row+m][c-first_col+n]);
+ temp += p*filter(m,n);
+ }
+ }
+
+ temp /= scale;
+
+ if (use_abs && temp < 0)
+ {
+ temp = -temp;
+ }
+
+ // save this pixel to the output image
+ if (add_to == false)
+ {
+ assign_pixel(out_img[r][c], temp);
+ }
+ else
+ {
+ assign_pixel(out_img[r][c], temp + out_img[r][c]);
+ }
+ }
+ }
+
+ return non_border;
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ // SIMD-optimized variant of grayscale_spatially_filter_image for float
+ // images with float filters (no scale/use_abs handling; callers that need
+ // scaling divide the filter beforehand). Processes 8 output columns at a
+ // time with simd8f and falls back to a scalar loop for the remaining
+ // columns. Returns the non-border rectangle that received a full filter
+ // application.
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP
+ >
+ rectangle float_spatially_filter_image (
+ const in_image_type& in_img_,
+ out_image_type& out_img_,
+ const matrix_exp<EXP>& filter_,
+ bool add_to
+ )
+ {
+
+ const_temp_matrix<EXP> filter(filter_);
+ DLIB_ASSERT(filter.size() != 0,
+ "\trectangle spatially_filter_image()"
+ << "\n\t You can't give an empty filter."
+ << "\n\t filter.nr(): "<< filter.nr()
+ << "\n\t filter.nc(): "<< filter.nc()
+ );
+ DLIB_ASSERT(is_same_object(in_img_, out_img_) == false,
+ "\trectangle spatially_filter_image()"
+ << "\n\tYou must give two different image objects"
+ );
+
+
+ const_image_view<in_image_type> in_img(in_img_);
+ image_view<out_image_type> out_img(out_img_);
+
+ // if there isn't any input image then don't do anything
+ if (in_img.size() == 0)
+ {
+ out_img.clear();
+ return rectangle();
+ }
+
+ out_img.set_size(in_img.nr(),in_img.nc());
+
+
+ // figure out the range that we should apply the filter to
+ const long first_row = filter.nr()/2;
+ const long first_col = filter.nc()/2;
+ const long last_row = in_img.nr() - ((filter.nr()-1)/2);
+ const long last_col = in_img.nc() - ((filter.nc()-1)/2);
+
+ const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1);
+ if (!add_to)
+ zero_border_pixels(out_img_, non_border);
+
+ // apply the filter to the image
+ for (long r = first_row; r < last_row; ++r)
+ {
+ // Vectorized path: 8 output pixels per iteration.
+ long c = first_col;
+ for (; c < last_col-7; c+=8)
+ {
+ // Three independent accumulators hide the latency of the
+ // multiply-adds; filter columns are unrolled by 3.
+ simd8f p,p2,p3;
+ simd8f temp = 0, temp2=0, temp3=0;
+ for (long m = 0; m < filter.nr(); ++m)
+ {
+ long n = 0;
+ for (; n < filter.nc()-2; n+=3)
+ {
+ // pull out the current pixel and put it into p
+ p.load(&in_img[r-first_row+m][c-first_col+n]);
+ p2.load(&in_img[r-first_row+m][c-first_col+n+1]);
+ p3.load(&in_img[r-first_row+m][c-first_col+n+2]);
+ temp += p*filter(m,n);
+ temp2 += p2*filter(m,n+1);
+ temp3 += p3*filter(m,n+2);
+ }
+ // Leftover filter columns (when nc is not a multiple of 3).
+ for (; n < filter.nc(); ++n)
+ {
+ // pull out the current pixel and put it into p
+ p.load(&in_img[r-first_row+m][c-first_col+n]);
+ temp += p*filter(m,n);
+ }
+ }
+ temp += temp2+temp3;
+
+ // save this pixel to the output image
+ if (add_to == false)
+ {
+ temp.store(&out_img[r][c]);
+ }
+ else
+ {
+ p.load(&out_img[r][c]);
+ temp += p;
+ temp.store(&out_img[r][c]);
+ }
+ }
+ // Scalar tail for the last (< 8) columns of the row.
+ for (; c < last_col; ++c)
+ {
+ float p;
+ float temp = 0;
+ for (long m = 0; m < filter.nr(); ++m)
+ {
+ for (long n = 0; n < filter.nc(); ++n)
+ {
+ // pull out the current pixel and put it into p
+ p = in_img[r-first_row+m][c-first_col+n];
+ temp += p*filter(m,n);
+ }
+ }
+
+ // save this pixel to the output image
+ if (add_to == false)
+ {
+ out_img[r][c] = temp;
+ }
+ else
+ {
+ out_img[r][c] += temp;
+ }
+ }
+ }
+
+ return non_border;
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Compile-time trait: true when both images have float pixels and the
+ // filter's element type is float, i.e. when the SIMD float filtering path
+ // (impl::float_spatially_filter_image) can be used.
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP
+ >
+ struct is_float_filtering2
+ {
+ const static bool value = is_same_type<typename image_traits<in_image_type>::pixel_type,float>::value &&
+ is_same_type<typename image_traits<out_image_type>::pixel_type,float>::value &&
+ is_same_type<typename EXP::type,float>::value;
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ // spatially_filter_image overload selected for all-float grayscale
+ // filtering. Dispatches to the SIMD float implementation when use_abs is
+ // false (folding scale into the filter so the fast path needs no division),
+ // and falls back to the generic grayscale implementation when use_abs is
+ // requested.
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP,
+ typename T
+ >
+ typename enable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale &&
+ is_float_filtering2<in_image_type,out_image_type,EXP>::value,rectangle>::type
+ spatially_filter_image (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ const matrix_exp<EXP>& filter,
+ T scale,
+ bool use_abs = false,
+ bool add_to = false
+ )
+ {
+ if (use_abs == false)
+ {
+ // Avoid building a scaled filter expression when scale is a no-op.
+ if (scale == 1)
+ return impl::float_spatially_filter_image(in_img, out_img, filter, add_to);
+ else
+ return impl::float_spatially_filter_image(in_img, out_img, filter/scale, add_to);
+ }
+ else
+ {
+ return impl::grayscale_spatially_filter_image(in_img, out_img, filter, scale, true, add_to);
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // spatially_filter_image overload for grayscale images that do not qualify
+ // for the all-float SIMD path; simply forwards to the generic grayscale
+ // implementation.
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP,
+ typename T
+ >
+ typename enable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale &&
+ !is_float_filtering2<in_image_type,out_image_type,EXP>::value,rectangle>::type
+ spatially_filter_image (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ const matrix_exp<EXP>& filter,
+ T scale,
+ bool use_abs = false,
+ bool add_to = false
+ )
+ {
+ return impl::grayscale_spatially_filter_image(in_img,out_img,filter,scale,use_abs,add_to);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // spatially_filter_image overload for non-grayscale (e.g. RGB) output
+ // pixels. Each pixel is converted to a channel vector, the filter is
+ // applied per channel, the sum is divided by scale, and the vector is
+ // converted back to a pixel. Unlike the grayscale overloads there are no
+ // use_abs/add_to options. Returns the non-border rectangle.
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP,
+ typename T
+ >
+ typename disable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale,rectangle>::type
+ spatially_filter_image (
+ const in_image_type& in_img_,
+ out_image_type& out_img_,
+ const matrix_exp<EXP>& filter_,
+ T scale
+ )
+ {
+ const_temp_matrix<EXP> filter(filter_);
+ COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false );
+ COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false );
+
+ DLIB_ASSERT(scale != 0 && filter.size() != 0,
+ "\trectangle spatially_filter_image()"
+ << "\n\t You can't give a scale of zero or an empty filter."
+ << "\n\t scale: "<< scale
+ << "\n\t filter.nr(): "<< filter.nr()
+ << "\n\t filter.nc(): "<< filter.nc()
+ );
+ DLIB_ASSERT(is_same_object(in_img_, out_img_) == false,
+ "\trectangle spatially_filter_image()"
+ << "\n\tYou must give two different image objects"
+ );
+
+
+ const_image_view<in_image_type> in_img(in_img_);
+ image_view<out_image_type> out_img(out_img_);
+
+ // if there isn't any input image then don't do anything
+ if (in_img.size() == 0)
+ {
+ out_img.clear();
+ return rectangle();
+ }
+
+ out_img.set_size(in_img.nr(),in_img.nc());
+
+
+ // figure out the range that we should apply the filter to
+ const long first_row = filter.nr()/2;
+ const long first_col = filter.nc()/2;
+ const long last_row = in_img.nr() - ((filter.nr()-1)/2);
+ const long last_col = in_img.nc() - ((filter.nc()-1)/2);
+
+ const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1);
+ zero_border_pixels(out_img, non_border);
+
+ // apply the filter to the image
+ for (long r = first_row; r < last_row; ++r)
+ {
+ for (long c = first_col; c < last_col; ++c)
+ {
+ // Accumulate per-channel sums in a fixed-size column vector whose
+ // length equals the pixel's channel count.
+ typedef typename image_traits<in_image_type>::pixel_type pixel_type;
+ typedef matrix<typename EXP::type,pixel_traits<pixel_type>::num,1> ptype;
+ ptype p;
+ ptype temp;
+ temp = 0;
+ for (long m = 0; m < filter.nr(); ++m)
+ {
+ for (long n = 0; n < filter.nc(); ++n)
+ {
+ // pull out the current pixel and put it into p
+ p = pixel_to_vector<typename EXP::type>(in_img[r-first_row+m][c-first_col+n]);
+ temp += p*filter(m,n);
+ }
+ }
+
+ temp /= scale;
+
+ pixel_type pp;
+ vector_to_pixel(pp, temp);
+ assign_pixel(out_img[r][c], pp);
+ }
+ }
+
+ return non_border;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Convenience overload: filter with no scaling (scale == 1). Resolves to
+ // whichever 4-argument overload matches the image/filter types.
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP
+ >
+ rectangle spatially_filter_image (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ const matrix_exp<EXP>& filter
+ )
+ {
+ return spatially_filter_image(in_img,out_img,filter,1);
+ }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ namespace impl
+ {
+ // Applies a separable filter (outer product of col_filter and row_filter)
+ // to a grayscale image in two passes: first the row filter along each row
+ // into a temporary image, then the column filter over that temporary.
+ // Results are divided by scale, optionally made non-negative (use_abs) and
+ // optionally accumulated into out_img_ (add_to). Returns the non-border
+ // rectangle that received a full filter application.
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP1,
+ typename EXP2,
+ typename T
+ >
+ rectangle grayscale_spatially_filter_image_separable (
+ const in_image_type& in_img_,
+ out_image_type& out_img_,
+ const matrix_exp<EXP1>& _row_filter,
+ const matrix_exp<EXP2>& _col_filter,
+ T scale,
+ bool use_abs,
+ bool add_to
+ )
+ {
+ const_temp_matrix<EXP1> row_filter(_row_filter);
+ const_temp_matrix<EXP2> col_filter(_col_filter);
+ COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false );
+ COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false );
+
+ DLIB_ASSERT(scale != 0 && row_filter.size() != 0 && col_filter.size() != 0 &&
+ is_vector(row_filter) &&
+ is_vector(col_filter),
+ "\trectangle spatially_filter_image_separable()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t scale: "<< scale
+ << "\n\t row_filter.size(): "<< row_filter.size()
+ << "\n\t col_filter.size(): "<< col_filter.size()
+ << "\n\t is_vector(row_filter): "<< is_vector(row_filter)
+ << "\n\t is_vector(col_filter): "<< is_vector(col_filter)
+ );
+ DLIB_ASSERT(is_same_object(in_img_, out_img_) == false,
+ "\trectangle spatially_filter_image_separable()"
+ << "\n\tYou must give two different image objects"
+ );
+
+
+ const_image_view<in_image_type> in_img(in_img_);
+ image_view<out_image_type> out_img(out_img_);
+
+ // if there isn't any input image then don't do anything
+ if (in_img.size() == 0)
+ {
+ out_img.clear();
+ return rectangle();
+ }
+
+ out_img.set_size(in_img.nr(),in_img.nc());
+
+
+ // figure out the range that we should apply the filter to
+ const long first_row = col_filter.size()/2;
+ const long first_col = row_filter.size()/2;
+ const long last_row = in_img.nr() - ((col_filter.size()-1)/2);
+ const long last_col = in_img.nc() - ((row_filter.size()-1)/2);
+
+ const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1);
+ if (!add_to)
+ zero_border_pixels(out_img, non_border);
+
+ // Intermediate results keep the row filter's element type to avoid
+ // truncation before the column pass.
+ typedef typename EXP1::type ptype;
+
+ array2d<ptype> temp_img;
+ temp_img.set_size(in_img.nr(), in_img.nc());
+
+ // apply the row filter
+ for (long r = 0; r < in_img.nr(); ++r)
+ {
+ for (long c = first_col; c < last_col; ++c)
+ {
+ ptype p;
+ ptype temp = 0;
+ for (long n = 0; n < row_filter.size(); ++n)
+ {
+ // pull out the current pixel and put it into p
+ p = get_pixel_intensity(in_img[r][c-first_col+n]);
+ temp += p*row_filter(n);
+ }
+ temp_img[r][c] = temp;
+ }
+ }
+
+ // apply the column filter
+ for (long r = first_row; r < last_row; ++r)
+ {
+ for (long c = first_col; c < last_col; ++c)
+ {
+ ptype temp = 0;
+ for (long m = 0; m < col_filter.size(); ++m)
+ {
+ temp += temp_img[r-first_row+m][c]*col_filter(m);
+ }
+
+ temp /= scale;
+
+ if (use_abs && temp < 0)
+ {
+ temp = -temp;
+ }
+
+ // save this pixel to the output image
+ if (add_to == false)
+ {
+ assign_pixel(out_img[r][c], temp);
+ }
+ else
+ {
+ assign_pixel(out_img[r][c], temp + out_img[r][c]);
+ }
+ }
+ }
+ return non_border;
+ }
+
+ } // namespace impl
+
+// ----------------------------------------------------------------------------------------
+
+ // Compile-time trait: true when both images have float pixels and both the
+ // row and column filters have float elements, i.e. when the SIMD separable
+ // filtering path (float_spatially_filter_image_separable) can be used.
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP1,
+ typename EXP2
+ >
+ struct is_float_filtering
+ {
+ const static bool value = is_same_type<typename image_traits<in_image_type>::pixel_type,float>::value &&
+ is_same_type<typename image_traits<out_image_type>::pixel_type,float>::value &&
+ is_same_type<typename EXP1::type,float>::value &&
+ is_same_type<typename EXP2::type,float>::value;
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ // This overload is optimized to use SIMD instructions when filtering float images with
+ // float filters.
+ // SIMD-optimized separable filtering for float images with float filters.
+ // Two passes: the row filter writes into the caller-provided scratch_
+ // image, then the column filter produces the output (overwriting or, when
+ // add_to is true, accumulating). Both passes process 8 columns at a time
+ // with simd8f and unroll the filter taps by 3 into independent
+ // accumulators, with scalar tails for the leftovers. Returns the
+ // non-border rectangle.
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP1,
+ typename EXP2
+ >
+ rectangle float_spatially_filter_image_separable (
+ const in_image_type& in_img_,
+ out_image_type& out_img_,
+ const matrix_exp<EXP1>& _row_filter,
+ const matrix_exp<EXP2>& _col_filter,
+ out_image_type& scratch_,
+ bool add_to = false
+ )
+ {
+ // You can only use this function with images and filters containing float
+ // variables.
+ COMPILE_TIME_ASSERT((is_float_filtering<in_image_type,out_image_type,EXP1,EXP2>::value == true));
+
+
+ const_temp_matrix<EXP1> row_filter(_row_filter);
+ const_temp_matrix<EXP2> col_filter(_col_filter);
+ DLIB_ASSERT(row_filter.size() != 0 && col_filter.size() != 0 &&
+ is_vector(row_filter) &&
+ is_vector(col_filter),
+ "\trectangle float_spatially_filter_image_separable()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t row_filter.size(): "<< row_filter.size()
+ << "\n\t col_filter.size(): "<< col_filter.size()
+ << "\n\t is_vector(row_filter): "<< is_vector(row_filter)
+ << "\n\t is_vector(col_filter): "<< is_vector(col_filter)
+ );
+ DLIB_ASSERT(is_same_object(in_img_, out_img_) == false,
+ "\trectangle float_spatially_filter_image_separable()"
+ << "\n\tYou must give two different image objects"
+ );
+
+
+ const_image_view<in_image_type> in_img(in_img_);
+ image_view<out_image_type> out_img(out_img_);
+
+ // if there isn't any input image then don't do anything
+ if (in_img.size() == 0)
+ {
+ out_img.clear();
+ return rectangle();
+ }
+
+ out_img.set_size(in_img.nr(),in_img.nc());
+
+ // figure out the range that we should apply the filter to
+ const long first_row = col_filter.size()/2;
+ const long first_col = row_filter.size()/2;
+ const long last_row = in_img.nr() - ((col_filter.size()-1)/2);
+ const long last_col = in_img.nc() - ((row_filter.size()-1)/2);
+
+ const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1);
+ if (!add_to)
+ zero_border_pixels(out_img, non_border);
+
+ image_view<out_image_type> scratch(scratch_);
+ scratch.set_size(in_img.nr(), in_img.nc());
+
+ // apply the row filter
+ for (long r = 0; r < in_img.nr(); ++r)
+ {
+ // Vectorized path: 8 output pixels per iteration.
+ long c = first_col;
+ for (; c < last_col-7; c+=8)
+ {
+ // Three accumulators and a 3-way tap unroll hide multiply-add
+ // latency.
+ simd8f p,p2,p3, temp = 0, temp2=0, temp3=0;
+ long n = 0;
+ for (; n < row_filter.size()-2; n+=3)
+ {
+ // pull out the current pixel and put it into p
+ p.load(&in_img[r][c-first_col+n]);
+ p2.load(&in_img[r][c-first_col+n+1]);
+ p3.load(&in_img[r][c-first_col+n+2]);
+ temp += p*row_filter(n);
+ temp2 += p2*row_filter(n+1);
+ temp3 += p3*row_filter(n+2);
+ }
+ for (; n < row_filter.size(); ++n)
+ {
+ // pull out the current pixel and put it into p
+ p.load(&in_img[r][c-first_col+n]);
+ temp += p*row_filter(n);
+ }
+ temp += temp2 + temp3;
+ temp.store(&scratch[r][c]);
+ }
+ // Scalar tail for the last (< 8) columns of the row.
+ for (; c < last_col; ++c)
+ {
+ float p;
+ float temp = 0;
+ for (long n = 0; n < row_filter.size(); ++n)
+ {
+ // pull out the current pixel and put it into p
+ p = in_img[r][c-first_col+n];
+ temp += p*row_filter(n);
+ }
+ scratch[r][c] = temp;
+ }
+ }
+
+ // apply the column filter
+ for (long r = first_row; r < last_row; ++r)
+ {
+ long c = first_col;
+ for (; c < last_col-7; c+=8)
+ {
+ simd8f p, p2, p3, temp = 0, temp2 = 0, temp3 = 0;
+ long m = 0;
+ for (; m < col_filter.size()-2; m+=3)
+ {
+ p.load(&scratch[r-first_row+m][c]);
+ p2.load(&scratch[r-first_row+m+1][c]);
+ p3.load(&scratch[r-first_row+m+2][c]);
+ temp += p*col_filter(m);
+ temp2 += p2*col_filter(m+1);
+ temp3 += p3*col_filter(m+2);
+ }
+ for (; m < col_filter.size(); ++m)
+ {
+ p.load(&scratch[r-first_row+m][c]);
+ temp += p*col_filter(m);
+ }
+ temp += temp2+temp3;
+
+ // save this pixel to the output image
+ if (add_to == false)
+ {
+ temp.store(&out_img[r][c]);
+ }
+ else
+ {
+ p.load(&out_img[r][c]);
+ temp += p;
+ temp.store(&out_img[r][c]);
+ }
+ }
+ for (; c < last_col; ++c)
+ {
+ float temp = 0;
+ for (long m = 0; m < col_filter.size(); ++m)
+ {
+ temp += scratch[r-first_row+m][c]*col_filter(m);
+ }
+
+ // save this pixel to the output image
+ if (add_to == false)
+ {
+ out_img[r][c] = temp;
+ }
+ else
+ {
+ out_img[r][c] += temp;
+ }
+ }
+ }
+ return non_border;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // spatially_filter_image_separable overload selected for all-float
+ // filtering. Dispatches to the SIMD separable implementation when use_abs
+ // is false; since the full filter is the outer product row x col, dividing
+ // just the row filter by scale scales the whole result. Falls back to the
+ // generic grayscale separable implementation when use_abs is requested.
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP1,
+ typename EXP2,
+ typename T
+ >
+ typename enable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale &&
+ is_float_filtering<in_image_type,out_image_type,EXP1,EXP2>::value,rectangle>::type
+ spatially_filter_image_separable (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ const matrix_exp<EXP1>& row_filter,
+ const matrix_exp<EXP2>& col_filter,
+ T scale,
+ bool use_abs = false,
+ bool add_to = false
+ )
+ {
+ if (use_abs == false)
+ {
+ out_image_type scratch;
+ if (scale == 1)
+ return float_spatially_filter_image_separable(in_img, out_img, row_filter, col_filter, scratch, add_to);
+ else
+ return float_spatially_filter_image_separable(in_img, out_img, row_filter/scale, col_filter, scratch, add_to);
+ }
+ else
+ {
+ return impl::grayscale_spatially_filter_image_separable(in_img, out_img, row_filter, col_filter, scale, true, add_to);
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // spatially_filter_image_separable overload for grayscale images that do
+ // not qualify for the all-float SIMD path; forwards to the generic
+ // grayscale separable implementation.
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP1,
+ typename EXP2,
+ typename T
+ >
+ typename enable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale &&
+ !is_float_filtering<in_image_type,out_image_type,EXP1,EXP2>::value,rectangle>::type
+ spatially_filter_image_separable (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ const matrix_exp<EXP1>& row_filter,
+ const matrix_exp<EXP2>& col_filter,
+ T scale,
+ bool use_abs = false,
+ bool add_to = false
+ )
+ {
+ return impl::grayscale_spatially_filter_image_separable(in_img,out_img, row_filter, col_filter, scale, use_abs, add_to);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+    // Overload selected when the output image does NOT hold grayscale pixels
+    // (e.g. RGB).  The separable filter is applied to each color channel
+    // independently by working in a per-pixel vector space (one component per
+    // channel).  Returns the rectangle of non-border output pixels.
+    template <
+        typename in_image_type,
+        typename out_image_type,
+        typename EXP1,
+        typename EXP2,
+        typename T
+        >
+    typename disable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale,rectangle>::type
+    spatially_filter_image_separable (
+        const in_image_type& in_img_,
+        out_image_type& out_img_,
+        const matrix_exp<EXP1>& _row_filter,
+        const matrix_exp<EXP2>& _col_filter,
+        T scale
+    )
+    {
+        // Bind the filter expressions to concrete temporaries so they are only
+        // evaluated once even though we index them repeatedly below.
+        const_temp_matrix<EXP1> row_filter(_row_filter);
+        const_temp_matrix<EXP2> col_filter(_col_filter);
+        COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false );
+        COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false );
+
+        DLIB_ASSERT(scale != 0 && row_filter.size() != 0 && col_filter.size() != 0 &&
+                    is_vector(row_filter) &&
+                    is_vector(col_filter),
+            "\trectangle spatially_filter_image_separable()"
+            << "\n\t Invalid inputs were given to this function."
+            << "\n\t scale: "<< scale
+            << "\n\t row_filter.size(): "<< row_filter.size()
+            << "\n\t col_filter.size(): "<< col_filter.size()
+            << "\n\t is_vector(row_filter): "<< is_vector(row_filter)
+            << "\n\t is_vector(col_filter): "<< is_vector(col_filter)
+        );
+        DLIB_ASSERT(is_same_object(in_img_, out_img_) == false,
+            "\trectangle spatially_filter_image_separable()"
+            << "\n\tYou must give two different image objects"
+        );
+
+
+        const_image_view<in_image_type> in_img(in_img_);
+        image_view<out_image_type> out_img(out_img_);
+
+        // if there isn't any input image then don't do anything
+        if (in_img.size() == 0)
+        {
+            out_img.clear();
+            return rectangle();
+        }
+
+        out_img.set_size(in_img.nr(),in_img.nc());
+
+
+        // figure out the range that we should apply the filter to
+        const long first_row = col_filter.size()/2;
+        const long first_col = row_filter.size()/2;
+        const long last_row = in_img.nr() - ((col_filter.size()-1)/2);
+        const long last_col = in_img.nc() - ((row_filter.size()-1)/2);
+
+        // Pixels too close to the edge for the filter to fit are zeroed rather
+        // than filtered.
+        const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1);
+        zero_border_pixels(out_img, non_border);
+
+        // ptype is a small column vector holding one filter accumulator per
+        // color channel of the input pixel type.
+        typedef typename image_traits<in_image_type>::pixel_type pixel_type;
+        typedef matrix<typename EXP1::type,pixel_traits<pixel_type>::num,1> ptype;
+
+        array2d<ptype> temp_img;
+        temp_img.set_size(in_img.nr(), in_img.nc());
+
+        // apply the row filter (horizontal pass into temp_img)
+        for (long r = 0; r < in_img.nr(); ++r)
+        {
+            for (long c = first_col; c < last_col; ++c)
+            {
+                ptype p;
+                ptype temp;
+                temp = 0;
+                for (long n = 0; n < row_filter.size(); ++n)
+                {
+                    // pull out the current pixel and put it into p
+                    p = pixel_to_vector<typename EXP1::type>(in_img[r][c-first_col+n]);
+                    temp += p*row_filter(n);
+                }
+                temp_img[r][c] = temp;
+            }
+        }
+
+        // apply the column filter (vertical pass over the row-filtered data)
+        for (long r = first_row; r < last_row; ++r)
+        {
+            for (long c = first_col; c < last_col; ++c)
+            {
+                ptype temp;
+                temp = 0;
+                for (long m = 0; m < col_filter.size(); ++m)
+                {
+                    temp += temp_img[r-first_row+m][c]*col_filter(m);
+                }
+
+                temp /= scale;
+
+
+                // save this pixel to the output image (assign_pixel performs any
+                // needed color space conversion / saturation)
+                pixel_type p;
+                vector_to_pixel(p, temp);
+                assign_pixel(out_img[r][c], p);
+            }
+        }
+        return non_border;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Convenience overload: separable filtering with no scaling (scale == 1).
+    template <
+        typename in_image_type,
+        typename out_image_type,
+        typename EXP1,
+        typename EXP2
+        >
+    rectangle spatially_filter_image_separable (
+        const in_image_type& in_img,
+        out_image_type& out_img,
+        const matrix_exp<EXP1>& row_filter,
+        const matrix_exp<EXP2>& col_filter
+    )
+    {
+        return spatially_filter_image_separable(in_img,out_img,row_filter,col_filter,1);
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    // Applies a separable filter and simultaneously downsamples the output by the
+    // given factor, i.e. only every downsample-th filtered pixel in each
+    // dimension is computed and stored.  Output must be grayscale.  Returns the
+    // rectangle of non-border pixels in the (downsampled) output image.
+    template <
+        typename in_image_type,
+        typename out_image_type,
+        typename EXP1,
+        typename EXP2,
+        typename T
+        >
+    rectangle spatially_filter_image_separable_down (
+        const unsigned long downsample,
+        const in_image_type& in_img_,
+        out_image_type& out_img_,
+        const matrix_exp<EXP1>& row_filter,
+        const matrix_exp<EXP2>& col_filter,
+        T scale,
+        bool use_abs = false,
+        bool add_to = false
+    )
+    {
+        COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false );
+        COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false );
+        COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true );
+
+        // Note: both filters must be odd-sized so they have a well defined center.
+        DLIB_ASSERT(downsample > 0 &&
+                    scale != 0 &&
+                    row_filter.size()%2 == 1 &&
+                    col_filter.size()%2 == 1 &&
+                    is_vector(row_filter) &&
+                    is_vector(col_filter),
+            "\trectangle spatially_filter_image_separable_down()"
+            << "\n\t Invalid inputs were given to this function."
+            << "\n\t downsample: "<< downsample
+            << "\n\t scale: "<< scale
+            << "\n\t row_filter.size(): "<< row_filter.size()
+            << "\n\t col_filter.size(): "<< col_filter.size()
+            << "\n\t is_vector(row_filter): "<< is_vector(row_filter)
+            << "\n\t is_vector(col_filter): "<< is_vector(col_filter)
+        );
+        DLIB_ASSERT(is_same_object(in_img_, out_img_) == false,
+            "\trectangle spatially_filter_image_separable_down()"
+            << "\n\tYou must give two different image objects"
+        );
+
+
+        const_image_view<in_image_type> in_img(in_img_);
+        image_view<out_image_type> out_img(out_img_);
+
+        // if there isn't any input image then don't do anything
+        if (in_img.size() == 0)
+        {
+            out_img.clear();
+            return rectangle();
+        }
+
+        // output is the input size divided by downsample, rounded up
+        out_img.set_size((long)(std::ceil((double)in_img.nr()/downsample)),
+                         (long)(std::ceil((double)in_img.nc()/downsample)));
+
+        const double col_border = std::floor(col_filter.size()/2.0);
+        const double row_border = std::floor(row_filter.size()/2.0);
+
+        // figure out the range that we should apply the filter to, expressed in
+        // output (downsampled) coordinates
+        const long first_row = (long)std::ceil(col_border/downsample);
+        const long first_col = (long)std::ceil(row_border/downsample);
+        const long last_row = (long)std::ceil((in_img.nr() - col_border)/downsample) - 1;
+        const long last_col = (long)std::ceil((in_img.nc() - row_border)/downsample) - 1;
+
+        // zero border pixels
+        const rectangle non_border = rectangle(first_col, first_row, last_col, last_row);
+        zero_border_pixels(out_img,non_border);
+
+        typedef typename EXP1::type ptype;
+
+        // temp_img holds the row-filtered data: full input height, but only the
+        // downsampled set of columns.
+        array2d<ptype> temp_img;
+        temp_img.set_size(in_img.nr(), out_img.nc());
+
+        // apply the row filter (only at columns that survive the downsampling)
+        for (long r = 0; r < temp_img.nr(); ++r)
+        {
+            for (long c = non_border.left(); c <= non_border.right(); ++c)
+            {
+                ptype p;
+                ptype temp = 0;
+                for (long n = 0; n < row_filter.size(); ++n)
+                {
+                    // pull out the current pixel and put it into p
+                    p = get_pixel_intensity(in_img[r][c*downsample-row_filter.size()/2+n]);
+                    temp += p*row_filter(n);
+                }
+                temp_img[r][c] = temp;
+            }
+        }
+
+        // apply the column filter (only at rows that survive the downsampling)
+        for (long r = non_border.top(); r <= non_border.bottom(); ++r)
+        {
+            for (long c = non_border.left(); c <= non_border.right(); ++c)
+            {
+                ptype temp = 0;
+                for (long m = 0; m < col_filter.size(); ++m)
+                {
+                    temp += temp_img[r*downsample-col_filter.size()/2+m][c]*col_filter(m);
+                }
+
+                temp /= scale;
+
+                // optionally take the absolute value of the filter response
+                if (use_abs && temp < 0)
+                {
+                    temp = -temp;
+                }
+
+                // save this pixel to the output image, either overwriting or
+                // accumulating depending on add_to
+                if (add_to == false)
+                {
+                    assign_pixel(out_img[r][c], temp);
+                }
+                else
+                {
+                    assign_pixel(out_img[r][c], temp + out_img[r][c]);
+                }
+            }
+        }
+
+        return non_border;
+    }
+
+    // Convenience overload: separable downsampling filter with no scaling
+    // (scale == 1).
+    template <
+        typename in_image_type,
+        typename out_image_type,
+        typename EXP1,
+        typename EXP2
+        >
+    rectangle spatially_filter_image_separable_down (
+        const unsigned long downsample,
+        const in_image_type& in_img,
+        out_image_type& out_img,
+        const matrix_exp<EXP1>& row_filter,
+        const matrix_exp<EXP2>& col_filter
+    )
+    {
+        return spatially_filter_image_separable_down(downsample,in_img,out_img,row_filter,col_filter,1);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Applies the separable 3x3 filter [fe1 fm fe2] (outer product with itself)
+    // to the NRxNC sub-window of img whose upper-left corner is at (c,r),
+    // treating img as grayscale, and writes the result into block.  The window
+    // plus its 1-pixel filter border must fit inside img.
+    template <
+        long NR,
+        long NC,
+        typename T,
+        typename U,
+        typename in_image_type
+        >
+    inline void separable_3x3_filter_block_grayscale (
+        T (&block)[NR][NC],
+        const in_image_type& img_,
+        const long& r,
+        const long& c,
+        const U& fe1, // separable filter end
+        const U& fm,  // separable filter middle
+        const U& fe2  // separable filter end 2
+    )
+    {
+        const_image_view<in_image_type> img(img_);
+        // make sure requires clause is not broken
+        DLIB_ASSERT(shrink_rect(get_rect(img),1).contains(c,r) &&
+                    shrink_rect(get_rect(img),1).contains(c+NC-1,r+NR-1),
+            "\t void separable_3x3_filter_block_grayscale()"
+            << "\n\t The sub-window doesn't fit inside the given image."
+            << "\n\t get_rect(img): " << get_rect(img)
+            << "\n\t (c,r):         " << point(c,r)
+            << "\n\t (c+NC-1,r+NR-1): " << point(c+NC-1,r+NR-1)
+        );
+
+
+        // horizontal pass: two extra rows so the vertical pass below has the
+        // neighbors it needs
+        T row_filt[NR+2][NC];
+        for (long rr = 0; rr < NR+2; ++rr)
+        {
+            for (long cc = 0; cc < NC; ++cc)
+            {
+                row_filt[rr][cc] = get_pixel_intensity(img[r+rr-1][c+cc-1])*fe1 +
+                                   get_pixel_intensity(img[r+rr-1][c+cc])*fm +
+                                   get_pixel_intensity(img[r+rr-1][c+cc+1])*fe2;
+            }
+        }
+
+        // vertical pass: combine the row-filtered values into the final output
+        for (long rr = 0; rr < NR; ++rr)
+        {
+            for (long cc = 0; cc < NC; ++cc)
+            {
+                block[rr][cc] = (row_filt[rr][cc]*fe1 +
+                                 row_filt[rr+1][cc]*fm +
+                                 row_filt[rr+2][cc]*fe2);
+            }
+        }
+
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // RGB variant of separable_3x3_filter_block_grayscale(): applies the same
+    // separable 3x3 filter to the red, green, and blue channels independently.
+    // T must expose .red/.green/.blue members.
+    template <
+        long NR,
+        long NC,
+        typename T,
+        typename U,
+        typename in_image_type
+        >
+    inline void separable_3x3_filter_block_rgb (
+        T (&block)[NR][NC],
+        const in_image_type& img_,
+        const long& r,
+        const long& c,
+        const U& fe1, // separable filter end
+        const U& fm,  // separable filter middle
+        const U& fe2  // separable filter end 2
+    )
+    {
+        const_image_view<in_image_type> img(img_);
+        // make sure requires clause is not broken
+        DLIB_ASSERT(shrink_rect(get_rect(img),1).contains(c,r) &&
+                    shrink_rect(get_rect(img),1).contains(c+NC-1,r+NR-1),
+            "\t void separable_3x3_filter_block_rgb()"
+            << "\n\t The sub-window doesn't fit inside the given image."
+            << "\n\t get_rect(img): " << get_rect(img)
+            << "\n\t (c,r):         " << point(c,r)
+            << "\n\t (c+NC-1,r+NR-1): " << point(c+NC-1,r+NR-1)
+        );
+
+        // horizontal pass, per channel; two extra rows for the vertical pass
+        T row_filt[NR+2][NC];
+        for (long rr = 0; rr < NR+2; ++rr)
+        {
+            for (long cc = 0; cc < NC; ++cc)
+            {
+                row_filt[rr][cc].red   = img[r+rr-1][c+cc-1].red*fe1   + img[r+rr-1][c+cc].red*fm   + img[r+rr-1][c+cc+1].red*fe2;
+                row_filt[rr][cc].green = img[r+rr-1][c+cc-1].green*fe1 + img[r+rr-1][c+cc].green*fm + img[r+rr-1][c+cc+1].green*fe2;
+                row_filt[rr][cc].blue  = img[r+rr-1][c+cc-1].blue*fe1  + img[r+rr-1][c+cc].blue*fm  + img[r+rr-1][c+cc+1].blue*fe2;
+            }
+        }
+
+        // vertical pass, per channel
+        for (long rr = 0; rr < NR; ++rr)
+        {
+            for (long cc = 0; cc < NC; ++cc)
+            {
+                block[rr][cc].red   = row_filt[rr][cc].red*fe1   + row_filt[rr+1][cc].red*fm   + row_filt[rr+2][cc].red*fe2;
+                block[rr][cc].green = row_filt[rr][cc].green*fe1 + row_filt[rr+1][cc].green*fm + row_filt[rr+2][cc].green*fe2;
+                block[rr][cc].blue  = row_filt[rr][cc].blue*fe1  + row_filt[rr+1][cc].blue*fm  + row_filt[rr+2][cc].blue*fe2;
+            }
+        }
+
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Evaluates a 1D Gaussian probability density with mean 0 and the given
+    // standard deviation at x.  Requires sigma > 0.
+    inline double gaussian (
+        double x,
+        double sigma
+    )
+    {
+        DLIB_ASSERT(sigma > 0,
+            "\tdouble gaussian(x)"
+            << "\n\t sigma must be bigger than 0"
+            << "\n\t sigma: " << sigma
+        );
+        // sqrt(2*pi), precomputed to full double precision
+        const double sqrt_2_pi = 2.5066282746310002416123552393401041626930;
+        return 1.0/(sigma*sqrt_2_pi) * std::exp( -(x*x)/(2*sigma*sigma));
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Builds a 1D Gaussian filter as a column vector of type T.  The filter is
+    // centered, has at most max_size taps (max_size must be odd and > 0), and is
+    // shrunk so the ratio between the center and edge taps stays modest.  For
+    // integer T the filter is normalized to its edge value and rounded.
+    template <
+        typename T
+        >
+    matrix<T,0,1> create_gaussian_filter (
+        double sigma,
+        int max_size
+    )
+    {
+        DLIB_ASSERT(sigma > 0 && max_size > 0 && (max_size%2)==1,
+            "\t matrix<T,0,1> create_gaussian_filter()"
+            << "\n\t Invalid inputs were given to this function."
+            << "\n\t sigma: " << sigma
+            << "\n\t max_size:  " << max_size
+        );
+
+        // Adjust the size so that the ratio of the gaussian values isn't huge.
+        // This only matters when T is an integer type.  However, we do it for
+        // all types so that the behavior of this function is always relatively
+        // the same.
+        while (gaussian(0,sigma)/gaussian(max_size/2,sigma) > 50)
+            --max_size;
+
+
+        matrix<double,0,1> f(max_size);
+        for (long i = 0; i < f.size(); ++i)
+        {
+            f(i) = gaussian(i-max_size/2, sigma);
+        }
+
+        if (is_float_type<T>::value == false)
+        {
+            // integer T: scale so the smallest (edge) tap becomes 1, then round
+            f /= f(0);
+            return matrix_cast<T>(round(f));
+        }
+        else
+        {
+            return matrix_cast<T>(f);
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Gaussian blurs in_img into out_img using a separable Gaussian filter with
+    // the given sigma (filter is at most max_size taps, max_size must be odd).
+    // Returns the rectangle of non-border output pixels.
+    template <
+        typename in_image_type,
+        typename out_image_type
+        >
+    rectangle gaussian_blur (
+        const in_image_type& in_img,
+        out_image_type& out_img,
+        double sigma = 1,
+        int max_size = 1001
+    )
+    {
+        DLIB_ASSERT(sigma > 0 && max_size > 0 && (max_size%2)==1 &&
+                    is_same_object(in_img, out_img) == false,
+            "\t void gaussian_blur()"
+            << "\n\t Invalid inputs were given to this function."
+            << "\n\t sigma: " << sigma
+            << "\n\t max_size:  " << max_size
+            << "\n\t is_same_object(in_img,out_img): " << is_same_object(in_img,out_img)
+        );
+
+        if (sigma < 18)
+        {
+            // small sigma: accumulate in the promoted version of the output's
+            // basic pixel type
+            typedef typename pixel_traits<typename image_traits<out_image_type>::pixel_type>::basic_pixel_type type;
+            typedef typename promote<type>::type ptype;
+            const matrix<ptype,0,1>& filt = create_gaussian_filter<ptype>(sigma, max_size);
+            // scale is the squared filter sum because the same 1D filter is
+            // applied along both rows and columns
+            ptype scale = sum(filt);
+            scale = scale*scale;
+            return spatially_filter_image_separable(in_img, out_img, filt, filt, scale);
+        }
+        else
+        {
+            // For large sigma we need to use a type with a lot of precision to avoid
+            // numerical problems.  So we use double here.
+            typedef double ptype;
+            const matrix<ptype,0,1>& filt = create_gaussian_filter<ptype>(sigma, max_size);
+            ptype scale = sum(filt);
+            scale = scale*scale;
+            return spatially_filter_image_separable(in_img, out_img, filt, filt, scale);
+        }
+
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    namespace impl
+    {
+        // Box (sum) filter: for every pixel (c,r) of img, computes the sum of
+        // img over translate_rect(rect, point(c,r)), clipped to the image, using
+        // a sliding-window of per-column running sums so the whole pass is O(nr*nc)
+        // regardless of rect's size.  If add_to is true the sums are accumulated
+        // into out, otherwise they overwrite it.  out must already be img-sized.
+        template <
+            bool add_to,
+            typename image_type1,
+            typename image_type2
+            >
+        void sum_filter (
+            const image_type1& img_,
+            image_type2& out_,
+            const rectangle& rect
+        )
+        {
+            const_image_view<image_type1> img(img_);
+            image_view<image_type2> out(out_);
+            DLIB_ASSERT(img.nr() == out.nr() &&
+                        img.nc() == out.nc() &&
+                        is_same_object(img_,out_) == false,
+                "\t void sum_filter()"
+                << "\n\t Invalid arguments given to this function."
+                << "\n\t img.nr(): " << img.nr()
+                << "\n\t img.nc(): " << img.nc()
+                << "\n\t out.nr(): " << out.nr()
+                << "\n\t out.nc(): " << out.nc()
+                << "\n\t is_same_object(img_,out_): " << is_same_object(img_,out_)
+            );
+
+            // accumulate in the promoted pixel type to reduce overflow risk
+            typedef typename image_traits<image_type1>::pixel_type pixel_type;
+            typedef typename promote<pixel_type>::type ptype;
+
+            // column_sum[j] holds the vertical sum of the rect-height strip of
+            // column j-rect.width()+rect.right() at the current row position
+            std::vector<ptype> column_sum;
+            column_sum.resize(img.nc() + rect.width(),0);
+
+            const long top    = -1 + rect.top();
+            const long bottom = -1 + rect.bottom();
+            long left = rect.left()-1;
+
+            // initialize column_sum at row -1
+            for (unsigned long j = 0; j < column_sum.size(); ++j)
+            {
+                rectangle strip(left,top,left,bottom);
+                strip = strip.intersect(get_rect(img));
+                if (!strip.is_empty())
+                {
+                    column_sum[j] = sum(matrix_cast<ptype>(subm(mat(img),strip)));
+                }
+
+                ++left;
+            }
+
+
+            const rectangle area = get_rect(img);
+
+            // Save width to avoid computing it over and over.
+            const long width = rect.width();
+
+
+            // Now do the bulk of the filtering work.
+            for (long r = 0; r < img.nr(); ++r)
+            {
+                // set to sum at point(-1,r).  i.e. should be equal to sum(mat(img), translate_rect(rect, point(-1,r)))
+                // We compute its value in the next loop.
+                ptype cur_sum = 0;
+
+                // Update the first part of column_sum since we only work on the c+width part of column_sum
+                // in the main loop.
+                const long top    = r + rect.top() - 1;
+                const long bottom = r + rect.bottom();
+                for (long k = 0; k < width; ++k)
+                {
+                    const long right  = k-width + rect.right();
+
+                    const ptype br_corner = area.contains(right,bottom) ? img[bottom][right] : 0;
+                    const ptype tr_corner = area.contains(right,top)    ? img[top][right]    : 0;
+                    // update the sum in this column now that we are on the next row
+                    column_sum[k] = column_sum[k] + br_corner - tr_corner;
+                    cur_sum += column_sum[k];
+                }
+
+                for (long c = 0; c < img.nc(); ++c)
+                {
+                    const long top    = r + rect.top() - 1;
+                    const long bottom = r + rect.bottom();
+                    const long right  = c + rect.right();
+
+                    const ptype br_corner = area.contains(right,bottom) ? img[bottom][right] : 0;
+                    const ptype tr_corner = area.contains(right,top)    ? img[top][right]    : 0;
+
+                    // update the sum in this column now that we are on the next row
+                    column_sum[c+width] = column_sum[c+width] + br_corner - tr_corner;
+
+                    // add in the new right side of the rect and subtract the old right side.
+                    cur_sum = cur_sum + column_sum[c+width] - column_sum[c];
+
+                    if (add_to)
+                        out[r][c] += static_cast<typename image_traits<image_type2>::pixel_type>(cur_sum);
+                    else
+                        out[r][c] = static_cast<typename image_traits<image_type2>::pixel_type>(cur_sum);
+                }
+            }
+        }
+    }
+
+    // Public sum filter that ACCUMULATES: each output pixel gets the box sum of
+    // img over rect (translated to that pixel) added to its current value.
+    // out must already have the same dimensions as img.
+    template <
+        typename image_type1,
+        typename image_type2
+        >
+    void sum_filter (
+        const image_type1& img,
+        image_type2& out,
+        const rectangle& rect
+    )
+    {
+        impl::sum_filter<true>(img,out,rect);
+    }
+
+    // Public sum filter that OVERWRITES: resizes out to match img and stores the
+    // box sums directly, replacing any previous contents.
+    template <
+        typename image_type1,
+        typename image_type2
+        >
+    void sum_filter_assign (
+        const image_type1& img,
+        image_type2& out,
+        const rectangle& rect
+    )
+    {
+        set_image_size(out, num_rows(img), num_columns(img));
+        impl::sum_filter<false>(img,out,rect);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    namespace impl
+    {
+        template <typename T>
+        class fast_deque
+        {
+            /*
+                This is a fast and minimal implementation of std::deque for
+                use with the max_filter.
+
+                This object assumes that no more than max_size elements
+                will ever be pushed into it at a time.
+
+                It is backed by a power-of-two sized circular buffer so that
+                index wrap-around can be done with a cheap bit mask instead of
+                a modulo.
+            */
+        public:
+
+            explicit fast_deque(unsigned long max_size)
+            {
+                // find a power of two that upper bounds max_size
+                mask = 2;
+                while (mask < max_size)
+                    mask *= 2;
+
+                clear();
+
+                data.resize(mask);
+                --mask; // make into bit mask
+            }
+
+            // Reset to the empty state (capacity is unchanged).
+            void clear()
+            {
+                first = 1;
+                last = 0;
+                size = 0;
+            }
+
+            bool empty() const
+            {
+                return size == 0;
+            }
+
+            // Note: pop_back/pop_front/front/back require the deque to be
+            // non-empty; this is not checked.
+            void pop_back()
+            {
+                last = (last-1)&mask;
+                --size;
+            }
+
+            void push_back(const T& item)
+            {
+                last = (last+1)&mask;
+                ++size;
+                data[last] = item;
+            }
+
+            void pop_front()
+            {
+                first = (first+1)&mask;
+                --size;
+            }
+
+            const T& front() const
+            {
+                return data[first];
+            }
+
+            const T& back() const
+            {
+                return data[last];
+            }
+
+        private:
+
+            std::vector<T> data;
+            unsigned long mask;   // capacity-1; used to wrap indices
+            unsigned long first;  // index of the front element
+            unsigned long last;   // index of the back element
+            unsigned long size;   // number of stored elements
+        };
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Sliding-window maximum filter (van Herk / monotonic-deque style): for each
+    // pixel, takes the max of img over a width x height window centered on it,
+    // clamps that max below by thresh, and ADDS the result into out.
+    // NOTE: img is used as scratch space and is modified in place by the
+    // horizontal pass.  out must already be the same size as img.
+    template <
+        typename image_type1,
+        typename image_type2
+        >
+    void max_filter (
+        image_type1& img_,
+        image_type2& out_,
+        const long width,
+        const long height,
+        const typename image_traits<image_type1>::pixel_type& thresh
+    )
+    {
+        image_view<image_type1> img(img_);
+        image_view<image_type2> out(out_);
+        DLIB_ASSERT( width > 0 &&
+                     height > 0 &&
+                     out.nr() == img.nr() &&
+                     out.nc() == img.nc() &&
+                     is_same_object(img_,out_) == false,
+                "\t void max_filter()"
+                << "\n\t Invalid arguments given to this function."
+                << "\n\t img.nr(): " << img.nr()
+                << "\n\t img.nc(): " << img.nc()
+                << "\n\t out.nr(): " << out.nr()
+                << "\n\t out.nc(): " << out.nc()
+                << "\n\t width:    " << width
+                << "\n\t height:   " << height
+                << "\n\t is_same_object(img_,out_): " << is_same_object(img_,out_)
+            );
+
+        typedef typename image_traits<image_type1>::pixel_type pixel_type;
+
+
+        // Q is a monotonically decreasing deque of (index, value) pairs; its
+        // front is always the max of the current window.
+        dlib::impl::fast_deque<std::pair<long,pixel_type> > Q(std::max(width,height));
+
+        const long last_col = std::max(img.nc(), ((width-1)/2));
+        const long last_row = std::max(img.nr(), ((height-1)/2));
+
+        // run max filter along rows of img
+        for (long r = 0; r < img.nr(); ++r)
+        {
+            Q.clear();
+            // prime the deque with the pixels to the right of the first output
+            for (long c = 0; c < (width-1)/2 && c < img.nc(); ++c)
+            {
+                while (!Q.empty() && img[r][c] >= Q.back().second)
+                    Q.pop_back();
+                Q.push_back(std::make_pair(c,img[r][c]));
+            }
+
+            for (long c = (width-1)/2; c < img.nc(); ++c)
+            {
+                while (!Q.empty() && img[r][c] >= Q.back().second)
+                    Q.pop_back();
+                while (!Q.empty() && Q.front().first <= c-width)
+                    Q.pop_front();
+                Q.push_back(std::make_pair(c,img[r][c]));
+
+                img[r][c-((width-1)/2)] = Q.front().second;
+            }
+
+            // flush the tail of the row where the window hangs off the right edge
+            for (long c = last_col; c < img.nc() + ((width-1)/2); ++c)
+            {
+                while (!Q.empty() && Q.front().first <= c-width)
+                    Q.pop_front();
+
+                img[r][c-((width-1)/2)] = Q.front().second;
+            }
+        }
+
+        // run max filter along columns of img.  Store result in out.
+        for (long cc = 0; cc < img.nc(); ++cc)
+        {
+            Q.clear();
+            for (long rr = 0; rr < (height-1)/2 && rr < img.nr(); ++rr)
+            {
+                while (!Q.empty() && img[rr][cc] >= Q.back().second)
+                    Q.pop_back();
+                Q.push_back(std::make_pair(rr,img[rr][cc]));
+            }
+
+            for (long rr = (height-1)/2; rr < img.nr(); ++rr)
+            {
+                while (!Q.empty() && img[rr][cc] >= Q.back().second)
+                    Q.pop_back();
+                while (!Q.empty() && Q.front().first <= rr-height)
+                    Q.pop_front();
+                Q.push_back(std::make_pair(rr,img[rr][cc]));
+
+                // accumulate the thresholded window max into the output
+                out[rr-((height-1)/2)][cc] += std::max(Q.front().second, thresh);
+            }
+
+            // flush the tail of the column where the window hangs off the bottom
+            for (long rr = last_row; rr < img.nr() + ((height-1)/2); ++rr)
+            {
+                while (!Q.empty() && Q.front().first <= rr-height)
+                    Q.pop_front();
+
+                out[rr-((height-1)/2)][cc] += std::max(Q.front().second, thresh);
+            }
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SPATIAL_FILTERINg_H_
+
+
diff --git a/ml/dlib/dlib/image_transforms/spatial_filtering_abstract.h b/ml/dlib/dlib/image_transforms/spatial_filtering_abstract.h
new file mode 100644
index 000000000..5e200aa9a
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/spatial_filtering_abstract.h
@@ -0,0 +1,487 @@
+// Copyright (C) 2006 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_SPATIAL_FILTERINg_ABSTRACT_
+#ifdef DLIB_SPATIAL_FILTERINg_ABSTRACT_
+
+#include "../pixel.h"
+#include "../matrix.h"
+#include "../image_processing/generic_image.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP,
+ typename T
+ >
+ rectangle spatially_filter_image (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ const matrix_exp<EXP>& filter,
+ T scale = 1,
+ bool use_abs = false,
+ bool add_to = false
+ );
+ /*!
+ requires
+ - in_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - out_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - in_img and out_img do not contain pixels with an alpha channel. That is,
+ pixel_traits::has_alpha is false for the pixels in these objects.
+ - is_same_object(in_img, out_img) == false
+ - T must be some scalar type
+ - filter.size() != 0
+ - scale != 0
+ - if (in_img doesn't contain grayscale pixels) then
+ - use_abs == false && add_to == false
+ (i.e. You can only use the use_abs and add_to options with grayscale images)
+ ensures
+ - Applies the given spatial filter to in_img and stores the result in out_img (i.e.
+ cross-correlates in_img with filter). Also divides each resulting pixel by scale.
+ - The intermediate filter computations will be carried out using variables of type EXP::type.
+ This is whatever scalar type is used inside the filter matrix.
+ - Pixel values are stored into out_img using the assign_pixel() function and therefore
+ any applicable color space conversion or value saturation is performed. Note that if
+ add_to is true then the filtered output value will be added to out_img rather than
+ overwriting the original value.
+ - if (in_img doesn't contain grayscale pixels) then
+ - The filter is applied to each color channel independently.
+ - if (use_abs == true) then
+ - pixel values after filtering that are < 0 are converted to their absolute values.
+ - The filter is applied such that it's centered over the pixel it writes its
+ output into. For centering purposes, we consider the center element of the
+ filter to be filter(filter.nr()/2,filter.nc()/2). This means that the filter
+ that writes its output to a pixel at location point(c,r) and is W by H (width
+ by height) pixels in size operates on exactly the pixels in the rectangle
+ centered_rect(point(c,r),W,H) within in_img.
+ - Pixels close enough to the edge of in_img to not have the filter still fit
+ inside the image are always set to zero.
+ - #out_img.nc() == in_img.nc()
+ - #out_img.nr() == in_img.nr()
+ - returns a rectangle which indicates what pixels in #out_img are considered
+ non-border pixels and therefore contain output from the filter.
+            - if (use_abs == false && all images and filters contain float types) then
+ - This function will use SIMD instructions and is particularly fast. So if
+ you can use this form of the function it can give a decent speed boost.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP1,
+ typename EXP2,
+ typename T
+ >
+ rectangle spatially_filter_image_separable (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ const matrix_exp<EXP1>& row_filter,
+ const matrix_exp<EXP2>& col_filter,
+ T scale = 1,
+ bool use_abs = false,
+ bool add_to = false
+ );
+ /*!
+ requires
+ - in_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - out_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - in_img and out_img do not contain pixels with an alpha channel. That is,
+ pixel_traits::has_alpha is false for the pixels in these objects.
+ - is_same_object(in_img, out_img) == false
+ - T must be some scalar type
+ - scale != 0
+ - row_filter.size() != 0
+ - col_filter.size() != 0
+ - is_vector(row_filter) == true
+ - is_vector(col_filter) == true
+ - if (in_img doesn't contain grayscale pixels) then
+ - use_abs == false && add_to == false
+ (i.e. You can only use the use_abs and add_to options with grayscale images)
+ ensures
+ - Applies the given separable spatial filter to in_img and stores the result in out_img.
+ Also divides each resulting pixel by scale. Calling this function has the same
+ effect as calling the regular spatially_filter_image() routine with a filter,
+ FILT, defined as follows:
+ - FILT(r,c) == col_filter(r)*row_filter(c)
+ - The intermediate filter computations will be carried out using variables of type EXP1::type.
+ This is whatever scalar type is used inside the row_filter matrix.
+ - Pixel values are stored into out_img using the assign_pixel() function and therefore
+ any applicable color space conversion or value saturation is performed. Note that if
+ add_to is true then the filtered output value will be added to out_img rather than
+ overwriting the original value.
+ - if (in_img doesn't contain grayscale pixels) then
+ - The filter is applied to each color channel independently.
+ - if (use_abs == true) then
+ - pixel values after filtering that are < 0 are converted to their absolute values
+ - The filter is applied such that it's centered over the pixel it writes its
+ output into. For centering purposes, we consider the center element of the
+ filter to be FILT(col_filter.size()/2,row_filter.size()/2). This means that
+ the filter that writes its output to a pixel at location point(c,r) and is W
+ by H (width by height) pixels in size operates on exactly the pixels in the
+ rectangle centered_rect(point(c,r),W,H) within in_img.
+ - Pixels close enough to the edge of in_img to not have the filter still fit
+ inside the image are always set to zero.
+ - #out_img.nc() == in_img.nc()
+ - #out_img.nr() == in_img.nr()
+ - returns a rectangle which indicates what pixels in #out_img are considered
+ non-border pixels and therefore contain output from the filter.
+            - if (use_abs == false && all images and filters contain float types) then
+ - This function will use SIMD instructions and is particularly fast. So if
+ you can use this form of the function it can give a decent speed boost.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP1,
+ typename EXP2
+ >
+ rectangle float_spatially_filter_image_separable (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ const matrix_exp<EXP1>& row_filter,
+ const matrix_exp<EXP2>& col_filter,
+ out_image_type& scratch,
+ bool add_to = false
+ );
+ /*!
+ requires
+ - in_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - out_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - in_img, out_img, row_filter, and col_filter must all contain float type elements.
+ - is_same_object(in_img, out_img) == false
+ - row_filter.size() != 0
+ - col_filter.size() != 0
+ - is_vector(row_filter) == true
+ - is_vector(col_filter) == true
+ ensures
+ - This function is identical to the above spatially_filter_image_separable()
+ function except that it can only be invoked on float images with float
+ filters. In fact, spatially_filter_image_separable() invokes
+ float_spatially_filter_image_separable() in those cases. So why is
+ float_spatially_filter_image_separable() in the public API? The reason is
+ because the separable filtering routines internally allocate an image each
+ time they are called. If you want to avoid this memory allocation then you
+ can call float_spatially_filter_image_separable() and provide the scratch
+ image as input. This allows you to reuse the same scratch image for many
+ calls to float_spatially_filter_image_separable() and thereby avoid having it
+ allocated and freed for each call.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type,
+ typename EXP1,
+ typename EXP2,
+ typename T
+ >
+ rectangle spatially_filter_image_separable_down (
+ const unsigned long downsample,
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ const matrix_exp<EXP1>& row_filter,
+ const matrix_exp<EXP2>& col_filter,
+ T scale = 1,
+ bool use_abs = false,
+ bool add_to = false
+ );
+ /*!
+ requires
+ - in_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - out_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - in_img and out_img do not contain pixels with an alpha channel. That is,
+ pixel_traits::has_alpha is false for the pixels in these objects.
+ - out_img contains grayscale pixels.
+ - is_same_object(in_img, out_img) == false
+ - T must be some scalar type
+ - scale != 0
+ - is_vector(row_filter) == true
+ - is_vector(col_filter) == true
+ - row_filter.size() % 2 == 1 (i.e. must be odd)
+ - col_filter.size() % 2 == 1 (i.e. must be odd)
+ - downsample > 0
+ ensures
+ - This function is equivalent to calling
+ spatially_filter_image_separable(in_img,out_img,row_filter,col_filter,scale,use_abs,add_to)
+ and then downsampling the output image by a factor of downsample. Therefore,
+ we will have that:
+ - #out_img.nr() == ceil((double)in_img.nr()/downsample)
+ - #out_img.nc() == ceil((double)in_img.nc()/downsample)
+ - #out_img[r][c] == filtered pixel corresponding to in_img[r*downsample][c*downsample]
+ - returns a rectangle which indicates what pixels in #out_img are considered
+ non-border pixels and therefore contain output from the filter.
+ - Note that the first row and column of non-zero padded data are the following
+ - first_row == ceil(floor(col_filter.size()/2.0)/downsample)
+ - first_col == ceil(floor(row_filter.size()/2.0)/downsample)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ long NR,
+ long NC,
+ typename T,
+ typename U,
+ typename in_image_type
+ >
+ inline void separable_3x3_filter_block_grayscale (
+ T (&block)[NR][NC],
+ const in_image_type& img,
+ const long& r,
+ const long& c,
+ const U& fe1,
+ const U& fm,
+ const U& fe2
+ );
+ /*!
+ requires
+ - in_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - T and U should be scalar types
+ - shrink_rect(get_rect(img),1).contains(c,r)
+ - shrink_rect(get_rect(img),1).contains(c+NC-1,r+NR-1)
+ ensures
+ - Filters the image in the sub-window of img defined by a rectangle
+ with its upper left corner at (c,r) and lower right at (c+NC-1,r+NR-1).
+ - The output of the filter is stored in #block. Note that img will be
+ interpreted as a grayscale image.
+ - The filter used is defined by the separable filter [fe1 fm fe2]. So the
+ spatial filter is thus:
+ fe1*fe1 fe1*fm fe2*fe1
+ fe1*fm fm*fm fe2*fm
+ fe1*fe2 fe2*fm fe2*fe2
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ long NR,
+ long NC,
+ typename T,
+ typename U,
+ typename in_image_type
+ >
+ inline void separable_3x3_filter_block_rgb (
+ T (&block)[NR][NC],
+ const in_image_type& img,
+ const long& r,
+ const long& c,
+ const U& fe1,
+ const U& fm,
+ const U& fe2
+ );
+ /*!
+ requires
+ - in_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - img must contain RGB pixels, that is pixel_traits::rgb == true for the pixels
+ in img.
+ - T should be a struct with .red .green and .blue members.
+ - U should be a scalar type
+ - shrink_rect(get_rect(img),1).contains(c,r)
+ - shrink_rect(get_rect(img),1).contains(c+NC-1,r+NR-1)
+ ensures
+ - Filters the image in the sub-window of img defined by a rectangle
+ with its upper left corner at (c,r) and lower right at (c+NC-1,r+NR-1).
+ - The output of the filter is stored in #block. Note that the filter is applied
+ to each color component independently.
+ - The filter used is defined by the separable filter [fe1 fm fe2]. So the
+ spatial filter is thus:
+ fe1*fe1 fe1*fm fe2*fe1
+ fe1*fm fm*fm fe2*fm
+ fe1*fe2 fe2*fm fe2*fe2
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ inline double gaussian (
+ double x,
+ double sigma
+ );
+ /*!
+ requires
+ - sigma > 0
+ ensures
+ - computes and returns the value of a 1D Gaussian function with mean 0
+ and standard deviation sigma at the given x value.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename T
+ >
+ matrix<T,0,1> create_gaussian_filter (
+ double sigma,
+ int size
+ );
+ /*!
+ requires
+ - sigma > 0
+ - size > 0
+ - size is an odd number
+ ensures
+ - returns a separable Gaussian filter F such that:
+ - is_vector(F) == true
+ - F.size() == size
+ - F is suitable for use with the spatially_filter_image_separable() routine
+ and its use with this function corresponds to running a Gaussian filter
+ of sigma width over an image.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type
+ >
+ rectangle gaussian_blur (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ double sigma = 1,
+ int max_size = 1001
+ );
+ /*!
+ requires
+ - in_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - out_image_type == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h
+ - in_img and out_img do not contain pixels with an alpha channel. That is,
+ pixel_traits::has_alpha is false for the pixels in these objects.
+ - is_same_object(in_img, out_img) == false
+ - sigma > 0
+ - max_size > 0
+ - max_size is an odd number
+ ensures
+ - Filters in_img with a Gaussian filter of sigma width. The actual spatial filter will
+ be applied to pixel blocks that are at most max_size wide and max_size tall (note that
+ this function will automatically select a smaller block size as appropriate). The
+ results are stored into #out_img.
+ - Pixel values are stored into out_img using the assign_pixel() function and therefore
+ any applicable color space conversion or value saturation is performed.
+ - if (in_img doesn't contain grayscale pixels) then
+ - The filter is applied to each color channel independently.
+ - Pixels close enough to the edge of in_img to not have the filter still fit
+ inside the image are set to zero.
+ - #out_img.nc() == in_img.nc()
+ - #out_img.nr() == in_img.nr()
+ - returns a rectangle which indicates what pixels in #out_img are considered
+ non-border pixels and therefore contain output from the filter.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ void sum_filter (
+ const image_type1& img,
+ image_type2& out,
+ const rectangle& rect
+ );
+ /*!
+ requires
+ - out.nr() == img.nr()
+ - out.nc() == img.nc()
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h and it must contain grayscale pixels.
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h and it must contain grayscale pixels.
+ - is_same_object(img,out) == false
+ ensures
+ - for all valid r and c:
+ - let SUM(r,c) == sum of pixels from img which are inside the rectangle
+ translate_rect(rect, point(c,r)).
+ - #out[r][c] == out[r][c] + SUM(r,c)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ void sum_filter_assign (
+ const image_type1& img,
+ image_type2& out,
+ const rectangle& rect
+ );
+ /*!
+ requires
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h and it must contain grayscale pixels.
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h and it must contain grayscale pixels.
+ - is_same_object(img,out) == false
+ ensures
+ - #out.nr() == img.nr()
+ - #out.nc() == img.nc()
+ - for all valid r and c:
+ - let SUM(r,c) == sum of pixels from img which are inside the rectangle
+ translate_rect(rect, point(c,r)).
+ - #out[r][c] == SUM(r,c)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename image_type1,
+ typename image_type2
+ >
+ void max_filter (
+ image_type1& img,
+ image_type2& out,
+ const long width,
+ const long height,
+ const typename image_traits<image_type1>::pixel_type& thresh
+ );
+ /*!
+ requires
+ - out.nr() == img.nr()
+ - out.nc() == img.nc()
+ - image_type1 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h and it must contain grayscale pixels.
+ - image_type2 == an image object that implements the interface defined in
+ dlib/image_processing/generic_image.h and it must contain grayscale pixels.
+ - is_same_object(img,out) == false
+ - width > 0 && height > 0
+ ensures
+ - for all valid r and c:
+ - let MAX(r,c) == maximum of pixels from img which are inside the rectangle
+ centered_rect(point(c,r), width, height)
+ - if (MAX(r,c) >= thresh)
+ - #out[r][c] == out[r][c] + MAX(r,c)
+ - else
+ - #out[r][c] == out[r][c] + thresh
+ - Does not change the size of img.
+ - Uses img as scratch space. Therefore, the pixel values in img will have
+ been modified by this function. That is, max_filter() destroys the contents
+ of img.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SPATIAL_FILTERINg_ABSTRACT_
+
diff --git a/ml/dlib/dlib/image_transforms/thresholding.h b/ml/dlib/dlib/image_transforms/thresholding.h
new file mode 100644
index 000000000..e4fb02c4a
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/thresholding.h
@@ -0,0 +1,340 @@
+// Copyright (C) 2006 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_THRESHOLDINg_
+#define DLIB_THRESHOLDINg_
+
+#include "../pixel.h"
+#include "thresholding_abstract.h"
+#include "equalize_histogram.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    // Output values used by every thresholding routine in this header:
+    // on_pixel marks pixels that pass the threshold, off_pixel marks the rest.
+    const unsigned char on_pixel = 255;
+    const unsigned char off_pixel = 0;
+
+// ----------------------------------------------------------------------------------------
+
+    // Thresholds in_img_ and stores the result into out_img_: each pixel whose
+    // grayscale intensity is >= thresh is set to on_pixel, all others are set
+    // to off_pixel.  Color inputs are reduced to intensity via
+    // get_pixel_intensity(); output pixels are written with assign_pixel(), so
+    // any applicable color space conversion/saturation is performed.
+    template <
+        typename in_image_type,
+        typename out_image_type
+        >
+    void threshold_image (
+        const in_image_type& in_img_,
+        out_image_type& out_img_,
+        typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type thresh
+    )
+    {
+        // Alpha channels are not supported, and the output must be grayscale.
+        COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false );
+        COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false );
+
+        COMPILE_TIME_ASSERT(pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale);
+
+        const_image_view<in_image_type> in_img(in_img_);
+        image_view<out_image_type> out_img(out_img_);
+
+        // if there isn't any input image then don't do anything
+        if (in_img.size() == 0)
+        {
+            out_img.clear();
+            return;
+        }
+
+        out_img.set_size(in_img.nr(),in_img.nc());
+
+        for (long r = 0; r < in_img.nr(); ++r)
+        {
+            for (long c = 0; c < in_img.nc(); ++c)
+            {
+                // Binarize on the pixel's grayscale intensity.
+                if (get_pixel_intensity(in_img[r][c]) >= thresh)
+                    assign_pixel(out_img[r][c], on_pixel);
+                else
+                    assign_pixel(out_img[r][c], off_pixel);
+            }
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // In-place overload: thresholds img against thresh by delegating to the
+    // two-image version with img used as both source and destination.
+    template <
+        typename image_type
+        >
+    void threshold_image (
+        image_type& img,
+        typename pixel_traits<typename image_traits<image_type>::pixel_type>::basic_pixel_type thresh
+    )
+    {
+        threshold_image(img,img,thresh);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Automatically thresholds in_img_ into out_img_.  The threshold is chosen
+    // by running k-means clustering with k = 2 on the image's intensity
+    // histogram and splitting half way between the two resulting cluster
+    // means.  The actual binarization is then delegated to threshold_image().
+    template <
+        typename in_image_type,
+        typename out_image_type
+        >
+    void auto_threshold_image (
+        const in_image_type& in_img_,
+        out_image_type& out_img_
+    )
+    {
+        // No alpha channels, unsigned pixel values only (needed for the
+        // histogram), and the output must be grayscale.
+        COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false );
+        COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false );
+        COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::is_unsigned == true );
+        COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::is_unsigned == true );
+
+        COMPILE_TIME_ASSERT(pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale);
+
+        image_view<out_image_type> out_img(out_img_);
+
+        // if there isn't any input image then don't do anything
+        if (image_size(in_img_) == 0)
+        {
+            out_img.clear();
+            return;
+        }
+
+        unsigned long thresh;
+        // find the threshold we should use
+        matrix<unsigned long,1> hist;
+        get_histogram(in_img_,hist);
+
+        // NOTE(review): this view is constructed but never read below --
+        // presumably harmless; confirm it isn't relied on for anything.
+        const_image_view<in_image_type> in_img(in_img_);
+
+        // Start our two means (a and b) out at the ends of the histogram
+        long a = 0;
+        long b = hist.size()-1;
+        bool moved_a = true;
+        bool moved_b = true;
+        // First pull a and b inward past any empty histogram bins so the
+        // clustering below starts on occupied bins.
+        while (moved_a || moved_b)
+        {
+            moved_a = false;
+            moved_b = false;
+
+            // catch the degenerate case where the histogram is empty
+            if (a >= b)
+                break;
+
+            if (hist(a) == 0)
+            {
+                ++a;
+                moved_a = true;
+            }
+
+            if (hist(b) == 0)
+            {
+                --b;
+                moved_b = true;
+            }
+        }
+
+        // now do k-means clustering with k = 2 on the histogram.
+        moved_a = true;
+        moved_b = true;
+        while (moved_a || moved_b)
+        {
+            moved_a = false;
+            moved_b = false;
+
+            int64 a_hits = 0;
+            int64 b_hits = 0;
+            int64 a_mass = 0;
+            int64 b_mass = 0;
+
+            // Assign every histogram bin to whichever mean it is closer to,
+            // accumulating bin counts (hits) and weighted sums (mass).
+            for (long i = 0; i < hist.size(); ++i)
+            {
+                // if i is closer to a
+                if (std::abs(i-a) < std::abs(i-b))
+                {
+                    a_mass += hist(i)*i;
+                    a_hits += hist(i);
+                }
+                else // if i is closer to b
+                {
+                    b_mass += hist(i)*i;
+                    b_hits += hist(i);
+                }
+            }
+
+            // Recompute each mean as the rounded average of its bins
+            // (adding hits/2 before dividing rounds to nearest).
+            // NOTE(review): a_hits or b_hits could be 0 if one cluster
+            // captures every occupied bin -- confirm this cannot divide by
+            // zero in practice.
+            long new_a = (a_mass + a_hits/2)/a_hits;
+            long new_b = (b_mass + b_hits/2)/b_hits;
+
+            if (new_a != a)
+            {
+                moved_a = true;
+                a = new_a;
+            }
+
+            if (new_b != b)
+            {
+                moved_b = true;
+                b = new_b;
+            }
+        }
+
+        // put the threshold between the two means we found
+        thresh = (a + b)/2;
+
+        // now actually apply the threshold
+        threshold_image(in_img_,out_img_,thresh);
+    }
+
+    // In-place overload: auto-thresholds img by delegating to the two-image
+    // version with img used as both source and destination.
+    template <
+        typename image_type
+        >
+    void auto_threshold_image (
+        image_type& img
+    )
+    {
+        auto_threshold_image(img,img);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Hysteresis thresholding: pixels with intensity >= upper_thresh become
+    // on_pixel, and any pixel >= lower_thresh that is 8-connected (through a
+    // chain of pixels >= lower_thresh) to such a pixel also becomes on_pixel.
+    // Everything else becomes off_pixel.  Connectivity is traced with an
+    // explicit depth-first stack rather than recursion.
+    template <
+        typename in_image_type,
+        typename out_image_type
+        >
+    void hysteresis_threshold (
+        const in_image_type& in_img_,
+        out_image_type& out_img_,
+        typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type lower_thresh,
+        typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type upper_thresh
+    )
+    {
+        // Alpha channels are not supported, and the output must be grayscale.
+        COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false );
+        COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false );
+
+        COMPILE_TIME_ASSERT(pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale);
+
+        DLIB_ASSERT( lower_thresh <= upper_thresh && is_same_object(in_img_, out_img_) == false,
+            "\tvoid hysteresis_threshold(in_img_, out_img_, lower_thresh, upper_thresh)"
+            << "\n\tYou can't use an upper_thresh that is less than your lower_thresh"
+            << "\n\tlower_thresh: " << lower_thresh
+            << "\n\tupper_thresh: " << upper_thresh
+            << "\n\tis_same_object(in_img_,out_img_): " << is_same_object(in_img_,out_img_)
+            );
+
+        const_image_view<in_image_type> in_img(in_img_);
+        image_view<out_image_type> out_img(out_img_);
+
+        // if there isn't any input image then don't do anything
+        if (in_img.size() == 0)
+        {
+            out_img.clear();
+            return;
+        }
+
+        out_img.set_size(in_img.nr(),in_img.nc());
+        assign_all_pixels(out_img, off_pixel);
+
+        // Fixed-capacity DFS stack used for the line following below.
+        // NOTE(review): when the stack is full, the pos < size guards below
+        // silently drop neighbors, so very large connected regions may be
+        // only partially traced -- confirm this bound is acceptable.
+        const long size = 1000;
+        long rstack[size];
+        long cstack[size];
+
+        // now do the thresholding
+        for (long r = 0; r < in_img.nr(); ++r)
+        {
+            for (long c = 0; c < in_img.nc(); ++c)
+            {
+                typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type p;
+                assign_pixel(p,in_img[r][c]);
+                if (p >= upper_thresh)
+                {
+                    // Strong pixel found: follow all connected pixels that
+                    // are >= lower_thresh.  Seed the stack with this pixel.
+                    long pos = 1;
+                    rstack[0] = r;
+                    cstack[0] = c;
+
+                    while (pos > 0)
+                    {
+                        --pos;
+                        // Note: these shadow the outer loop's r and c on purpose.
+                        const long r = rstack[pos];
+                        const long c = cstack[pos];
+
+                        // This is the base case of our recursion.  We want to stop if we hit a
+                        // pixel we have already visited.
+                        if (out_img[r][c] == on_pixel)
+                            continue;
+
+                        out_img[r][c] = on_pixel;
+
+                        // put the neighbors of this pixel on the stack if they are bright enough
+                        if (r-1 >= 0)
+                        {
+                            if (pos < size && get_pixel_intensity(in_img[r-1][c]) >= lower_thresh)
+                            {
+                                rstack[pos] = r-1;
+                                cstack[pos] = c;
+                                ++pos;
+                            }
+                            if (pos < size && c-1 >= 0 && get_pixel_intensity(in_img[r-1][c-1]) >= lower_thresh)
+                            {
+                                rstack[pos] = r-1;
+                                cstack[pos] = c-1;
+                                ++pos;
+                            }
+                            if (pos < size && c+1 < in_img.nc() && get_pixel_intensity(in_img[r-1][c+1]) >= lower_thresh)
+                            {
+                                rstack[pos] = r-1;
+                                cstack[pos] = c+1;
+                                ++pos;
+                            }
+                        }
+
+                        if (pos < size && c-1 >= 0 && get_pixel_intensity(in_img[r][c-1]) >= lower_thresh)
+                        {
+                            rstack[pos] = r;
+                            cstack[pos] = c-1;
+                            ++pos;
+                        }
+                        if (pos < size && c+1 < in_img.nc() && get_pixel_intensity(in_img[r][c+1]) >= lower_thresh)
+                        {
+                            rstack[pos] = r;
+                            cstack[pos] = c+1;
+                            ++pos;
+                        }
+
+                        if (r+1 < in_img.nr())
+                        {
+                            if (pos < size && get_pixel_intensity(in_img[r+1][c]) >= lower_thresh)
+                            {
+                                rstack[pos] = r+1;
+                                cstack[pos] = c;
+                                ++pos;
+                            }
+                            if (pos < size && c-1 >= 0 && get_pixel_intensity(in_img[r+1][c-1]) >= lower_thresh)
+                            {
+                                rstack[pos] = r+1;
+                                cstack[pos] = c-1;
+                                ++pos;
+                            }
+                            if (pos < size && c+1 < in_img.nc() && get_pixel_intensity(in_img[r+1][c+1]) >= lower_thresh)
+                            {
+                                rstack[pos] = r+1;
+                                cstack[pos] = c+1;
+                                ++pos;
+                            }
+                        }
+
+                    } // end while (pos > 0)
+
+                }
+                else
+                {
+                    // Already off_pixel from assign_all_pixels() above; this
+                    // write is redundant but harmless.
+                    out_img[r][c] = off_pixel;
+                }
+
+            }
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_THRESHOLDINg_
+
diff --git a/ml/dlib/dlib/image_transforms/thresholding_abstract.h b/ml/dlib/dlib/image_transforms/thresholding_abstract.h
new file mode 100644
index 000000000..e7c1e8826
--- /dev/null
+++ b/ml/dlib/dlib/image_transforms/thresholding_abstract.h
@@ -0,0 +1,139 @@
+// Copyright (C) 2006 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_THRESHOLDINg_ABSTRACT_
+#ifdef DLIB_THRESHOLDINg_ABSTRACT_
+
+#include "../pixel.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ const unsigned char on_pixel = 255;
+ const unsigned char off_pixel = 0;
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type
+ >
+ void threshold_image (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type thresh
+ );
+ /*!
+ requires
+            - in_image_type == an image object that implements the interface defined in
+              dlib/image_processing/generic_image.h
+            - out_image_type == an image object that implements the interface defined in
+              dlib/image_processing/generic_image.h
+ - pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true
+ - pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false
+ - pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false
+ ensures
+ - #out_img == the thresholded version of in_img (in_img is converted to a grayscale
+ intensity image if it is color). Pixels in in_img with grayscale values >= thresh
+ have an output value of on_pixel and all others have a value of off_pixel.
+ - #out_img.nc() == in_img.nc()
+ - #out_img.nr() == in_img.nr()
+ !*/
+
+ template <
+ typename image_type
+ >
+ void threshold_image (
+ image_type& img,
+ typename pixel_traits<typename image_traits<image_type>::pixel_type>::basic_pixel_type thresh
+ );
+ /*!
+ requires
+ - it is valid to call threshold_image(img,img,thresh);
+ ensures
+ - calls threshold_image(img,img,thresh);
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type
+ >
+ void auto_threshold_image (
+ const in_image_type& in_img,
+ out_image_type& out_img
+ );
+ /*!
+ requires
+            - in_image_type == an image object that implements the interface defined in
+              dlib/image_processing/generic_image.h
+            - out_image_type == an image object that implements the interface defined in
+              dlib/image_processing/generic_image.h
+ - pixel_traits<typename image_traits<in_image_type>::pixel_type>::max() <= 65535
+ - pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false
+ - pixel_traits<typename image_traits<in_image_type>::pixel_type>::is_unsigned == true
+ - pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true
+ - pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false
+ - pixel_traits<typename image_traits<out_image_type>::pixel_type>::is_unsigned == true
+ ensures
+ - #out_img == the thresholded version of in_img (in_img is converted to a grayscale
+ intensity image if it is color). Pixels in in_img with grayscale values >= thresh
+ have an output value of on_pixel and all others have a value of off_pixel.
+ - The thresh value used is determined by performing a k-means clustering
+ on the input image histogram with a k of 2. The point between the two
+ means found is used as the thresh value.
+ - #out_img.nc() == in_img.nc()
+ - #out_img.nr() == in_img.nr()
+ !*/
+
+ template <
+ typename image_type
+ >
+ void auto_threshold_image (
+ image_type& img
+ );
+ /*!
+ requires
+ - it is valid to call auto_threshold_image(img,img);
+ ensures
+ - calls auto_threshold_image(img,img);
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename in_image_type,
+ typename out_image_type
+ >
+ void hysteresis_threshold (
+ const in_image_type& in_img,
+ out_image_type& out_img,
+ typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type lower_thresh,
+ typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type upper_thresh
+ );
+ /*!
+ requires
+            - in_image_type == an image object that implements the interface defined in
+              dlib/image_processing/generic_image.h
+            - out_image_type == an image object that implements the interface defined in
+              dlib/image_processing/generic_image.h
+ - pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true
+ - pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false
+ - pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false
+ - lower_thresh <= upper_thresh
+ - is_same_object(in_img, out_img) == false
+ ensures
+ - #out_img == the hysteresis thresholded version of in_img (in_img is converted to a
+ grayscale intensity image if it is color). Pixels in in_img with grayscale
+ values >= upper_thresh have an output value of on_pixel and all others have a
+ value of off_pixel unless they are >= lower_thresh and are connected to a pixel
+ with a value >= upper_thresh, in which case they have a value of on_pixel. Here
+ pixels are connected if there is a path between them composed of pixels that
+ would receive an output of on_pixel.
+ - #out_img.nc() == in_img.nc()
+ - #out_img.nr() == in_img.nr()
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_THRESHOLDINg_ABSTRACT_
+
+