diff options
Diffstat (limited to 'ml/dlib/dlib/image_transforms')
36 files changed, 16458 insertions, 0 deletions
diff --git a/ml/dlib/dlib/image_transforms/assign_image.h b/ml/dlib/dlib/image_transforms/assign_image.h new file mode 100644 index 000000000..c69878efa --- /dev/null +++ b/ml/dlib/dlib/image_transforms/assign_image.h @@ -0,0 +1,385 @@ +// Copyright (C) 2007 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_ASSIGN_IMAGe_ +#define DLIB_ASSIGN_IMAGe_ + +#include "../pixel.h" +#include "assign_image_abstract.h" +#include "../statistics.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename dest_image_type, + typename src_image_type + > + void impl_assign_image ( + image_view<dest_image_type>& dest, + const src_image_type& src + ) + { + dest.set_size(src.nr(),src.nc()); + for (long r = 0; r < src.nr(); ++r) + { + for (long c = 0; c < src.nc(); ++c) + { + assign_pixel(dest[r][c], src(r,c)); + } + } + } + + template < + typename dest_image_type, + typename src_image_type + > + void impl_assign_image ( + dest_image_type& dest_, + const src_image_type& src + ) + { + image_view<dest_image_type> dest(dest_); + impl_assign_image(dest, src); + } + + template < + typename dest_image_type, + typename src_image_type + > + void assign_image ( + dest_image_type& dest, + const src_image_type& src + ) + { + // check for the case where dest is the same object as src + if (is_same_object(dest,src)) + return; + + impl_assign_image(dest, mat(src)); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename dest_image_type, + typename src_image_type + > + void impl_assign_image_scaled ( + image_view<dest_image_type>& dest, + const src_image_type& src, + const double thresh + ) + { + DLIB_ASSERT( thresh > 0, + "\tvoid assign_image_scaled()" + << "\n\t You have given an threshold value" + << "\n\t thresh: " << thresh + ); + + + typedef typename 
image_traits<dest_image_type>::pixel_type dest_pixel; + + // If the destination has a dynamic range big enough to contain the source image data then just do a + // regular assign_image() + if (pixel_traits<dest_pixel>::max() >= pixel_traits<typename src_image_type::type>::max() && + pixel_traits<dest_pixel>::min() <= pixel_traits<typename src_image_type::type>::min() ) + { + impl_assign_image(dest, src); + return; + } + + dest.set_size(src.nr(),src.nc()); + + if (src.size() == 0) + return; + + if (src.size() == 1) + { + impl_assign_image(dest, src); + return; + } + + // gather image statistics + running_stats<double> rs; + for (long r = 0; r < src.nr(); ++r) + { + for (long c = 0; c < src.nc(); ++c) + { + rs.add(get_pixel_intensity(src(r,c))); + } + } + typedef typename pixel_traits<typename src_image_type::type>::basic_pixel_type spix_type; + + if (std::numeric_limits<spix_type>::is_integer) + { + // If the destination has a dynamic range big enough to contain the source image data then just do a + // regular assign_image() + if (pixel_traits<dest_pixel>::max() >= rs.max() && + pixel_traits<dest_pixel>::min() <= rs.min() ) + { + impl_assign_image(dest, src); + return; + } + } + + // Figure out the range of pixel values based on image statistics. There might be some huge + // outliers so don't just pick the min and max values. + const double upper = std::min(rs.mean() + thresh*rs.stddev(), rs.max()); + const double lower = std::max(rs.mean() - thresh*rs.stddev(), rs.min()); + + + const double dest_min = pixel_traits<dest_pixel>::min(); + const double dest_max = pixel_traits<dest_pixel>::max(); + + const double scale = (upper!=lower)? 
((dest_max - dest_min) / (upper - lower)) : 0; + + for (long r = 0; r < src.nr(); ++r) + { + for (long c = 0; c < src.nc(); ++c) + { + const double val = get_pixel_intensity(src(r,c)) - lower; + + assign_pixel(dest[r][c], scale*val + dest_min); + } + } + } + + template < + typename dest_image_type, + typename src_image_type + > + void impl_assign_image_scaled ( + dest_image_type& dest_, + const src_image_type& src, + const double thresh + ) + { + image_view<dest_image_type> dest(dest_); + impl_assign_image_scaled(dest, src, thresh); + } + + template < + typename dest_image_type, + typename src_image_type + > + void assign_image_scaled ( + dest_image_type& dest, + const src_image_type& src, + const double thresh = 4 + ) + { + // check for the case where dest is the same object as src + if (is_same_object(dest,src)) + return; + + impl_assign_image_scaled(dest, mat(src),thresh); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename dest_image_type, + typename src_pixel_type + > + void assign_all_pixels ( + image_view<dest_image_type>& dest_img, + const src_pixel_type& src_pixel + ) + { + for (long r = 0; r < dest_img.nr(); ++r) + { + for (long c = 0; c < dest_img.nc(); ++c) + { + assign_pixel(dest_img[r][c], src_pixel); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename dest_image_type, + typename src_pixel_type + > + void assign_all_pixels ( + dest_image_type& dest_img_, + const src_pixel_type& src_pixel + ) + { + image_view<dest_image_type> dest_img(dest_img_); + assign_all_pixels(dest_img, src_pixel); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void assign_border_pixels ( + image_view<image_type>& img, + long x_border_size, + long y_border_size, + const typename image_traits<image_type>::pixel_type& p + ) + { + 
DLIB_ASSERT( x_border_size >= 0 && y_border_size >= 0, + "\tvoid assign_border_pixels(img, p, border_size)" + << "\n\tYou have given an invalid border_size" + << "\n\tx_border_size: " << x_border_size + << "\n\ty_border_size: " << y_border_size + ); + + y_border_size = std::min(y_border_size, img.nr()/2+1); + x_border_size = std::min(x_border_size, img.nc()/2+1); + + // assign the top border + for (long r = 0; r < y_border_size; ++r) + { + for (long c = 0; c < img.nc(); ++c) + { + img[r][c] = p; + } + } + + // assign the bottom border + for (long r = img.nr()-y_border_size; r < img.nr(); ++r) + { + for (long c = 0; c < img.nc(); ++c) + { + img[r][c] = p; + } + } + + // now assign the two sides + for (long r = y_border_size; r < img.nr()-y_border_size; ++r) + { + // left border + for (long c = 0; c < x_border_size; ++c) + img[r][c] = p; + + // right border + for (long c = img.nc()-x_border_size; c < img.nc(); ++c) + img[r][c] = p; + } + } + + template < + typename image_type + > + void assign_border_pixels ( + image_type& img_, + long x_border_size, + long y_border_size, + const typename image_traits<image_type>::pixel_type& p + ) + { + image_view<image_type> img(img_); + assign_border_pixels(img, x_border_size, y_border_size, p); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void zero_border_pixels ( + image_type& img, + long x_border_size, + long y_border_size + ) + { + DLIB_ASSERT( x_border_size >= 0 && y_border_size >= 0, + "\tvoid zero_border_pixels(img, p, border_size)" + << "\n\tYou have given an invalid border_size" + << "\n\tx_border_size: " << x_border_size + << "\n\ty_border_size: " << y_border_size + ); + + typename image_traits<image_type>::pixel_type zero_pixel; + assign_pixel_intensity(zero_pixel, 0); + assign_border_pixels(img, x_border_size, y_border_size, zero_pixel); + } + +// 
---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void zero_border_pixels ( + image_view<image_type>& img, + long x_border_size, + long y_border_size + ) + { + DLIB_ASSERT( x_border_size >= 0 && y_border_size >= 0, + "\tvoid zero_border_pixels(img, p, border_size)" + << "\n\tYou have given an invalid border_size" + << "\n\tx_border_size: " << x_border_size + << "\n\ty_border_size: " << y_border_size + ); + + typename image_traits<image_type>::pixel_type zero_pixel; + assign_pixel_intensity(zero_pixel, 0); + assign_border_pixels(img, x_border_size, y_border_size, zero_pixel); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void zero_border_pixels ( + image_view<image_type>& img, + rectangle inside + ) + { + inside = inside.intersect(get_rect(img)); + if (inside.is_empty()) + { + assign_all_pixels(img, 0); + return; + } + + for (long r = 0; r < inside.top(); ++r) + { + for (long c = 0; c < img.nc(); ++c) + assign_pixel(img[r][c], 0); + } + for (long r = inside.top(); r <= inside.bottom(); ++r) + { + for (long c = 0; c < inside.left(); ++c) + assign_pixel(img[r][c], 0); + for (long c = inside.right()+1; c < img.nc(); ++c) + assign_pixel(img[r][c], 0); + } + for (long r = inside.bottom()+1; r < img.nr(); ++r) + { + for (long c = 0; c < img.nc(); ++c) + assign_pixel(img[r][c], 0); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void zero_border_pixels ( + image_type& img_, + const rectangle& inside + ) + { + image_view<image_type> img(img_); + zero_border_pixels(img, inside); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_ASSIGN_IMAGe_ + + + diff --git a/ml/dlib/dlib/image_transforms/assign_image_abstract.h 
b/ml/dlib/dlib/image_transforms/assign_image_abstract.h new file mode 100644 index 000000000..5ba262ba5 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/assign_image_abstract.h @@ -0,0 +1,196 @@ +// Copyright (C) 2007 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_ASSIGN_IMAGe_ABSTRACT +#ifdef DLIB_ASSIGN_IMAGe_ABSTRACT + +#include "../pixel.h" +#include "../image_processing/generic_image.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename dest_image_type, + typename src_image_type + > + void assign_image ( + dest_image_type& dest_img, + const src_image_type& src_img + ); + /*! + requires + - src_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h or any object convertible to a matrix + via mat(). + - dest_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h or an image_view. + ensures + - #dest_img.nc() == src_img.nc() + - #dest_img.nr() == src_img.nr() + - for all valid r and c: + - performs assign_pixel(#dest_img[r][c],src_img[r][c]) + (i.e. copies the src image to dest image) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename dest_image_type, + typename src_image_type + > + void assign_image_scaled ( + dest_image_type& dest_img, + const src_image_type& src_img, + const double thresh = 4 + ); + /*! + requires + - src_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h or any object convertible to a matrix + via mat(). + - dest_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h or an image_view. 
+ - thresh > 0 + ensures + - #dest_img.nc() == src_img.nc() + - #dest_img.nr() == src_img.nr() + - if (dest_img's pixels have a wide enough dynamic range to contain all the + pixels in src_img. (Note that dynamic range is determined by the min() and + max() pixel_traits properties)) then + - performs: assign_image(dest_img, src_img) + (i.e. in this case, no scaling is performed. Just a normal color space + conversion and copy ) + - else + - #dest_img will be converted to a grayscale image + - scales the contents of src_img into the dynamic range of dest_img and then + assigns the result into dest_img. The thresh parameter is used to filter + source pixel values which are outliers. These outliers will saturate + at the edge of the destination image's dynamic range. + - Specifically, for all valid r and c: + - scales get_pixel_intensity(src_img[r][c]) into the dynamic range + of the dest_img. This is done by computing the mean and standard + deviation of src_img. Call the mean M and the standard deviation + D. Then the scaling from src_img to dest_img is performed using + the following mapping: + let SRC_UPPER = min(M + thresh*D, max(mat(src_img))) + let SRC_LOWER = max(M - thresh*D, min(mat(src_img))) + let DEST_UPPER = pixel_traits<image_traits<dest_image_type>::pixel_type>::max() + let DEST_LOWER = pixel_traits<image_traits<dest_image_type>::pixel_type>::min() + + MAPPING: [SRC_LOWER, SRC_UPPER] -> [DEST_LOWER, DEST_UPPER] + + Where this mapping is a linear mapping of values from the left range + into the right range of values. Source pixel values outside the left + range are modified to be at the appropriate end of the range. + + The scaled pixel is then stored in dest_img[r][c]. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename dest_image_type, + typename src_pixel_type + > + void assign_all_pixels ( + dest_image_type& dest_img, + const src_pixel_type& src_pixel + ); + /*! 
+ requires + - dest_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h or an image_view. + - pixel_traits<src_pixel_type> is defined + ensures + - #dest_img.nc() == dest_img.nc() + - #dest_img.nr() == dest_img.nr() + (i.e. the size of dest_img isn't changed by this function) + - for all valid r and c: + - performs assign_pixel(#dest_img[r][c],src_pixel) + (i.e. assigns the src pixel to every pixel in the dest image) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void assign_border_pixels ( + image_type& img, + long x_border_size, + long y_border_size, + const typename image_traits<image_type>::pixel_type& p + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h or an image_view + - x_border_size >= 0 + - y_border_size >= 0 + ensures + - #img.nc() == img.nc() + - #img.nr() == img.nr() + (i.e. the size of img isn't changed by this function) + - for all valid r such that r+y_border_size or r-y_border_size gives an invalid row + - for all valid c such that c+x_border_size or c-x_border_size gives an invalid column + - performs assign_pixel(#img[r][c],p) + (i.e. assigns the given pixel to every pixel in the border of img) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void zero_border_pixels ( + image_type& img, + long x_border_size, + long y_border_size + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h or an image_view + - x_border_size >= 0 + - y_border_size >= 0 + ensures + - #img.nc() == img.nc() + - #img.nr() == img.nr() + (i.e. 
the size of img isn't changed by this function) + - for all valid r such that r+y_border_size or r-y_border_size gives an invalid row + - for all valid c such that c+x_border_size or c-x_border_size gives an invalid column + - performs assign_pixel(#img[r][c], 0 ) + (i.e. assigns 0 to every pixel in the border of img) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void zero_border_pixels ( + image_type& img, + rectangle inside + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h or an image_view + ensures + - #img.nc() == img.nc() + - #img.nr() == img.nr() + (i.e. the size of img isn't changed by this function) + - All the pixels in img that are not contained inside the inside rectangle + given to this function are set to 0. That is, anything not "inside" is on + the border and set to 0. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_ASSIGN_IMAGe_ABSTRACT + + diff --git a/ml/dlib/dlib/image_transforms/colormaps.h b/ml/dlib/dlib/image_transforms/colormaps.h new file mode 100644 index 000000000..813d1ff75 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/colormaps.h @@ -0,0 +1,269 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_RANDOMLY_COlOR_IMAGE_Hh_ +#define DLIB_RANDOMLY_COlOR_IMAGE_Hh_ + +#include "colormaps_abstract.h" +#include "../hash.h" +#include "../pixel.h" +#include "../matrix.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template <typename T> + struct op_randomly_color_image : does_not_alias + { + op_randomly_color_image( const T& img_) : img(img_){} + + const T& img; + + const static long cost = 7; + const static long NR = 0; + const static long NC = 0; + typedef rgb_pixel type; + typedef const rgb_pixel const_ret_type; + typedef default_memory_manager mem_manager_type; + typedef row_major_layout layout_type; + + const_ret_type apply (long r, long c ) const + { + const unsigned long gray = get_pixel_intensity(mat(img)(r,c)); + if (gray != 0) + { + const uint32 h = murmur_hash3_2(gray,0); + rgb_pixel pix; + pix.red = static_cast<unsigned char>(h)%200 + 55; + pix.green = static_cast<unsigned char>(h>>8)%200 + 55; + pix.blue = static_cast<unsigned char>(h>>16)%200 + 55; + return pix; + } + else + { + // keep black pixels black + return rgb_pixel(0,0,0); + } + } + + long nr () const { return num_rows(img); } + long nc () const { return num_columns(img); } + }; + + template < + typename image_type + > + const matrix_op<op_randomly_color_image<image_type> > + randomly_color_image ( + const image_type& img + ) + { + typedef op_randomly_color_image<image_type> op; + return matrix_op<op>(op(img)); + } + +// ---------------------------------------------------------------------------------------- + + inline rgb_pixel colormap_heat ( + double value, + double min_val, + double max_val + ) + { + // scale the gray value into the range [0, 1] + const double gray = put_in_range(0, 1, (value - min_val)/(max_val-min_val)); + rgb_pixel pix(0,0,0); + + pix.red = static_cast<unsigned char>(std::min(gray/0.4,1.0)*255 + 0.5); + + if (gray > 0.4) + { + pix.green = static_cast<unsigned 
char>(std::min((gray-0.4)/0.4,1.0)*255 + 0.5); + } + if (gray > 0.8) + { + pix.blue = static_cast<unsigned char>(std::min((gray-0.8)/0.2,1.0)*255 + 0.5); + } + + return pix; + } + +// ---------------------------------------------------------------------------------------- + + template <typename T> + struct op_heatmap : does_not_alias + { + op_heatmap( + const T& img_, + const double max_val_, + const double min_val_ + ) : img(img_), max_val(max_val_), min_val(min_val_){} + + const T& img; + + const double max_val; + const double min_val; + + const static long cost = 7; + const static long NR = 0; + const static long NC = 0; + typedef rgb_pixel type; + typedef const rgb_pixel const_ret_type; + typedef default_memory_manager mem_manager_type; + typedef row_major_layout layout_type; + + const_ret_type apply (long r, long c ) const + { + return colormap_heat(get_pixel_intensity(mat(img)(r,c)), min_val, max_val); + } + + long nr () const { return num_rows(img); } + long nc () const { return num_columns(img); } + }; + + template < + typename image_type + > + const matrix_op<op_heatmap<image_type> > + heatmap ( + const image_type& img, + double max_val, + double min_val = 0 + ) + { + typedef op_heatmap<image_type> op; + return matrix_op<op>(op(img,max_val,min_val)); + } + + template < + typename image_type + > + const matrix_op<op_heatmap<image_type> > + heatmap ( + const image_type& img + ) + { + typedef op_heatmap<image_type> op; + if (num_columns(img) * num_rows(img) != 0) + return matrix_op<op>(op(img,max(mat(img)),min(mat(img)))); + else + return matrix_op<op>(op(img,0,0)); + } + +// ---------------------------------------------------------------------------------------- + + inline rgb_pixel colormap_jet ( + double value, + double min_val, + double max_val + ) + { + // scale the gray value into the range [0, 8] + const double gray = 8*put_in_range(0, 1, (value - min_val)/(max_val-min_val)); + rgb_pixel pix; + // s is the slope of color change + const double s = 
1.0/2.0; + + if (gray <= 1) + { + pix.red = 0; + pix.green = 0; + pix.blue = static_cast<unsigned char>((gray+1)*s*255 + 0.5); + } + else if (gray <= 3) + { + pix.red = 0; + pix.green = static_cast<unsigned char>((gray-1)*s*255 + 0.5); + pix.blue = 255; + } + else if (gray <= 5) + { + pix.red = static_cast<unsigned char>((gray-3)*s*255 + 0.5); + pix.green = 255; + pix.blue = static_cast<unsigned char>((5-gray)*s*255 + 0.5); + } + else if (gray <= 7) + { + pix.red = 255; + pix.green = static_cast<unsigned char>((7-gray)*s*255 + 0.5); + pix.blue = 0; + } + else + { + pix.red = static_cast<unsigned char>((9-gray)*s*255 + 0.5); + pix.green = 0; + pix.blue = 0; + } + + return pix; + } + +// ---------------------------------------------------------------------------------------- + + template <typename T> + struct op_jet : does_not_alias + { + op_jet( + const T& img_, + const double max_val_, + const double min_val_ + ) : img(img_), max_val(max_val_), min_val(min_val_){} + + const T& img; + + const double max_val; + const double min_val; + + const static long cost = 7; + const static long NR = 0; + const static long NC = 0; + typedef rgb_pixel type; + typedef const rgb_pixel const_ret_type; + typedef default_memory_manager mem_manager_type; + typedef row_major_layout layout_type; + + const_ret_type apply (long r, long c ) const + { + return colormap_jet(get_pixel_intensity(mat(img)(r,c)), min_val, max_val); + } + + long nr () const { return num_rows(img); } + long nc () const { return num_columns(img); } + }; + + template < + typename image_type + > + const matrix_op<op_jet<image_type> > + jet ( + const image_type& img, + double max_val, + double min_val = 0 + ) + { + typedef op_jet<image_type> op; + return matrix_op<op>(op(img,max_val,min_val)); + } + + template < + typename image_type + > + const matrix_op<op_jet<image_type> > + jet ( + const image_type& img + ) + { + typedef op_jet<image_type> op; + if (num_columns(img) * num_rows(img) != 0) + return 
matrix_op<op>(op(img,max(mat(img)),min(mat(img)))); + else + return matrix_op<op>(op(img,0,0)); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RANDOMLY_COlOR_IMAGE_Hh_ + diff --git a/ml/dlib/dlib/image_transforms/colormaps_abstract.h b/ml/dlib/dlib/image_transforms/colormaps_abstract.h new file mode 100644 index 000000000..41a7784ba --- /dev/null +++ b/ml/dlib/dlib/image_transforms/colormaps_abstract.h @@ -0,0 +1,152 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_RANDOMLY_COlOR_IMAGE_ABSTRACT_Hh_ +#ifdef DLIB_RANDOMLY_COlOR_IMAGE_ABSTRACT_Hh_ + +#include "../hash.h" +#include "../pixel.h" +#include "../matrix.h" +#include "../image_processing/generic_image.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + const matrix_exp randomly_color_image ( + const image_type& img + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h, or something convertible to a matrix + via mat(). + ensures + - randomly generates a mapping from gray level pixel values + to the RGB pixel space and then uses this mapping to create + a colored version of img. Returns a matrix which represents + this colored version of img. + - black pixels in img will remain black in the output image. + - The returned matrix will have the same dimensions as img. + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + rgb_pixel colormap_heat ( + double value, + double min_val, + double max_val + ); + /*! 
+ requires + - min_val <= max_val + ensures + - Maps value to a color. In particular, we use a heatmap color scheme where + values <= min_val are black and larger values become more red, then yellow, + and then white as they approach max_val. + !*/ + + template < + typename image_type + > + const matrix_exp heatmap ( + const image_type& img, + double max_val, + double min_val = 0 + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h, or something convertible to a matrix + via mat(). + ensures + - Interprets img as a grayscale image and returns a new matrix which represents + a colored version of img. In particular, the colormap is defined by + out_color = colormap_heat(grayscale_pixel_value, min_val, max_val). + - The returned matrix will have the same dimensions as img. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + const matrix_exp heatmap ( + const image_type& img + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h, or something convertible to a matrix + via mat(). + ensures + - returns heatmap(img, max(mat(img)), min(mat(img))) + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + rgb_pixel colormap_jet ( + double value, + double min_val, + double max_val + ); + /*! + requires + - min_val <= max_val + ensures + - Maps value to a color. In particular, we use a jet color scheme where + values <= min_val are dark blue and larger values become light blue, then + yellow, and then finally red as they approach max_val. + !*/ + + template < + typename image_type + > + const matrix_exp jet ( + const image_type& img, + double max_val, + double min_val = 0 + ); + /*! 
+ requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h, or something convertible to a matrix + via mat(). + ensures + - Interprets img as a grayscale image and returns a new matrix which represents + a colored version of img. In particular, the colormap is defined by + out_color = colormap_jet(grayscale_pixel_value, min_val, max_val). + - The returned matrix will have the same dimensions as img. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + const matrix_exp jet ( + const image_type& img + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h, or something convertible to a matrix + via mat(). + ensures + - returns jet(img, max(mat(img)), min(mat(img))) + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RANDOMLY_COlOR_IMAGE_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/image_transforms/draw.h b/ml/dlib/dlib/image_transforms/draw.h new file mode 100644 index 000000000..66737b215 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/draw.h @@ -0,0 +1,396 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_DRAW_IMAGe_ +#define DLIB_DRAW_IMAGe_ + +#include "draw_abstract.h" +#include "../algs.h" +#include "../pixel.h" +#include "../matrix.h" +#include <cmath> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename pixel_type + > + void draw_line ( + long x1, + long y1, + long x2, + long y2, + image_type& c_, + const pixel_type& val + ) + { + image_view<image_type> c(c_); + if (x1 == x2) + { + // make sure y1 comes before y2 + if (y1 > y2) + swap(y1,y2); + + if (x1 < 0 || x1 >= c.nc()) + return; + + + // this is a vertical line + for (long y = y1; y <= y2; ++y) + { + if (y < 0 || y >= c.nr()) + continue; + + assign_pixel(c[y][x1], val); + } + } + else if (y1 == y2) + { + + // make sure x1 comes before x2 + if (x1 > x2) + swap(x1,x2); + + if (y1 < 0 || y1 >= c.nr()) + return; + + // this is a horizontal line + for (long x = x1; x <= x2; ++x) + { + if (x < 0 || x >= c.nc()) + continue; + + assign_pixel(c[y1][x] , val); + } + } + else + { + // This part is a little more complicated because we are going to perform alpha + // blending so the diagonal lines look nice. 
+ const rectangle valid_area = get_rect(c); + rgb_alpha_pixel alpha_pixel; + assign_pixel(alpha_pixel, val); + const unsigned char max_alpha = alpha_pixel.alpha; + + const long rise = (((long)y2) - ((long)y1)); + const long run = (((long)x2) - ((long)x1)); + if (std::abs(rise) < std::abs(run)) + { + const double slope = ((double)rise)/run; + + + double first, last; + + + if (x1 > x2) + { + first = std::max(x2,valid_area.left()); + last = std::min(x1,valid_area.right()); + } + else + { + first = std::max(x1,valid_area.left()); + last = std::min(x2,valid_area.right()); + } + + long y; + long x; + const double x1f = x1; + const double y1f = y1; + for (double i = first; i <= last; ++i) + { + const double dy = slope*(i-x1f) + y1f; + const double dx = i; + + y = static_cast<long>(dy); + x = static_cast<long>(dx); + + + if (y >= valid_area.top() && y <= valid_area.bottom()) + { + alpha_pixel.alpha = static_cast<unsigned char>((1.0-(dy-y))*max_alpha); + assign_pixel(c[y][x], alpha_pixel); + } + if (y+1 >= valid_area.top() && y+1 <= valid_area.bottom()) + { + alpha_pixel.alpha = static_cast<unsigned char>((dy-y)*max_alpha); + assign_pixel(c[y+1][x], alpha_pixel); + } + } + } + else + { + const double slope = ((double)run)/rise; + + + double first, last; + + + if (y1 > y2) + { + first = std::max(y2,valid_area.top()); + last = std::min(y1,valid_area.bottom()); + } + else + { + first = std::max(y1,valid_area.top()); + last = std::min(y2,valid_area.bottom()); + } + + long x; + long y; + const double x1f = x1; + const double y1f = y1; + for (double i = first; i <= last; ++i) + { + const double dx = slope*(i-y1f) + x1f; + const double dy = i; + + y = static_cast<long>(dy); + x = static_cast<long>(dx); + + if (x >= valid_area.left() && x <= valid_area.right()) + { + alpha_pixel.alpha = static_cast<unsigned char>((1.0-(dx-x))*max_alpha); + assign_pixel(c[y][x], alpha_pixel); + } + if (x+1 >= valid_area.left() && x+1 <= valid_area.right()) + { + alpha_pixel.alpha = 
static_cast<unsigned char>((dx-x)*max_alpha); + assign_pixel(c[y][x+1], alpha_pixel); + } + } + } + } + + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename pixel_type + > + void draw_line ( + image_type& c, + const point& p1, + const point& p2, + const pixel_type& val + ) + { + draw_line(p1.x(),p1.y(),p2.x(),p2.y(),c,val); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename pixel_type + > + void draw_rectangle ( + image_type& c, + const rectangle& rect, + const pixel_type& val + ) + { + draw_line(c, rect.tl_corner(), rect.tr_corner(), val); + draw_line(c, rect.bl_corner(), rect.br_corner(), val); + draw_line(c, rect.tl_corner(), rect.bl_corner(), val); + draw_line(c, rect.tr_corner(), rect.br_corner(), val); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename pixel_type + > + void draw_rectangle ( + image_type& c, + const rectangle& rect, + const pixel_type& val, + unsigned int thickness + ) + { + for (unsigned int i = 0; i < thickness; ++i) + { + if ((i%2)==0) + draw_rectangle(c,shrink_rect(rect,(i+1)/2),val); + else + draw_rectangle(c,grow_rect(rect,(i+1)/2),val); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename pixel_type + > + void fill_rect ( + image_type& img_, + const rectangle& rect, + const pixel_type& pixel + ) + { + image_view<image_type> img(img_); + rectangle area = rect.intersect(get_rect(img)); + + for (long r = area.top(); r <= area.bottom(); ++r) + { + for (long c = area.left(); c <= area.right(); ++c) + { + assign_pixel(img[r][c], pixel); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + 
typename image_array_type + > + matrix<typename image_traits<typename image_array_type::value_type>::pixel_type> tile_images ( + const image_array_type& images + ) + { // Tiles every image in images onto a size_nr x size_nc grid of equally sized cells and returns the grid as one matrix. + typedef typename image_traits<typename image_array_type::value_type>::pixel_type T; + + if (images.size() == 0) + return matrix<T>(); // nothing to tile + + const unsigned long size_nc = square_root(images.size()); // number of cell columns + const unsigned long size_nr = (size_nc*(size_nc-1)>=images.size())? size_nc-1 : size_nc; // one row fewer when size_nc*(size_nc-1) cells already hold every image + // Figure out the size we have to use for each chip in the big main image. We will + // use the largest dimensions seen across all the chips. + long nr = 0; + long nc = 0; + for (unsigned long i = 0; i < images.size(); ++i) + { + nr = std::max(num_rows(images[i]), nr); + nc = std::max(num_columns(images[i]), nc); + } + + matrix<T> temp(size_nr*nr, size_nc*nc); + T background_color; + assign_pixel(background_color, 0); + temp = background_color; // cells without an image keep this background value + unsigned long idx = 0; + for (unsigned long r = 0; r < size_nr; ++r) + { + for (unsigned long c = 0; c < size_nc; ++c) + { + if (idx < images.size()) + { + set_subm(temp, r*nr, c*nc, num_rows(images[idx]), num_columns(images[idx])) = mat(images[idx]); // copy at the image's own size (it may be smaller than the nr x nc cell); the rest of the cell keeps the background + } + ++idx; + } + } + return temp; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename pixel_type + > + void draw_solid_circle ( + image_type& img_, + const dpoint& center_point, + double radius, + const pixel_type& pixel + ) + { + image_view<image_type> img(img_); + using std::sqrt; + const rectangle valid_area(get_rect(img)); + const double x = center_point.x(); + const double y = center_point.y(); + const point cp(center_point); + if (radius > 1) + { + long first_x = static_cast<long>(x - radius + 0.5); + long last_x = static_cast<long>(x + radius + 0.5); + const double rs = radius*radius; + + // ensure that we only loop over the part of the x dimension that this + // image contains.
+ if (first_x < valid_area.left()) + first_x = valid_area.left(); + if (last_x > valid_area.right()) + last_x = valid_area.right(); + + long top, bottom; + + top = static_cast<long>(sqrt(std::max(rs - (first_x-x-0.5)*(first_x-x-0.5),0.0))+0.5); + top += y; + long last = top; + + // draw the left half of the circle + long middle = std::min(cp.x()-1,last_x); + for (long i = first_x; i <= middle; ++i) + { + double a = i - x + 0.5; + // find the top of the arc + top = static_cast<long>(sqrt(std::max(rs - a*a,0.0))+0.5); + top += y; + long temp = top; + + while(top >= last) + { + bottom = y - top + y; + draw_line(img_, point(i,top),point(i,bottom),pixel); + --top; + } + + last = temp; + } + + middle = std::max(cp.x(),first_x); + top = static_cast<long>(sqrt(std::max(rs - (last_x-x+0.5)*(last_x-x+0.5),0.0))+0.5); + top += y; + last = top; + // draw the right half of the circle + for (long i = last_x; i >= middle; --i) + { + double a = i - x - 0.5; + // find the top of the arc + top = static_cast<long>(sqrt(std::max(rs - a*a,0.0))+0.5); + top += y; + long temp = top; + + while(top >= last) + { + bottom = y - top + y; + draw_line(img_, point(i,top),point(i,bottom),pixel); + --top; + } + + last = temp; + } + } + else if (valid_area.contains(cp)) + { + // For circles smaller than a pixel we will just alpha blend them in proportion + // to how small they are. + rgb_alpha_pixel temp; + assign_pixel(temp, pixel); + temp.alpha = static_cast<unsigned char>(255*radius + 0.5); + assign_pixel(img[cp.y()][cp.x()], temp); + } + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_DRAW_IMAGe_ + + + + diff --git a/ml/dlib/dlib/image_transforms/draw_abstract.h b/ml/dlib/dlib/image_transforms/draw_abstract.h new file mode 100644 index 000000000..6631f8d8f --- /dev/null +++ b/ml/dlib/dlib/image_transforms/draw_abstract.h @@ -0,0 +1,150 @@ +// Copyright (C) 2008 Davis E. 
King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_DRAW_IMAGe_ABSTRACT +#ifdef DLIB_DRAW_IMAGe_ABSTRACT + +#include "../matrix.h" +#include "../image_processing/generic_image.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename pixel_type + > + void draw_line ( + image_type& img, + const point& p1, + const point& p2, + const pixel_type& val + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + ensures + - #img.nr() == img.nr() && #img.nc() == img.nc() + (i.e. the dimensions of the input image are not changed) + - for all valid r and c that are on the line between point p1 and p2: + - performs assign_pixel(img[r][c], val) + (i.e. it draws the line from p1 to p2 onto the image) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename pixel_type + > + void draw_line ( + long x1, + long y1, + long x2, + long y2, + image_type& img, + const pixel_type& val + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + ensures + - performs draw_line(img, point(x1,y1), point(x2,y2), val) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename pixel_type + > + void draw_rectangle ( + image_type& img, + const rectangle& rect, + const pixel_type& val, + unsigned int thickness = 1 + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - pixel_traits<pixel_type> is defined + ensures + - Draws the given rectangle onto the image img. 
It does this by calling + draw_line() four times to draw the four sides of the rectangle. + - The rectangle is drawn with the color given by val. + - The drawn rectangle will have edges that are thickness pixels wide. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename pixel_type + > + void draw_solid_circle ( + image_type& img, + const dpoint& center_point, + double radius, + const pixel_type& pixel + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - pixel_traits<pixel_type> is defined + ensures + - Draws a fully filled in circle onto image that is centered at center_point + and has the given radius. The circle will be filled by assigning the given + pixel value to each element of the circle. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename pixel_type + > + void fill_rect ( + image_type& img, + const rectangle& rect, + const pixel_type& pixel + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - pixel_traits<pixel_type> is defined + ensures + - fills the area defined by rect in the given image with the given pixel value. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type + > + matrix<typename image_traits<typename image_array_type::value_type>::pixel_type> tile_images ( + const image_array_type& images + ); + /*! 
+ requires + - image_array_type is a dlib::array of image objects where each image object + implements the interface defined in dlib/image_processing/generic_image.h + ensures + - This function takes the given images and tiles them into a single large + square image and returns this new big tiled image. Therefore, it is a useful + method to visualize many small images at once. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_DRAW_IMAGe_ABSTRACT + + + diff --git a/ml/dlib/dlib/image_transforms/edge_detector.h b/ml/dlib/dlib/image_transforms/edge_detector.h new file mode 100644 index 000000000..2fa898fed --- /dev/null +++ b/ml/dlib/dlib/image_transforms/edge_detector.h @@ -0,0 +1,302 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_EDGE_DETECTOr_ +#define DLIB_EDGE_DETECTOr_ + +#include "edge_detector_abstract.h" +#include "../pixel.h" +#include "../array2d.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + inline char edge_orientation ( + const T& x_, + const T& y_ + ) + { + + // if this is a perfectly horizontal gradient then return right away + if (x_ == 0) + { + return '|'; + } + else if (y_ == 0) // if this is a perfectly vertical gradient then return right away + { + return '-'; + } + + // Promote x so that when we multiply by 128 later we know overflow won't happen. 
+ typedef typename promote<T>::type type; + type x = x_; + type y = y_; + + if (x < 0) + { + x = -x; + if (y < 0) + { + y = -y; + x *= 128; + const type temp = x/y; + if (temp > 309) + return '-'; + else if (temp > 53) + return '/'; + else + return '|'; + } + else + { + x *= 128; + const type temp = x/y; + if (temp > 309) + return '-'; + else if (temp > 53) + return '\\'; + else + return '|'; + } + } + else + { + if (y < 0) + { + y = -y; + x *= 128; + + const type temp = x/y; + if (temp > 309) + return '-'; + else if (temp > 53) + return '\\'; + else + return '|'; + } + else + { + x *= 128; + + const type temp = x/y; + if (temp > 309) + return '-'; + else if (temp > 53) + return '/'; + else + return '|'; + } + } + + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + void sobel_edge_detector ( + const in_image_type& in_img_, + out_image_type& horz_, + out_image_type& vert_ + ) + { + typedef typename image_traits<out_image_type>::pixel_type pixel_type; + COMPILE_TIME_ASSERT(pixel_traits<pixel_type>::is_unsigned == false); + DLIB_ASSERT( !is_same_object(in_img_,horz_) && !is_same_object(in_img_,vert_) && + !is_same_object(horz_,vert_), + "\tvoid sobel_edge_detector(in_img_, horz_, vert_)" + << "\n\t You can't give the same image as more than one argument" + << "\n\t is_same_object(in_img_,horz_): " << is_same_object(in_img_,horz_) + << "\n\t is_same_object(in_img_,vert_): " << is_same_object(in_img_,vert_) + << "\n\t is_same_object(horz_,vert_): " << is_same_object(horz_,vert_) + ); + + + const int vert_filter[3][3] = {{-1,-2,-1}, + {0,0,0}, + {1,2,1}}; + const int horz_filter[3][3] = { {-1,0,1}, + {-2,0,2}, + {-1,0,1}}; + + const long M = 3; + const long N = 3; + + + const_image_view<in_image_type> in_img(in_img_); + image_view<out_image_type> horz(horz_); + image_view<out_image_type> vert(vert_); + + horz.set_size(in_img.nr(),in_img.nc()); + 
vert.set_size(in_img.nr(),in_img.nc()); + + assign_border_pixels(horz,1,1,0); + assign_border_pixels(vert,1,1,0); + + // figure out the range that we should apply the filter to + const long first_row = M/2; + const long first_col = N/2; + const long last_row = in_img.nr() - M/2; + const long last_col = in_img.nc() - N/2; + + + // apply the filter to the image + for (long r = first_row; r < last_row; ++r) + { + for (long c = first_col; c < last_col; ++c) + { + typedef typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type bp_type; + + typename promote<bp_type>::type p, horz_temp, vert_temp; + horz_temp = 0; + vert_temp = 0; + for (long m = 0; m < M; ++m) + { + for (long n = 0; n < N; ++n) + { + // pull out the current pixel and put it into p + p = get_pixel_intensity(in_img[r-M/2+m][c-N/2+n]); + + horz_temp += p*horz_filter[m][n]; + vert_temp += p*vert_filter[m][n]; + } + } + + assign_pixel(horz[r][c] , horz_temp); + assign_pixel(vert[r][c] , vert_temp); + + } + } + } + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template <typename T> + typename promote<T>::type square (const T& a) // returns a*a computed in promote<T>::type + { + return static_cast<typename promote<T>::type>(a)*static_cast<typename promote<T>::type>(a); // widen before multiplying so the product is not formed (and possibly overflowed) in T + } + } + + template < + typename in_image_type, + typename out_image_type + > + void suppress_non_maximum_edges ( + const in_image_type& horz_, + const in_image_type& vert_, + out_image_type& out_img_ + ) + { // Writes the gradient magnitude to out_img where it is not exceeded by either neighbor picked from edge_orientation(), and 0 elsewhere. + const_image_view<in_image_type> horz(horz_); + const_image_view<in_image_type> vert(vert_); + image_view<out_image_type> out_img(out_img_); + + COMPILE_TIME_ASSERT(is_signed_type<typename image_traits<in_image_type>::pixel_type>::value); + DLIB_ASSERT( horz.nr() == vert.nr() && horz.nc() == vert.nc(), + "\tvoid suppress_non_maximum_edges(horz, vert, out_img)" + << "\n\tYou have to give horz and vert gradient images that are the same size" + << "\n\thorz.nr(): " << horz.nr() + << "\n\thorz.nc(): " << horz.nc() + <<
"\n\tvert.nr(): " << vert.nr() + << "\n\tvert.nc(): " << vert.nc() + ); + DLIB_ASSERT( !is_same_object(out_img_,horz_) && !is_same_object(out_img_,vert_), + "\tvoid suppress_non_maximum_edges(horz_, vert_, out_img_)" + << "\n\t out_img can't be the same as one of the input images." + << "\n\t is_same_object(out_img_,horz_): " << is_same_object(out_img_,horz_) + << "\n\t is_same_object(out_img_,vert_): " << is_same_object(out_img_,vert_) + ); + + using std::min; + using std::abs; + + + // if there isn't any input image then just clear the output and return + if (horz.size() == 0) + { + out_img.clear(); + return; + } + + out_img.set_size(horz.nr(),horz.nc()); + + zero_border_pixels(out_img,1,1); + + // now do non maximum suppression while we copy the edge strengths into out_img + const long M = 3; + const long N = 3; + + // figure out the range that we should apply the filter to + const long first_row = M/2; + const long first_col = N/2; + const long last_row = horz.nr() - M/2; + const long last_col = horz.nc() - N/2; + + + // apply the filter to the image + for (long r = first_row; r < last_row; ++r) + { + for (long c = first_col; c < last_col; ++c) + { + typedef typename promote<typename image_traits<in_image_type>::pixel_type>::type T; + const T y = horz[r][c]; + const T x = vert[r][c]; + + using impl::square; + + const T val = square(horz[r][c]) + square(vert[r][c]); // squared gradient magnitude at (r,c); the sqrt is only taken when writing the output + + const char ori = edge_orientation(x,y); + const unsigned char zero = 0; + switch (ori) + { + case '-': // compare with the neighbors in the previous and next row + if (square(horz[r-1][c])+square(vert[r-1][c]) > val || square(horz[r+1][c]) + square(vert[r+1][c]) > val) + assign_pixel(out_img[r][c] , zero); + else + assign_pixel(out_img[r][c] , std::sqrt((double)val)); + break; + + case '|': // compare with the neighbors in the previous and next column + if (square(horz[r][c-1]) + square(vert[r][c-1]) > val || square(horz[r][c+1]) + square(vert[r][c+1]) > val) + assign_pixel(out_img[r][c] , zero); + else + assign_pixel(out_img[r][c] , std::sqrt((double)val)); + break; + + case '/': // compare with the diagonal neighbors (r-1,c-1) and (r+1,c+1) + if (square(horz[r-1][c-1]) + square(vert[r-1][c-1]) > val ||
square(horz[r+1][c+1]) + square(vert[r+1][c+1]) > val) + assign_pixel(out_img[r][c] , zero); + else + assign_pixel(out_img[r][c] , std::sqrt((double)val)); + break; + + case '\\': // compare with the diagonal neighbors (r+1,c-1) and (r-1,c+1) + if (square(horz[r+1][c-1]) + square(vert[r+1][c-1]) > val || square(horz[r-1][c+1]) + square(vert[r-1][c+1]) > val) + assign_pixel(out_img[r][c] , zero); + else + assign_pixel(out_img[r][c] , std::sqrt((double)val)); + break; + + } + } + } + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_EDGE_DETECTOr_ + + + diff --git a/ml/dlib/dlib/image_transforms/edge_detector_abstract.h b/ml/dlib/dlib/image_transforms/edge_detector_abstract.h new file mode 100644 index 000000000..42c991665 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/edge_detector_abstract.h @@ -0,0 +1,112 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_EDGE_DETECTOr_ABSTRACT_ +#ifdef DLIB_EDGE_DETECTOr_ABSTRACT_ + +#include "../pixel.h" +#include "../image_processing/generic_image.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + inline char edge_orientation ( + const T& x, + const T& y + ); + /*! + ensures + - returns the orientation of the line drawn from the origin to the point (x,y). + The orientation is represented pictorially using the four ascii + characters /,|,\, and -.
+ - if (the line is horizontal) then + returns '-' + - if (the line is vertical) then + returns '|' + - if (the line is diagonal with a positive slope) then + returns '/' + - if (the line is diagonal with a negative slope) then + returns '\\' + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + void sobel_edge_detector ( + const in_image_type& in_img, + out_image_type& horz, + out_image_type& vert + ); + /*! + requires + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - out_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - out_image_type must use signed grayscale pixels + - is_same_object(in_img,horz) == false + - is_same_object(in_img,vert) == false + - is_same_object(horz,vert) == false + ensures + - Applies the sobel edge detector to the given input image and stores the resulting + edge detections in the horz and vert images + - #horz.nr() == in_img.nr() + - #horz.nc() == in_img.nc() + - #vert.nr() == in_img.nr() + - #vert.nc() == in_img.nc() + - for all valid r and c: + - #horz[r][c] == the magnitude of the horizontal gradient at the point in_img[r][c] + - #vert[r][c] == the magnitude of the vertical gradient at the point in_img[r][c] + - edge_orientation(#vert[r][c], #horz[r][c]) == the edge direction at this point in + the image + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + void suppress_non_maximum_edges ( + const in_image_type& horz, + const in_image_type& vert, + out_image_type& out_img + ); + /*! 
+ requires + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - out_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - horz.nr() == vert.nr() + - horz.nc() == vert.nc() + - is_same_object(out_img, horz) == false + - is_same_object(out_img, vert) == false + - image_traits<in_image_type>::pixel_type == A signed scalar type (e.g. int, double, etc.) + ensures + - #out_img.nr() == horz.nr() + - #out_img.nc() == horz.nc() + - let edge_strength(r,c) == sqrt(pow(horz[r][c],2) + pow(vert[r][c],2)) + (i.e. The Euclidean norm of the gradient) + - for all valid r and c: + - if (edge_strength(r,c) is at a maximum with respect to its 2 neighboring + pixels along the line given by edge_orientation(vert[r][c],horz[r][c])) then + - performs assign_pixel(#out_img[r][c], edge_strength(r,c)) + - else + - performs assign_pixel(#out_img[r][c], 0) + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_EDGE_DETECTOr_ABSTRACT_ + + diff --git a/ml/dlib/dlib/image_transforms/equalize_histogram.h b/ml/dlib/dlib/image_transforms/equalize_histogram.h new file mode 100644 index 000000000..dd048759a --- /dev/null +++ b/ml/dlib/dlib/image_transforms/equalize_histogram.h @@ -0,0 +1,143 @@ +// Copyright (C) 2006 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_EQUALIZE_HISTOGRAm_ +#define DLIB_EQUALIZE_HISTOGRAm_ + +#include "../pixel.h" +#include "equalize_histogram_abstract.h" +#include <vector> +#include "../enable_if.h" +#include "../matrix.h" + +namespace dlib +{ + +// --------------------------------------------------------------------------------------- + + template < + typename in_image_type, + long R, + long C, + typename MM + > + void get_histogram ( + const in_image_type& in_img_, + matrix<unsigned long,R,C,MM>& hist + ) + { // Counts how often each pixel intensity occurs in in_img_ and stores the counts in hist. + typedef typename image_traits<in_image_type>::pixel_type pixel_type; + COMPILE_TIME_ASSERT( pixel_traits<pixel_type>::is_unsigned == true ); + + typedef typename pixel_traits<pixel_type>::basic_pixel_type in_image_basic_pixel_type; + COMPILE_TIME_ASSERT( sizeof(in_image_basic_pixel_type) <= 2); // only pixel types of at most two bytes are accepted + + // make sure hist is the right size: max()+1 bins, as a row vector if R == 1, otherwise as a column vector + if (R == 1) + hist.set_size(1,pixel_traits<pixel_type>::max()+1); + else + hist.set_size(pixel_traits<pixel_type>::max()+1,1); + + + set_all_elements(hist,0); + + const_image_view<in_image_type> in_img(in_img_); + // compute the histogram + for (long r = 0; r < in_img.nr(); ++r) + { + for (long c = 0; c < in_img.nc(); ++c) + { + unsigned long p = get_pixel_intensity(in_img[r][c]); + ++hist(p); + } + } + } + +// --------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + void equalize_histogram ( + const in_image_type& in_img_, + out_image_type& out_img_ + ) + { // Writes to out_img_ a copy of in_img_ whose intensities are remapped through the scaled cumulative histogram of in_img_. + const_image_view<in_image_type> in_img(in_img_); + image_view<out_image_type> out_img(out_img_); + + typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); + + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::is_unsigned == true ); +
COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::is_unsigned == true ); + + typedef typename pixel_traits<in_pixel_type>::basic_pixel_type in_image_basic_pixel_type; + COMPILE_TIME_ASSERT( sizeof(in_image_basic_pixel_type) <= 2); + + + // if there isn't any input image then just clear the output and return + if (in_img.size() == 0) + { + out_img.clear(); + return; + } + + out_img.set_size(in_img.nr(),in_img.nc()); // NOTE(review): runs before the input is read; with the in-place overload this presumably relies on set_size() keeping the pixels when the size is unchanged -- confirm + + unsigned long p; + + matrix<unsigned long,1,0> histogram; + get_histogram(in_img_, histogram); + in_img = in_img_; // re-bind the input view; NOTE(review): presumably needed when in_img_ and out_img_ are the same object -- confirm + + double scale = pixel_traits<out_pixel_type>::max(); + if (in_img.size() > histogram(0)) // at least one pixel has a non-zero intensity + scale /= in_img.size()-histogram(0); // max output value divided by the number of non-zero-intensity pixels + else + scale = 0; // every pixel has intensity 0, so every output intensity becomes 0 + + // make the black pixels remain black in the output image + histogram(0) = 0; + + // compute the transform function (a running sum, i.e. the cumulative histogram) + for (long i = 1; i < histogram.size(); ++i) + histogram(i) += histogram(i-1); + // scale so that it is in the range [0,pixel_traits<out_pixel_type>::max()] + for (long i = 0; i < histogram.size(); ++i) + histogram(i) = static_cast<unsigned long>(histogram(i)*scale); + + // now do the transform + for (long row = 0; row < in_img.nr(); ++row) + { + for (long col = 0; col < in_img.nc(); ++col) + { + p = histogram(get_pixel_intensity(in_img[row][col])); + assign_pixel(out_img[row][col], in_img[row][col]); // copy the whole pixel first ... + assign_pixel_intensity(out_img[row][col],p); // ... then replace only its intensity with the remapped value + } + } + + } + + template < + typename image_type + > + void equalize_histogram ( + image_type& img + ) + { // In-place convenience overload: img is used as both input and output. + equalize_histogram(img,img); + } + +// --------------------------------------------------------------------------------------- + +} + +#endif // DLIB_EQUALIZE_HISTOGRAm_ + + + diff --git a/ml/dlib/dlib/image_transforms/equalize_histogram_abstract.h b/ml/dlib/dlib/image_transforms/equalize_histogram_abstract.h new file mode 100644 index 000000000..2592aef1a --- /dev/null +++ b/ml/dlib/dlib/image_transforms/equalize_histogram_abstract.h @@ -0,0 +1,91 @@ +// Copyright (C) 2006 Davis E.
King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_EQUALIZE_HISTOGRAm_ABSTRACT_ +#ifdef DLIB_EQUALIZE_HISTOGRAm_ABSTRACT_ + +#include "../pixel.h" +#include "../matrix.h" +#include "../image_processing/generic_image.h" + +namespace dlib +{ + +// --------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + void equalize_histogram ( + const in_image_type& in_img, + out_image_type& out_img + ); + /*! + requires + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - out_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - Let pixel_type be the type of pixel in either input or output images, then we + must have: + - pixel_traits<pixel_type>::has_alpha == false + - pixel_traits<pixel_type>::is_unsigned == true + - For the input image pixel type, we have the additional requirement that: + - pixel_traits<pixel_type>::max() <= 65535 + ensures + - #out_img == the histogram equalized version of in_img + - #out_img.nc() == in_img.nc() + - #out_img.nr() == in_img.nr() + !*/ + + template < + typename image_type + > + void equalize_histogram ( + image_type& img + ); + /*! + requires + - it is valid to call equalize_histogram(img,img) + ensures + - calls equalize_histogram(img,img); + !*/ + +// --------------------------------------------------------------------------------------- + + template < + typename in_image_type, + long R, + long C, + typename MM + > + void get_histogram ( + const in_image_type& in_img, + matrix<unsigned long,R,C,MM>& hist + ); + /*! 
+ requires + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - Let pixel_type denote the type of pixel in in_img, then we must have: + - pixel_traits<pixel_type>::is_unsigned == true + - pixel_traits<pixel_type>::max() <= 65535 + - hist must be capable of representing a row or column vector of length + pixel_traits<pixel_type>::max()+1. I.e. if R and C are nonzero + then they must be values that don't conflict with the previous sentence. + ensures + - #hist.size() == pixel_traits<pixel_type>::max()+1 + - #hist.nc() == 1 || #hist.nr() == 1 (i.e. hist is either a row or column vector) + - #hist == the histogram for in_img. I.e. it is the case that for all + valid i: + - #hist(i) == the number of times a pixel with intensity i appears + in in_img + !*/ + +// --------------------------------------------------------------------------------------- + +} + +#endif // DLIB_EQUALIZE_HISTOGRAm_ABSTRACT_ + + diff --git a/ml/dlib/dlib/image_transforms/fhog.h b/ml/dlib/dlib/image_transforms/fhog.h new file mode 100644 index 000000000..d99973adf --- /dev/null +++ b/ml/dlib/dlib/image_transforms/fhog.h @@ -0,0 +1,1404 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_fHOG_Hh_ +#define DLIB_fHOG_Hh_ + +#include "fhog_abstract.h" +#include "../matrix.h" +#include "../array2d.h" +#include "../array.h" +#include "../geometry.h" +#include "assign_image.h" +#include "draw.h" +#include "interpolation.h" +#include "../simd.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + namespace impl_fhog + { + template <typename image_type, typename T> + inline typename dlib::enable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient ( + const int r, + const int c, + const image_type& img, + matrix<T,2,1>& grad, + T& len + ) + { + matrix<T, 2, 1> grad2, grad3; + // get the red gradient + grad(0) = (int)img[r][c+1].red-(int)img[r][c-1].red; + grad(1) = (int)img[r+1][c].red-(int)img[r-1][c].red; + len = length_squared(grad); + + // get the green gradient + grad2(0) = (int)img[r][c+1].green-(int)img[r][c-1].green; + grad2(1) = (int)img[r+1][c].green-(int)img[r-1][c].green; + T v2 = length_squared(grad2); + + // get the blue gradient + grad3(0) = (int)img[r][c+1].blue-(int)img[r][c-1].blue; + grad3(1) = (int)img[r+1][c].blue-(int)img[r-1][c].blue; + T v3 = length_squared(grad3); + + // pick color with strongest gradient + if (v2 > len) + { + len = v2; + grad = grad2; + } + if (v3 > len) + { + len = v3; + grad = grad3; + } + } + + template <typename image_type> + inline typename dlib::enable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient ( + const int r, + const int c, + const image_type& img, + simd4f& grad_x, + simd4f& grad_y, + simd4f& len + ) + { + simd4i rleft((int)img[r][c-1].red, + (int)img[r][c].red, + (int)img[r][c+1].red, + (int)img[r][c+2].red); + simd4i rright((int)img[r][c+1].red, + (int)img[r][c+2].red, + (int)img[r][c+3].red, + (int)img[r][c+4].red); + simd4i rtop((int)img[r-1][c].red, + (int)img[r-1][c+1].red, + (int)img[r-1][c+2].red, + (int)img[r-1][c+3].red); + simd4i rbottom((int)img[r+1][c].red, 
+ (int)img[r+1][c+1].red, + (int)img[r+1][c+2].red, + (int)img[r+1][c+3].red); + + simd4i gleft((int)img[r][c-1].green, + (int)img[r][c].green, + (int)img[r][c+1].green, + (int)img[r][c+2].green); + simd4i gright((int)img[r][c+1].green, + (int)img[r][c+2].green, + (int)img[r][c+3].green, + (int)img[r][c+4].green); + simd4i gtop((int)img[r-1][c].green, + (int)img[r-1][c+1].green, + (int)img[r-1][c+2].green, + (int)img[r-1][c+3].green); + simd4i gbottom((int)img[r+1][c].green, + (int)img[r+1][c+1].green, + (int)img[r+1][c+2].green, + (int)img[r+1][c+3].green); + + simd4i bleft((int)img[r][c-1].blue, + (int)img[r][c].blue, + (int)img[r][c+1].blue, + (int)img[r][c+2].blue); + simd4i bright((int)img[r][c+1].blue, + (int)img[r][c+2].blue, + (int)img[r][c+3].blue, + (int)img[r][c+4].blue); + simd4i btop((int)img[r-1][c].blue, + (int)img[r-1][c+1].blue, + (int)img[r-1][c+2].blue, + (int)img[r-1][c+3].blue); + simd4i bbottom((int)img[r+1][c].blue, + (int)img[r+1][c+1].blue, + (int)img[r+1][c+2].blue, + (int)img[r+1][c+3].blue); + + simd4i grad_x_red = rright-rleft; + simd4i grad_y_red = rbottom-rtop; + simd4i grad_x_green = gright-gleft; + simd4i grad_y_green = gbottom-gtop; + simd4i grad_x_blue = bright-bleft; + simd4i grad_y_blue = bbottom-btop; + + simd4i rlen = grad_x_red*grad_x_red + grad_y_red*grad_y_red; + simd4i glen = grad_x_green*grad_x_green + grad_y_green*grad_y_green; + simd4i blen = grad_x_blue*grad_x_blue + grad_y_blue*grad_y_blue; + + simd4i cmp = rlen>glen; + simd4i tgrad_x = select(cmp,grad_x_red,grad_x_green); + simd4i tgrad_y = select(cmp,grad_y_red,grad_y_green); + simd4i tlen = select(cmp,rlen,glen); + + cmp = tlen>blen; + grad_x = select(cmp,tgrad_x,grad_x_blue); + grad_y = select(cmp,tgrad_y,grad_y_blue); + len = select(cmp,tlen,blen); + } + + // ------------------------------------------------------------------------------------ + + template <typename image_type> + inline typename dlib::enable_if_c<pixel_traits<typename 
image_type::pixel_type>::rgb>::type get_gradient( + const int r, + const int c, + const image_type& img, + simd8f& grad_x, + simd8f& grad_y, + simd8f& len + ) + { + simd8i rleft((int)img[r][c - 1].red, + (int)img[r][c].red, + (int)img[r][c + 1].red, + (int)img[r][c + 2].red, + (int)img[r][c + 3].red, + (int)img[r][c + 4].red, + (int)img[r][c + 5].red, + (int)img[r][c + 6].red); + simd8i rright((int)img[r][c + 1].red, + (int)img[r][c + 2].red, + (int)img[r][c + 3].red, + (int)img[r][c + 4].red, + (int)img[r][c + 5].red, + (int)img[r][c + 6].red, + (int)img[r][c + 7].red, + (int)img[r][c + 8].red); + simd8i rtop((int)img[r - 1][c].red, + (int)img[r - 1][c + 1].red, + (int)img[r - 1][c + 2].red, + (int)img[r - 1][c + 3].red, + (int)img[r - 1][c + 4].red, + (int)img[r - 1][c + 5].red, + (int)img[r - 1][c + 6].red, + (int)img[r - 1][c + 7].red); + simd8i rbottom((int)img[r + 1][c].red, + (int)img[r + 1][c + 1].red, + (int)img[r + 1][c + 2].red, + (int)img[r + 1][c + 3].red, + (int)img[r + 1][c + 4].red, + (int)img[r + 1][c + 5].red, + (int)img[r + 1][c + 6].red, + (int)img[r + 1][c + 7].red); + + simd8i gleft((int)img[r][c - 1].green, + (int)img[r][c].green, + (int)img[r][c + 1].green, + (int)img[r][c + 2].green, + (int)img[r][c + 3].green, + (int)img[r][c + 4].green, + (int)img[r][c + 5].green, + (int)img[r][c + 6].green); + simd8i gright((int)img[r][c + 1].green, + (int)img[r][c + 2].green, + (int)img[r][c + 3].green, + (int)img[r][c + 4].green, + (int)img[r][c + 5].green, + (int)img[r][c + 6].green, + (int)img[r][c + 7].green, + (int)img[r][c + 8].green); + simd8i gtop((int)img[r - 1][c].green, + (int)img[r - 1][c + 1].green, + (int)img[r - 1][c + 2].green, + (int)img[r - 1][c + 3].green, + (int)img[r - 1][c + 4].green, + (int)img[r - 1][c + 5].green, + (int)img[r - 1][c + 6].green, + (int)img[r - 1][c + 7].green); + simd8i gbottom((int)img[r + 1][c].green, + (int)img[r + 1][c + 1].green, + (int)img[r + 1][c + 2].green, + (int)img[r + 1][c + 3].green, + (int)img[r + 
1][c + 4].green, + (int)img[r + 1][c + 5].green, + (int)img[r + 1][c + 6].green, + (int)img[r + 1][c + 7].green); + + simd8i bleft((int)img[r][c - 1].blue, + (int)img[r][c].blue, + (int)img[r][c + 1].blue, + (int)img[r][c + 2].blue, + (int)img[r][c + 3].blue, + (int)img[r][c + 4].blue, + (int)img[r][c + 5].blue, + (int)img[r][c + 6].blue); + simd8i bright((int)img[r][c + 1].blue, + (int)img[r][c + 2].blue, + (int)img[r][c + 3].blue, + (int)img[r][c + 4].blue, + (int)img[r][c + 5].blue, + (int)img[r][c + 6].blue, + (int)img[r][c + 7].blue, + (int)img[r][c + 8].blue); + simd8i btop((int)img[r - 1][c].blue, + (int)img[r - 1][c + 1].blue, + (int)img[r - 1][c + 2].blue, + (int)img[r - 1][c + 3].blue, + (int)img[r - 1][c + 4].blue, + (int)img[r - 1][c + 5].blue, + (int)img[r - 1][c + 6].blue, + (int)img[r - 1][c + 7].blue); + simd8i bbottom((int)img[r + 1][c].blue, + (int)img[r + 1][c + 1].blue, + (int)img[r + 1][c + 2].blue, + (int)img[r + 1][c + 3].blue, + (int)img[r + 1][c + 4].blue, + (int)img[r + 1][c + 5].blue, + (int)img[r + 1][c + 6].blue, + (int)img[r + 1][c + 7].blue); + + simd8i grad_x_red = rright - rleft; + simd8i grad_y_red = rbottom - rtop; + simd8i grad_x_green = gright - gleft; + simd8i grad_y_green = gbottom - gtop; + simd8i grad_x_blue = bright - bleft; + simd8i grad_y_blue = bbottom - btop; + + simd8i rlen = grad_x_red*grad_x_red + grad_y_red*grad_y_red; + simd8i glen = grad_x_green*grad_x_green + grad_y_green*grad_y_green; + simd8i blen = grad_x_blue*grad_x_blue + grad_y_blue*grad_y_blue; + + simd8i cmp = rlen > glen; + simd8i tgrad_x = select(cmp, grad_x_red, grad_x_green); + simd8i tgrad_y = select(cmp, grad_y_red, grad_y_green); + simd8i tlen = select(cmp, rlen, glen); + + cmp = tlen > blen; + grad_x = select(cmp, tgrad_x, grad_x_blue); + grad_y = select(cmp, tgrad_y, grad_y_blue); + len = select(cmp, tlen, blen); + } + + // ------------------------------------------------------------------------------------ + + template <typename image_type, 
typename T> + inline typename dlib::disable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient ( + const int r, + const int c, + const image_type& img, + matrix<T, 2, 1>& grad, + T& len + ) + { + grad(0) = (int)get_pixel_intensity(img[r][c+1])-(int)get_pixel_intensity(img[r][c-1]); + grad(1) = (int)get_pixel_intensity(img[r+1][c])-(int)get_pixel_intensity(img[r-1][c]); + len = length_squared(grad); + } + + template <typename image_type> + inline typename dlib::disable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient ( + int r, + int c, + const image_type& img, + simd4f& grad_x, + simd4f& grad_y, + simd4f& len + ) + { + simd4i left((int)get_pixel_intensity(img[r][c-1]), + (int)get_pixel_intensity(img[r][c]), + (int)get_pixel_intensity(img[r][c+1]), + (int)get_pixel_intensity(img[r][c+2])); + simd4i right((int)get_pixel_intensity(img[r][c+1]), + (int)get_pixel_intensity(img[r][c+2]), + (int)get_pixel_intensity(img[r][c+3]), + (int)get_pixel_intensity(img[r][c+4])); + + simd4i top((int)get_pixel_intensity(img[r-1][c]), + (int)get_pixel_intensity(img[r-1][c+1]), + (int)get_pixel_intensity(img[r-1][c+2]), + (int)get_pixel_intensity(img[r-1][c+3])); + simd4i bottom((int)get_pixel_intensity(img[r+1][c]), + (int)get_pixel_intensity(img[r+1][c+1]), + (int)get_pixel_intensity(img[r+1][c+2]), + (int)get_pixel_intensity(img[r+1][c+3])); + + grad_x = right-left; + grad_y = bottom-top; + + len = (grad_x*grad_x + grad_y*grad_y); + } + + // ------------------------------------------------------------------------------------ + + template <typename image_type> + inline typename dlib::disable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient( + int r, + int c, + const image_type& img, + simd8f& grad_x, + simd8f& grad_y, + simd8f& len + ) + { + simd8i left((int)get_pixel_intensity(img[r][c - 1]), + (int)get_pixel_intensity(img[r][c]), + (int)get_pixel_intensity(img[r][c + 1]), + 
(int)get_pixel_intensity(img[r][c + 2]), + (int)get_pixel_intensity(img[r][c + 3]), + (int)get_pixel_intensity(img[r][c + 4]), + (int)get_pixel_intensity(img[r][c + 5]), + (int)get_pixel_intensity(img[r][c + 6])); + simd8i right((int)get_pixel_intensity(img[r][c + 1]), + (int)get_pixel_intensity(img[r][c + 2]), + (int)get_pixel_intensity(img[r][c + 3]), + (int)get_pixel_intensity(img[r][c + 4]), + (int)get_pixel_intensity(img[r][c + 5]), + (int)get_pixel_intensity(img[r][c + 6]), + (int)get_pixel_intensity(img[r][c + 7]), + (int)get_pixel_intensity(img[r][c + 8])); + + simd8i top((int)get_pixel_intensity(img[r - 1][c]), + (int)get_pixel_intensity(img[r - 1][c + 1]), + (int)get_pixel_intensity(img[r - 1][c + 2]), + (int)get_pixel_intensity(img[r - 1][c + 3]), + (int)get_pixel_intensity(img[r - 1][c + 4]), + (int)get_pixel_intensity(img[r - 1][c + 5]), + (int)get_pixel_intensity(img[r - 1][c + 6]), + (int)get_pixel_intensity(img[r - 1][c + 7])); + simd8i bottom((int)get_pixel_intensity(img[r + 1][c]), + (int)get_pixel_intensity(img[r + 1][c + 1]), + (int)get_pixel_intensity(img[r + 1][c + 2]), + (int)get_pixel_intensity(img[r + 1][c + 3]), + (int)get_pixel_intensity(img[r + 1][c + 4]), + (int)get_pixel_intensity(img[r + 1][c + 5]), + (int)get_pixel_intensity(img[r + 1][c + 6]), + (int)get_pixel_intensity(img[r + 1][c + 7])); + + grad_x = right - left; + grad_y = bottom - top; + + len = (grad_x*grad_x + grad_y*grad_y); + } + + // ------------------------------------------------------------------------------------ + + template <typename T, typename mm1, typename mm2> + inline void set_hog ( + dlib::array<array2d<T,mm1>,mm2>& hog, + int o, + int x, + int y, + const float& value + ) + { + hog[o][y][x] = value; + } + + template <typename T, typename mm1, typename mm2> + void init_hog ( + dlib::array<array2d<T,mm1>,mm2>& hog, + int hog_nr, + int hog_nc, + int filter_rows_padding, + int filter_cols_padding + ) + { + const int num_hog_bands = 27+4; + 
hog.resize(num_hog_bands); + for (int i = 0; i < num_hog_bands; ++i) + { + hog[i].set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1); + rectangle rect = get_rect(hog[i]); + rect.top() += (filter_rows_padding-1)/2; + rect.left() += (filter_cols_padding-1)/2; + rect.right() -= filter_cols_padding/2; + rect.bottom() -= filter_rows_padding/2; + zero_border_pixels(hog[i],rect); + } + } + + template <typename T, typename mm1, typename mm2> + void init_hog_zero_everything ( + dlib::array<array2d<T,mm1>,mm2>& hog, + int hog_nr, + int hog_nc, + int filter_rows_padding, + int filter_cols_padding + ) + { + const int num_hog_bands = 27+4; + hog.resize(num_hog_bands); + for (int i = 0; i < num_hog_bands; ++i) + { + hog[i].set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1); + assign_all_pixels(hog[i], 0); + } + } + + // ------------------------------------------------------------------------------------ + + template <typename T, typename mm> + inline void set_hog ( + array2d<matrix<T,31,1>,mm>& hog, + int o, + int x, + int y, + const float& value + ) + { + hog[y][x](o) = value; + } + + template <typename T, typename mm> + void init_hog ( + array2d<matrix<T,31,1>,mm>& hog, + int hog_nr, + int hog_nc, + int filter_rows_padding, + int filter_cols_padding + ) + { + hog.set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1); + + // now zero out the border region + rectangle rect = get_rect(hog); + rect.top() += (filter_rows_padding-1)/2; + rect.left() += (filter_cols_padding-1)/2; + rect.right() -= filter_cols_padding/2; + rect.bottom() -= filter_rows_padding/2; + border_enumerator be(get_rect(hog),rect); + while (be.move_next()) + { + const point p = be.element(); + set_all_elements(hog[p.y()][p.x()], 0); + } + } + + template <typename T, typename mm> + void init_hog_zero_everything ( + array2d<matrix<T,31,1>,mm>& hog, + int hog_nr, + int hog_nc, + int filter_rows_padding, + int filter_cols_padding + ) + { + 
hog.set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1); + + for (long r = 0; r < hog.nr(); ++r) + { + for (long c = 0; c < hog.nc(); ++c) + { + set_all_elements(hog[r][c], 0); + } + } + } + + // ------------------------------------------------------------------------------------ + + template < + typename image_type, + typename out_type + > + void impl_extract_fhog_features_cell_size_1( + const image_type& img_, + out_type& hog, + int filter_rows_padding, + int filter_cols_padding + ) + { + const_image_view<image_type> img(img_); + // make sure requires clause is not broken + DLIB_ASSERT( filter_rows_padding > 0 && + filter_cols_padding > 0 , + "\t void extract_fhog_features()" + << "\n\t Invalid inputs were given to this function. " + << "\n\t filter_rows_padding: " << filter_rows_padding + << "\n\t filter_cols_padding: " << filter_cols_padding + ); + + /* + This function is an optimized version of impl_extract_fhog_features() for + the case where cell_size == 1. + */ + + + // unit vectors used to compute gradient orientation + matrix<float,2,1> directions[9]; + directions[0] = 1.0000, 0.0000; + directions[1] = 0.9397, 0.3420; + directions[2] = 0.7660, 0.6428; + directions[3] = 0.500, 0.8660; + directions[4] = 0.1736, 0.9848; + directions[5] = -0.1736, 0.9848; + directions[6] = -0.5000, 0.8660; + directions[7] = -0.7660, 0.6428; + directions[8] = -0.9397, 0.3420; + + + + if (img.nr() <= 2 || img.nc() <= 2) + { + hog.clear(); + return; + } + + array2d<unsigned char> angle(img.nr(), img.nc()); + + array2d<float> norm(img.nr(), img.nc()); + zero_border_pixels(norm,1,1); + + // memory for HOG features + const long hog_nr = img.nr()-2; + const long hog_nc = img.nc()-2; + + const int padding_rows_offset = (filter_rows_padding-1)/2; + const int padding_cols_offset = (filter_cols_padding-1)/2; + init_hog_zero_everything(hog, hog_nr, hog_nc, filter_rows_padding, filter_cols_padding); + + + const int visible_nr = img.nr()-1; + const int visible_nc = 
img.nc()-1; + + // First populate the gradient histograms + for (int y = 1; y < visible_nr; y++) + { + int x; + for (x = 1; x < visible_nc - 7; x += 8) + { + // v will be the length of the gradient vectors. + simd8f grad_x, grad_y, v; + get_gradient(y, x, img, grad_x, grad_y, v); + + float _vv[8]; + v.store(_vv); + + // Now snap the gradient to one of 18 orientations + simd8f best_dot = 0; + simd8f best_o = 0; + for (int o = 0; o < 9; o++) + { + simd8f dot = grad_x*directions[o](0) + grad_y*directions[o](1); + simd8f_bool cmp = dot>best_dot; + best_dot = select(cmp, dot, best_dot); + dot *= -1; + best_o = select(cmp, o, best_o); + + cmp = dot > best_dot; + best_dot = select(cmp, dot, best_dot); + best_o = select(cmp, o + 9, best_o); + } + + int32 _best_o[8]; simd8i(best_o).store(_best_o); + + norm[y][x + 0] = _vv[0]; + norm[y][x + 1] = _vv[1]; + norm[y][x + 2] = _vv[2]; + norm[y][x + 3] = _vv[3]; + norm[y][x + 4] = _vv[4]; + norm[y][x + 5] = _vv[5]; + norm[y][x + 6] = _vv[6]; + norm[y][x + 7] = _vv[7]; + + angle[y][x + 0] = _best_o[0]; + angle[y][x + 1] = _best_o[1]; + angle[y][x + 2] = _best_o[2]; + angle[y][x + 3] = _best_o[3]; + angle[y][x + 4] = _best_o[4]; + angle[y][x + 5] = _best_o[5]; + angle[y][x + 6] = _best_o[6]; + angle[y][x + 7] = _best_o[7]; + } + // Now process the right columns that don't fit into simd registers. 
+ for (; x < visible_nc; x++) + { + matrix<float,2,1> grad; + float v; + get_gradient(y,x,img,grad,v); + + // snap to one of 18 orientations + float best_dot = 0; + int best_o = 0; + for (int o = 0; o < 9; o++) + { + const float dot = dlib::dot(directions[o], grad); + if (dot > best_dot) + { + best_dot = dot; + best_o = o; + } + else if (-dot > best_dot) + { + best_dot = -dot; + best_o = o+9; + } + } + + norm[y][x] = v; + angle[y][x] = best_o; + } + } + + const float eps = 0.0001; + // compute features + for (int y = 0; y < hog_nr; y++) + { + const int yy = y+padding_rows_offset; + for (int x = 0; x < hog_nc; x++) + { + const simd4f z1(norm[y+1][x+1], + norm[y][x+1], + norm[y+1][x], + norm[y][x]); + + const simd4f z2(norm[y+1][x+2], + norm[y][x+2], + norm[y+1][x+1], + norm[y][x+1]); + + const simd4f z3(norm[y+2][x+1], + norm[y+1][x+1], + norm[y+2][x], + norm[y+1][x]); + + const simd4f z4(norm[y+2][x+2], + norm[y+1][x+2], + norm[y+2][x+1], + norm[y+1][x+1]); + + const simd4f temp0 = std::sqrt(norm[y+1][x+1]); + const simd4f nn = 0.2*sqrt(z1+z2+z3+z4+eps); + const simd4f n = 0.1/nn; + + simd4f t = 0; + + const int xx = x+padding_cols_offset; + + simd4f h0 = min(temp0,nn)*n; + const float vv = sum(h0); + set_hog(hog,angle[y+1][x+1],xx,yy, vv); + t += h0; + + t *= 2*0.2357; + + // contrast-insensitive features + set_hog(hog,angle[y+1][x+1]%9+18,xx,yy, vv); + + + float temp[4]; + t.store(temp); + + // texture features + set_hog(hog,27,xx,yy, temp[0]); + set_hog(hog,28,xx,yy, temp[1]); + set_hog(hog,29,xx,yy, temp[2]); + set_hog(hog,30,xx,yy, temp[3]); + } + } + } + + // ------------------------------------------------------------------------------------ + + template < + typename image_type, + typename out_type + > + void impl_extract_fhog_features( + const image_type& img_, + out_type& hog, + int cell_size, + int filter_rows_padding, + int filter_cols_padding + ) + { + const_image_view<image_type> img(img_); + // make sure requires clause is not broken + DLIB_ASSERT( 
cell_size > 0 && + filter_rows_padding > 0 && + filter_cols_padding > 0 , + "\t void extract_fhog_features()" + << "\n\t Invalid inputs were given to this function. " + << "\n\t cell_size: " << cell_size + << "\n\t filter_rows_padding: " << filter_rows_padding + << "\n\t filter_cols_padding: " << filter_cols_padding + ); + + /* + This function implements the HOG feature extraction method described in + the paper: + P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan + Object Detection with Discriminatively Trained Part Based Models + IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010 + + Moreover, this function is derived from the HOG feature extraction code + from the features.cc file in the voc-releaseX code (see + http://people.cs.uchicago.edu/~rbg/latent/) which is has the following + license (note that the code has been modified to work with grayscale and + color as well as planar and interlaced input and output formats): + + Copyright (C) 2011, 2012 Ross Girshick, Pedro Felzenszwalb + Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick + Copyright (C) 2007 Pedro Felzenszwalb, Deva Ramanan + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + if (cell_size == 1) + { + impl_extract_fhog_features_cell_size_1(img_,hog,filter_rows_padding,filter_cols_padding); + return; + } + + // unit vectors used to compute gradient orientation + matrix<float,2,1> directions[9]; + directions[0] = 1.0000, 0.0000; + directions[1] = 0.9397, 0.3420; + directions[2] = 0.7660, 0.6428; + directions[3] = 0.500, 0.8660; + directions[4] = 0.1736, 0.9848; + directions[5] = -0.1736, 0.9848; + directions[6] = -0.5000, 0.8660; + directions[7] = -0.7660, 0.6428; + directions[8] = -0.9397, 0.3420; + + + + // First we allocate memory for caching orientation histograms & their norms. + const int cells_nr = (int)((float)img.nr()/(float)cell_size + 0.5); + const int cells_nc = (int)((float)img.nc()/(float)cell_size + 0.5); + + if (cells_nr == 0 || cells_nc == 0) + { + hog.clear(); + return; + } + + // We give hist extra padding around the edges (1 cell all the way around the + // edge) so we can avoid needing to do boundary checks when indexing into it + // later on. So some statements assign to the boundary but those values are + // never used. 
+ array2d<matrix<float,18,1> > hist(cells_nr+2, cells_nc+2); + for (long r = 0; r < hist.nr(); ++r) + { + for (long c = 0; c < hist.nc(); ++c) + { + hist[r][c] = 0; + } + } + + array2d<float> norm(cells_nr, cells_nc); + assign_all_pixels(norm, 0); + + // memory for HOG features + const int hog_nr = std::max(cells_nr-2, 0); + const int hog_nc = std::max(cells_nc-2, 0); + if (hog_nr == 0 || hog_nc == 0) + { + hog.clear(); + return; + } + const int padding_rows_offset = (filter_rows_padding-1)/2; + const int padding_cols_offset = (filter_cols_padding-1)/2; + init_hog(hog, hog_nr, hog_nc, filter_rows_padding, filter_cols_padding); + + const int visible_nr = std::min((long)cells_nr*cell_size,img.nr())-1; + const int visible_nc = std::min((long)cells_nc*cell_size,img.nc())-1; + + // First populate the gradient histograms + for (int y = 1; y < visible_nr; y++) + { + const float yp = ((float)y+0.5)/(float)cell_size - 0.5; + const int iyp = (int)std::floor(yp); + const float vy0 = yp - iyp; + const float vy1 = 1.0 - vy0; + int x; + for (x = 1; x < visible_nc - 7; x += 8) + { + simd8f xx(x, x + 1, x + 2, x + 3, x + 4, x + 5, x + 6, x + 7); + // v will be the length of the gradient vectors. + simd8f grad_x, grad_y, v; + get_gradient(y, x, img, grad_x, grad_y, v); + + // We will use bilinear interpolation to add into the histogram bins. + // So first we precompute the values needed to determine how much each + // pixel votes into each bin. 
+ simd8f xp = (xx + 0.5) / (float)cell_size + 0.5; + simd8i ixp = simd8i(xp); + simd8f vx0 = xp - ixp; + simd8f vx1 = 1.0f - vx0; + + v = sqrt(v); + + // Now snap the gradient to one of 18 orientations + simd8f best_dot = 0; + simd8f best_o = 0; + for (int o = 0; o < 9; o++) + { + simd8f dot = grad_x*directions[o](0) + grad_y*directions[o](1); + simd8f_bool cmp = dot>best_dot; + best_dot = select(cmp, dot, best_dot); + dot *= -1; + best_o = select(cmp, o, best_o); + + cmp = dot > best_dot; + best_dot = select(cmp, dot, best_dot); + best_o = select(cmp, o + 9, best_o); + } + + + // Add the gradient magnitude, v, to 4 histograms around pixel using + // bilinear interpolation. + vx1 *= v; + vx0 *= v; + // The amounts for each bin + simd8f v11 = vy1*vx1; + simd8f v01 = vy0*vx1; + simd8f v10 = vy1*vx0; + simd8f v00 = vy0*vx0; + + int32 _best_o[8]; simd8i(best_o).store(_best_o); + int32 _ixp[8]; ixp.store(_ixp); + float _v11[8]; v11.store(_v11); + float _v01[8]; v01.store(_v01); + float _v10[8]; v10.store(_v10); + float _v00[8]; v00.store(_v00); + + hist[iyp + 1][_ixp[0]](_best_o[0]) += _v11[0]; + hist[iyp + 1 + 1][_ixp[0]](_best_o[0]) += _v01[0]; + hist[iyp + 1][_ixp[0] + 1](_best_o[0]) += _v10[0]; + hist[iyp + 1 + 1][_ixp[0] + 1](_best_o[0]) += _v00[0]; + + hist[iyp + 1][_ixp[1]](_best_o[1]) += _v11[1]; + hist[iyp + 1 + 1][_ixp[1]](_best_o[1]) += _v01[1]; + hist[iyp + 1][_ixp[1] + 1](_best_o[1]) += _v10[1]; + hist[iyp + 1 + 1][_ixp[1] + 1](_best_o[1]) += _v00[1]; + + hist[iyp + 1][_ixp[2]](_best_o[2]) += _v11[2]; + hist[iyp + 1 + 1][_ixp[2]](_best_o[2]) += _v01[2]; + hist[iyp + 1][_ixp[2] + 1](_best_o[2]) += _v10[2]; + hist[iyp + 1 + 1][_ixp[2] + 1](_best_o[2]) += _v00[2]; + + hist[iyp + 1][_ixp[3]](_best_o[3]) += _v11[3]; + hist[iyp + 1 + 1][_ixp[3]](_best_o[3]) += _v01[3]; + hist[iyp + 1][_ixp[3] + 1](_best_o[3]) += _v10[3]; + hist[iyp + 1 + 1][_ixp[3] + 1](_best_o[3]) += _v00[3]; + + hist[iyp + 1][_ixp[4]](_best_o[4]) += _v11[4]; + hist[iyp + 1 + 
1][_ixp[4]](_best_o[4]) += _v01[4]; + hist[iyp + 1][_ixp[4] + 1](_best_o[4]) += _v10[4]; + hist[iyp + 1 + 1][_ixp[4] + 1](_best_o[4]) += _v00[4]; + + hist[iyp + 1][_ixp[5]](_best_o[5]) += _v11[5]; + hist[iyp + 1 + 1][_ixp[5]](_best_o[5]) += _v01[5]; + hist[iyp + 1][_ixp[5] + 1](_best_o[5]) += _v10[5]; + hist[iyp + 1 + 1][_ixp[5] + 1](_best_o[5]) += _v00[5]; + + hist[iyp + 1][_ixp[6]](_best_o[6]) += _v11[6]; + hist[iyp + 1 + 1][_ixp[6]](_best_o[6]) += _v01[6]; + hist[iyp + 1][_ixp[6] + 1](_best_o[6]) += _v10[6]; + hist[iyp + 1 + 1][_ixp[6] + 1](_best_o[6]) += _v00[6]; + + hist[iyp + 1][_ixp[7]](_best_o[7]) += _v11[7]; + hist[iyp + 1 + 1][_ixp[7]](_best_o[7]) += _v01[7]; + hist[iyp + 1][_ixp[7] + 1](_best_o[7]) += _v10[7]; + hist[iyp + 1 + 1][_ixp[7] + 1](_best_o[7]) += _v00[7]; + } + // Now process the right columns that don't fit into simd registers. + for (; x < visible_nc; x++) + { + matrix<float, 2, 1> grad; + float v; + get_gradient(y,x,img,grad,v); + + // snap to one of 18 orientations + float best_dot = 0; + int best_o = 0; + for (int o = 0; o < 9; o++) + { + const float dot = dlib::dot(directions[o], grad); + if (dot > best_dot) + { + best_dot = dot; + best_o = o; + } + else if (-dot > best_dot) + { + best_dot = -dot; + best_o = o+9; + } + } + + v = std::sqrt(v); + // add to 4 histograms around pixel using bilinear interpolation + const float xp = ((double)x + 0.5) / (double)cell_size - 0.5; + const int ixp = (int)std::floor(xp); + const float vx0 = xp - ixp; + const float vx1 = 1.0 - vx0; + + hist[iyp+1][ixp+1](best_o) += vy1*vx1*v; + hist[iyp+1+1][ixp+1](best_o) += vy0*vx1*v; + hist[iyp+1][ixp+1+1](best_o) += vy1*vx0*v; + hist[iyp+1+1][ixp+1+1](best_o) += vy0*vx0*v; + } + } + + // compute energy in each block by summing over orientations + for (int r = 0; r < cells_nr; ++r) + { + for (int c = 0; c < cells_nc; ++c) + { + for (int o = 0; o < 9; o++) + { + norm[r][c] += (hist[r+1][c+1](o) + hist[r+1][c+1](o+9)) * (hist[r+1][c+1](o) + hist[r+1][c+1](o+9)); + } 
+ } + } + + const float eps = 0.0001; + // compute features + for (int y = 0; y < hog_nr; y++) + { + const int yy = y+padding_rows_offset; + for (int x = 0; x < hog_nc; x++) + { + const simd4f z1(norm[y+1][x+1], + norm[y][x+1], + norm[y+1][x], + norm[y][x]); + + const simd4f z2(norm[y+1][x+2], + norm[y][x+2], + norm[y+1][x+1], + norm[y][x+1]); + + const simd4f z3(norm[y+2][x+1], + norm[y+1][x+1], + norm[y+2][x], + norm[y+1][x]); + + const simd4f z4(norm[y+2][x+2], + norm[y+1][x+2], + norm[y+2][x+1], + norm[y+1][x+1]); + + const simd4f nn = 0.2*sqrt(z1+z2+z3+z4+eps); + const simd4f n = 0.1/nn; + + simd4f t = 0; + + const int xx = x+padding_cols_offset; + + // contrast-sensitive features + for (int o = 0; o < 18; o+=3) + { + simd4f temp0(hist[y+1+1][x+1+1](o)); + simd4f temp1(hist[y+1+1][x+1+1](o+1)); + simd4f temp2(hist[y+1+1][x+1+1](o+2)); + simd4f h0 = min(temp0,nn)*n; + simd4f h1 = min(temp1,nn)*n; + simd4f h2 = min(temp2,nn)*n; + set_hog(hog,o,xx,yy, sum(h0)); + set_hog(hog,o+1,xx,yy, sum(h1)); + set_hog(hog,o+2,xx,yy, sum(h2)); + t += h0+h1+h2; + } + + t *= 2*0.2357; + + // contrast-insensitive features + for (int o = 0; o < 9; o+=3) + { + simd4f temp0 = hist[y+1+1][x+1+1](o) + hist[y+1+1][x+1+1](o+9); + simd4f temp1 = hist[y+1+1][x+1+1](o+1) + hist[y+1+1][x+1+1](o+9+1); + simd4f temp2 = hist[y+1+1][x+1+1](o+2) + hist[y+1+1][x+1+1](o+9+2); + simd4f h0 = min(temp0,nn)*n; + simd4f h1 = min(temp1,nn)*n; + simd4f h2 = min(temp2,nn)*n; + set_hog(hog,o+18,xx,yy, sum(h0)); + set_hog(hog,o+18+1,xx,yy, sum(h1)); + set_hog(hog,o+18+2,xx,yy, sum(h2)); + } + + + float temp[4]; + t.store(temp); + + // texture features + set_hog(hog,27,xx,yy, temp[0]); + set_hog(hog,28,xx,yy, temp[1]); + set_hog(hog,29,xx,yy, temp[2]); + set_hog(hog,30,xx,yy, temp[3]); + } + } + } + + // ------------------------------------------------------------------------------------ + + inline void create_fhog_bar_images ( + dlib::array<matrix<float> >& mbars, + const long w + ) + { + const long bdims = 
9; + // Make the oriented lines we use to draw on each HOG cell. + mbars.resize(bdims); + dlib::array<array2d<unsigned char> > bars(bdims); + array2d<unsigned char> temp(w,w); + for (unsigned long i = 0; i < bars.size(); ++i) + { + assign_all_pixels(temp, 0); + draw_line(temp, point(w/2,0), point(w/2,w-1), 255); + rotate_image(temp, bars[i], i*-pi/bars.size()); + + mbars[i] = subm(matrix_cast<float>(mat(bars[i])), centered_rect(get_rect(bars[i]),w,w) ); + } + } + + } // end namespace impl_fhog + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename T, + typename mm1, + typename mm2 + > + void extract_fhog_features( + const image_type& img, + dlib::array<array2d<T,mm1>,mm2>& hog, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ) + { + impl_fhog::impl_extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding); + // If the image is too small then the above function outputs an empty feature map. + // But to make things very uniform in usage we require the output to still have the + // 31 planes (but they are just empty). 
+ if (hog.size() == 0) + hog.resize(31); + } + + template < + typename image_type, + typename T, + typename mm + > + void extract_fhog_features( + const image_type& img, + array2d<matrix<T,31,1>,mm>& hog, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ) + { + impl_fhog::impl_extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename T + > + void extract_fhog_features( + const image_type& img, + matrix<T,0,1>& feats, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ) + { + dlib::array<array2d<T> > hog; + extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding); + feats.set_size(hog.size()*hog[0].size()); + for (unsigned long i = 0; i < hog.size(); ++i) + { + const long size = hog[i].size(); + set_rowm(feats, range(i*size, (i+1)*size-1)) = reshape_to_column_vector(mat(hog[i])); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + matrix<double,0,1> extract_fhog_features( + const image_type& img, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ) + { + matrix<double, 0, 1> feats; + extract_fhog_features(img, feats, cell_size, filter_rows_padding, filter_cols_padding); + return feats; + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + inline point image_to_fhog ( + point p, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( cell_size > 0 && + filter_rows_padding > 0 && + filter_cols_padding > 0 , + "\t point image_to_fhog()" + << 
"\n\t Invalid inputs were given to this function. " + << "\n\t cell_size: " << cell_size + << "\n\t filter_rows_padding: " << filter_rows_padding + << "\n\t filter_cols_padding: " << filter_cols_padding + ); + + // There is a one pixel border around the image. + p -= point(1,1); + // There is also a 1 "cell" border around the HOG image formation. + return p/cell_size - point(1,1) + point((filter_cols_padding-1)/2,(filter_rows_padding-1)/2); + } + +// ---------------------------------------------------------------------------------------- + + inline rectangle image_to_fhog ( + const rectangle& rect, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( cell_size > 0 && + filter_rows_padding > 0 && + filter_cols_padding > 0 , + "\t rectangle image_to_fhog()" + << "\n\t Invalid inputs were given to this function. " + << "\n\t cell_size: " << cell_size + << "\n\t filter_rows_padding: " << filter_rows_padding + << "\n\t filter_cols_padding: " << filter_cols_padding + ); + + return rectangle(image_to_fhog(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding), + image_to_fhog(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding)); + } + +// ---------------------------------------------------------------------------------------- + + inline point fhog_to_image ( + point p, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( cell_size > 0 && + filter_rows_padding > 0 && + filter_cols_padding > 0 , + "\t point fhog_to_image()" + << "\n\t Invalid inputs were given to this function. " + << "\n\t cell_size: " << cell_size + << "\n\t filter_rows_padding: " << filter_rows_padding + << "\n\t filter_cols_padding: " << filter_cols_padding + ); + + // Convert to image space and then set to the center of the cell. 
+ point offset; + + p = (p+point(1,1)-point((filter_cols_padding-1)/2,(filter_rows_padding-1)/2))*cell_size + point(1,1); + if (p.x() >= 0 && p.y() >= 0) offset = point(cell_size/2,cell_size/2); + if (p.x() < 0 && p.y() >= 0) offset = point(-cell_size/2,cell_size/2); + if (p.x() >= 0 && p.y() < 0) offset = point(cell_size/2,-cell_size/2); + if (p.x() < 0 && p.y() < 0) offset = point(-cell_size/2,-cell_size/2); + return p + offset; + } + +// ---------------------------------------------------------------------------------------- + + inline rectangle fhog_to_image ( + const rectangle& rect, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( cell_size > 0 && + filter_rows_padding > 0 && + filter_cols_padding > 0 , + "\t rectangle fhog_to_image()" + << "\n\t Invalid inputs were given to this function. " + << "\n\t cell_size: " << cell_size + << "\n\t filter_rows_padding: " << filter_rows_padding + << "\n\t filter_cols_padding: " << filter_cols_padding + ); + + return rectangle(fhog_to_image(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding), + fhog_to_image(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding)); + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename mm1, + typename mm2 + > + matrix<unsigned char> draw_fhog( + const dlib::array<array2d<T,mm1>,mm2>& hog, + const long cell_draw_size = 15, + const float min_response_threshold = 0.0 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( cell_draw_size > 0 && hog.size()==31, + "\t matrix<unsigned char> draw_fhog()" + << "\n\t Invalid inputs were given to this function. 
" + << "\n\t cell_draw_size: " << cell_draw_size + << "\n\t hog.size(): " << hog.size() + ); + + dlib::array<matrix<float> > mbars; + impl_fhog::create_fhog_bar_images(mbars,cell_draw_size); + + // now draw the bars onto the HOG cells + matrix<float> himg(hog[0].nr()*cell_draw_size, hog[0].nc()*cell_draw_size); + himg = 0; + for (unsigned long d = 0; d < mbars.size(); ++d) + { + for (long r = 0; r < himg.nr(); r+=cell_draw_size) + { + for (long c = 0; c < himg.nc(); c+=cell_draw_size) + { + const float val = hog[d][r/cell_draw_size][c/cell_draw_size] + + hog[d+mbars.size()][r/cell_draw_size][c/cell_draw_size] + + hog[d+mbars.size()*2][r/cell_draw_size][c/cell_draw_size]; + if (val > min_response_threshold) + { + set_subm(himg, r, c, cell_draw_size, cell_draw_size) += val*mbars[d%mbars.size()]; + } + } + } + } + + const float thresh = mean(himg) + 4 * stddev(himg); + if (thresh != 0) + return matrix_cast<unsigned char>(upperbound(round(himg*255/thresh),255)); + else + return matrix_cast<unsigned char>(himg); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + matrix<unsigned char> draw_fhog ( + const std::vector<matrix<T> >& hog, + const long cell_draw_size = 15, + const float min_response_threshold = 0.0 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( cell_draw_size > 0 && hog.size()==31, + "\t matrix<unsigned char> draw_fhog()" + << "\n\t Invalid inputs were given to this function. " + << "\n\t cell_draw_size: " << cell_draw_size + << "\n\t hog.size(): " << hog.size() + ); + + // Just convert the input into the right object and then call the above draw_fhog() + // function on it. 
+ dlib::array<array2d<T> > temp(hog.size()); + for (unsigned long i = 0; i < temp.size(); ++i) + { + temp[i].set_size(hog[i].nr(), hog[i].nc()); + for (long r = 0; r < hog[i].nr(); ++r) + { + for (long c = 0; c < hog[i].nc(); ++c) + { + temp[i][r][c] = hog[i](r,c); + } + } + } + return draw_fhog(temp,cell_draw_size, min_response_threshold); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename mm + > + matrix<unsigned char> draw_fhog( + const array2d<matrix<T,31,1>,mm>& hog, + const long cell_draw_size = 15, + const float min_response_threshold = 0.0 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( cell_draw_size > 0, + "\t matrix<unsigned char> draw_fhog()" + << "\n\t Invalid inputs were given to this function. " + << "\n\t cell_draw_size: " << cell_draw_size + ); + + dlib::array<matrix<float> > mbars; + impl_fhog::create_fhog_bar_images(mbars,cell_draw_size); + + // now draw the bars onto the HOG cells + matrix<float> himg(hog.nr()*cell_draw_size, hog.nc()*cell_draw_size); + himg = 0; + for (unsigned long d = 0; d < mbars.size(); ++d) + { + for (long r = 0; r < himg.nr(); r+=cell_draw_size) + { + for (long c = 0; c < himg.nc(); c+=cell_draw_size) + { + const float val = hog[r/cell_draw_size][c/cell_draw_size](d) + + hog[r/cell_draw_size][c/cell_draw_size](d+mbars.size()) + + hog[r/cell_draw_size][c/cell_draw_size](d+mbars.size()*2); + if (val > min_response_threshold) + { + set_subm(himg, r, c, cell_draw_size, cell_draw_size) += val*mbars[d%mbars.size()]; + } + } + } + } + + const float thresh = mean(himg) + 4 * stddev(himg); + if (thresh != 0) + return matrix_cast<unsigned char>(upperbound(round(himg*255/thresh),255)); + else + return matrix_cast<unsigned char>(himg); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_fHOG_Hh_ + diff --git a/ml/dlib/dlib/image_transforms/fhog_abstract.h 
b/ml/dlib/dlib/image_transforms/fhog_abstract.h new file mode 100644 index 000000000..f66c5d55a --- /dev/null +++ b/ml/dlib/dlib/image_transforms/fhog_abstract.h @@ -0,0 +1,346 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_fHOG_ABSTRACT_Hh_ +#ifdef DLIB_fHOG_ABSTRACT_Hh_ + +#include "../matrix/matrix_abstract.h" +#include "../array2d/array2d_kernel_abstract.h" +#include "../array/array_kernel_abstract.h" +#include "../image_processing/generic_image.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename T, + typename mm + > + void extract_fhog_features( + const image_type& img, + array2d<matrix<T,31,1>,mm>& hog, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ); + /*! + requires + - cell_size > 0 + - filter_rows_padding > 0 + - filter_cols_padding > 0 + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - T should be float or double + ensures + - This function implements the HOG feature extraction method described in + the paper: + Object Detection with Discriminatively Trained Part Based Models by + P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan + IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010 + This means that it takes an input image img and outputs Felzenszwalb's + 31 dimensional version of HOG features, which are stored into #hog. + - The input image is broken into cells that are cell_size by cell_size pixels + and within each cell we compute a 31 dimensional FHOG vector. This vector + describes the gradient structure within the cell. + - A common task is to convolve each channel of the hog image with a linear + filter. 
This is made more convenient if the contents of #hog includes extra + rows and columns of zero padding along the borders. This extra padding + allows for more efficient convolution code since the code does not need to + perform expensive boundary checking. Therefore, you can set + filter_rows_padding and filter_cols_padding to indicate the size of the + filter you wish to use and this function will ensure #hog has the appropriate + extra zero padding along the borders. In particular, it will include the + following extra padding: + - (filter_rows_padding-1)/2 extra rows of zeros on the top of #hog. + - (filter_cols_padding-1)/2 extra columns of zeros on the left of #hog. + - filter_rows_padding/2 extra rows of zeros on the bottom of #hog. + - filter_cols_padding/2 extra columns of zeros on the right of #hog. + Therefore, the extra padding is done such that functions like + spatially_filter_image() apply their filters to the entire content containing + area of a hog image (note that you should use the following planar version of + extract_fhog_features() instead of the interlaced version if you want to use + spatially_filter_image() on a hog image). + - #hog.nr() == max(round(img.nr()/(double)cell_size)-2,0) + filter_rows_padding-1. + - #hog.nc() == max(round(img.nc()/(double)cell_size)-2,0) + filter_cols_padding-1. + (i.e. Each output dimension is roughly 1/cell_size the original size but + there is a one cell_size border all around the image that is lost and then we + add on any additional padding that is requested.) + - for all valid r and c: + - #hog[r][c] == the FHOG vector describing the cell centered at the pixel location + fhog_to_image(point(c,r),cell_size,filter_rows_padding,filter_cols_padding) in img. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename T, + typename mm1, + typename mm2 + > + void extract_fhog_features( + const image_type& img, + dlib::array<array2d<T,mm1>,mm2>& hog, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ); + /*! + requires + - cell_size > 0 + - filter_rows_padding > 0 + - filter_cols_padding > 0 + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - T should be float or double + ensures + - This function is identical to the above extract_fhog_features() routine + except that it outputs the results in a planar format rather than the + interlaced format used above. That is, each element of the hog vector is + placed into one of 31 images inside #hog. To be precise, if vhog is the + output of the above interlaced version of extract_fhog_features() then we + will have, for all valid r and c: + - #hog[i][r][c] == vhog[r][c](i) + (where 0 <= i < 31) + - #hog.size() == 31 + - for all valid i: + - #hog[i].nr() == hog[0].nr() + - #hog[i].nc() == hog[0].nc() + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + matrix<double,0,1> extract_fhog_features( + const image_type& img, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ); + /*! + requires + - cell_size > 0 + - filter_rows_padding > 0 + - filter_cols_padding > 0 + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + ensures + - This function calls the above extract_fhog_features() routine and simply + packages the entire output into a dlib::matrix. 
The matrix is constructed + using the planar version of extract_fhog_features() and then each output + plane is converted into a column vector and subsequently all 31 column + vectors are concatenated together and returned. + - Each plane is converted into a column vector using reshape_to_column_vector(), + and is therefore represented in row major order inside the returned vector. + - If H is the array<array2d<double>> object output by the planar + extract_fhog_features() then the returned vector is composed by concatenating + H[0], then H[1], then H[2], and so on in ascending index order. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename T + > + void extract_fhog_features( + const image_type& img, + matrix<T,0,1>& feats, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ); + /*! + requires + - cell_size > 0 + - filter_rows_padding > 0 + - filter_cols_padding > 0 + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - T is float, double, or long double + ensures + - This function is identical to the above version of extract_fhog_features() + that returns a matrix<double,0,1> except that it returns the matrix here + through a reference argument instead of returning it by value. + !*/ + +// ---------------------------------------------------------------------------------------- + + inline point image_to_fhog ( + point p, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ); + /*! + requires + - cell_size > 0 + - filter_rows_padding > 0 + - filter_cols_padding > 0 + ensures + - When using extract_fhog_features(), each FHOG cell is extracted from a + certain region in the input image. image_to_fhog() returns the identity of + the FHOG cell containing the image pixel at location p. 
Or in other words, + let P == image_to_fhog(p) and hog be a FHOG feature map output by + extract_fhog_features(), then hog[P.y()][P.x()] == the FHOG vector/cell + containing the point p in the input image. Note that some image points + might not have corresponding feature locations. E.g. border points or points + outside the image. In these cases the returned point will be outside the + input image. + - Note that you should use the same values of cell_size, filter_rows_padding, + and filter_cols_padding that you used with extract_fhog_features(). + !*/ + +// ---------------------------------------------------------------------------------------- + + inline rectangle image_to_fhog ( + const rectangle& rect, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ); + /*! + requires + - cell_size > 0 + - filter_rows_padding > 0 + - filter_cols_padding > 0 + ensures + - maps a rectangle from image space to fhog space. In particular this function returns: + rectangle(image_to_fhog(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding), + image_to_fhog(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding)) + !*/ + +// ---------------------------------------------------------------------------------------- + + inline point fhog_to_image ( + point p, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ); + /*! + requires + - cell_size > 0 + - filter_rows_padding > 0 + - filter_cols_padding > 0 + ensures + - Maps a pixel in a FHOG image (produced by extract_fhog_features()) back to the + corresponding original input pixel. Note that since FHOG images are + spatially downsampled by aggregation into cells the mapping is not totally + invertible. Therefore, the returned location will be the center of the cell + in the original image that contained the FHOG vector at position p. 
Moreover, + cell_size, filter_rows_padding, and filter_cols_padding should be set to the + values used by the call to extract_fhog_features(). + - Mapping from fhog space to image space is an invertible transformation. That + is, for any point P we have P == image_to_fhog(fhog_to_image(P,cell_size,filter_rows_padding,filter_cols_padding), + cell_size,filter_rows_padding,filter_cols_padding). + !*/ + +// ---------------------------------------------------------------------------------------- + + inline rectangle fhog_to_image ( + const rectangle& rect, + int cell_size = 8, + int filter_rows_padding = 1, + int filter_cols_padding = 1 + ); + /*! + requires + - cell_size > 0 + - filter_rows_padding > 0 + - filter_cols_padding > 0 + ensures + - maps a rectangle from fhog space to image space. In particular this function returns: + rectangle(fhog_to_image(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding), + fhog_to_image(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding)) + - Mapping from fhog space to image space is an invertible transformation. That + is, for any rectangle R we have R == image_to_fhog(fhog_to_image(R,cell_size,filter_rows_padding,filter_cols_padding), + cell_size,filter_rows_padding,filter_cols_padding). + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename mm1, + typename mm2 + > + matrix<unsigned char> draw_fhog( + const dlib::array<array2d<T,mm1>,mm2>& hog, + const long cell_draw_size = 15, + const float min_response_threshold = 0.0 + ); + /*! + requires + - cell_draw_size > 0 + - hog.size() == 31 + ensures + - Interprets hog as a FHOG feature map output by extract_fhog_features() and + converts it into an image suitable for display on the screen. In particular, + we draw all the hog cells into a grayscale image in a way that shows the + magnitude and orientation of the gradient energy in each cell. The result is + then returned. 
+ - The size of the cells in the output image will be rendered as cell_draw_size + pixels wide and tall. + - HOG cells with a response value less than min_response_threshold are not + drawn. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + matrix<unsigned char> draw_fhog ( + const std::vector<matrix<T> >& hog, + const long cell_draw_size = 15, + const float min_response_threshold = 0.0 + ); + /*! + requires + - cell_draw_size > 0 + - hog.size() == 31 + ensures + - This function just converts the given hog object into an array<array2d<T>> + and passes it to the above draw_fhog() routine and returns the results. + - HOG cells with a response value less than min_response_threshold are not + drawn. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename mm + > + matrix<unsigned char> draw_fhog( + const array2d<matrix<T,31,1>,mm>& hog, + const long cell_draw_size = 15, + const float min_response_threshold = 0.0 + ); + /*! + requires + - cell_draw_size > 0 + ensures + - Interprets hog as a FHOG feature map output by extract_fhog_features() and + converts it into an image suitable for display on the screen. In particular, + we draw all the hog cells into a grayscale image in a way that shows the + magnitude and orientation of the gradient energy in each cell. The result is + then returned. + - The size of the cells in the output image will be rendered as cell_draw_size + pixels wide and tall. + - HOG cells with a response value less than min_response_threshold are not + drawn. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_fHOG_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/image_transforms/hough_transform.h b/ml/dlib/dlib/image_transforms/hough_transform.h new file mode 100644 index 000000000..477b4dc2b --- /dev/null +++ b/ml/dlib/dlib/image_transforms/hough_transform.h @@ -0,0 +1,358 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_HOUGH_tRANSFORM_Hh_ +#define DLIB_HOUGH_tRANSFORM_Hh_ + +#include "hough_transform_abstract.h" +#include "../image_processing/generic_image.h" +#include "../geometry.h" +#include "../algs.h" +#include "assign_image.h" +#include <limits> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class hough_transform + { + + public: + explicit hough_transform ( + unsigned long size_ + ) : _size(size_) + { + DLIB_CASSERT(size_ > 0, + "\t hough_transform::hough_transform(size_)" + << "\n\t Invalid arguments given to this function." 
+ ); + + even_size = _size - (_size%2); + + const point cent = center(rectangle(0,0,size_-1,size_-1)); + xcos_theta.set_size(size_, size_); + ysin_theta.set_size(size_, size_); + + std::vector<double> cos_theta(size_), sin_theta(size_); + const double scale = 1<<16; + for (unsigned long t = 0; t < size_; ++t) + { + double theta = t*pi/even_size; + + cos_theta[t] = scale*std::cos(theta)/sqrt_2; + sin_theta[t] = scale*std::sin(theta)/sqrt_2; + } + const double offset = scale*even_size/4.0 + 0.5; + + for (unsigned long c = 0; c < size_; ++c) + { + const long x = c - cent.x(); + for (unsigned long t = 0; t < size_; ++t) + xcos_theta(c,t) = static_cast<int32>(x*cos_theta[t] + offset); + } + for (unsigned long r = 0; r < size_; ++r) + { + const long y = r - cent.y(); + for (unsigned long t = 0; t < size_; ++t) + ysin_theta(r,t) = static_cast<int32>(y*sin_theta[t] + offset); + } + } + + unsigned long size( + ) const { return _size; } + + long nr( + ) const { return _size; } + + long nc( + ) const { return _size; } + + std::pair<point, point> get_line ( + const point& p + ) const + { + DLIB_ASSERT(rectangle(0,0,size()-1,size()-1).contains(p) == true, + "\t pair<point,point> hough_transform::get_line(point)" + << "\n\t Invalid arguments given to this function." + << "\n\t p: " << p + << "\n\t size(): " << size() + ); + + // First we compute the radius measured in pixels from the center and the theta + // angle in radians. + typedef dlib::vector<double,2> vect; + const rectangle box(0,0,size()-1,size()-1); + const vect cent = center(box); + double theta = p.x()-cent.x(); + double radius = p.y()-cent.y(); + theta = theta*pi/even_size; + radius = radius*sqrt_2 + 0.5; + + // now make a line segment on the line. 
+ vect v1 = cent + vect(size()+1000,0) + vect(0,radius); + vect v2 = cent - vect(size()+1000,0) + vect(0,radius); + point p1 = rotate_point(cent, v1, theta); + point p2 = rotate_point(cent, v2, theta); + + clip_line_to_rectangle(box, p1, p2); + + return std::make_pair(p1,p2); + } + + template < + typename image_type + > + point get_best_hough_point ( + const point& p, + const image_type& himg_ + ) + { + const const_image_view<image_type> himg(himg_); + + DLIB_ASSERT(himg.nr() == size() && himg.nc() == size() && + rectangle(0,0,size()-1,size()-1).contains(p) == true, + "\t point hough_transform::get_best_hough_point()" + << "\n\t Invalid arguments given to this function." + << "\n\t himg.nr(): " << himg.nr() + << "\n\t himg.nc(): " << himg.nc() + << "\n\t size(): " << size() + << "\n\t p: " << p + ); + + + typedef typename image_traits<image_type>::pixel_type pixel_type; + COMPILE_TIME_ASSERT(pixel_traits<pixel_type>::grayscale == true); + pixel_type best_val = std::numeric_limits<pixel_type>::min(); + point best_point; + + + const long max_n8 = (himg.nc()/8)*8; + const long max_n4 = (himg.nc()/4)*4; + const long r = p.y(); + const long c = p.x(); + + const int32* ysin = &ysin_theta(r,0); + const int32* xcos = &xcos_theta(c,0); + long t = 0; + while(t < max_n8) + { + long rr0 = (*xcos++ + *ysin++)>>16; + long rr1 = (*xcos++ + *ysin++)>>16; + long rr2 = (*xcos++ + *ysin++)>>16; + long rr3 = (*xcos++ + *ysin++)>>16; + long rr4 = (*xcos++ + *ysin++)>>16; + long rr5 = (*xcos++ + *ysin++)>>16; + long rr6 = (*xcos++ + *ysin++)>>16; + long rr7 = (*xcos++ + *ysin++)>>16; + + if (himg[rr0][t++] > best_val) + { + best_val = himg[rr0][t-1]; + best_point.x() = t-1; + best_point.y() = rr0; + } + if (himg[rr1][t++] > best_val) + { + best_val = himg[rr1][t-1]; + best_point.x() = t-1; + best_point.y() = rr1; + } + if (himg[rr2][t++] > best_val) + { + best_val = himg[rr2][t-1]; + best_point.x() = t-1; + best_point.y() = rr2; + } + if (himg[rr3][t++] > best_val) + { + best_val = 
himg[rr3][t-1]; + best_point.x() = t-1; + best_point.y() = rr3; + } + if (himg[rr4][t++] > best_val) + { + best_val = himg[rr4][t-1]; + best_point.x() = t-1; + best_point.y() = rr4; + } + if (himg[rr5][t++] > best_val) + { + best_val = himg[rr5][t-1]; + best_point.x() = t-1; + best_point.y() = rr5; + } + if (himg[rr6][t++] > best_val) + { + best_val = himg[rr6][t-1]; + best_point.x() = t-1; + best_point.y() = rr6; + } + if (himg[rr7][t++] > best_val) + { + best_val = himg[rr7][t-1]; + best_point.x() = t-1; + best_point.y() = rr7; + } + } + while(t < max_n4) + { + long rr0 = (*xcos++ + *ysin++)>>16; + long rr1 = (*xcos++ + *ysin++)>>16; + long rr2 = (*xcos++ + *ysin++)>>16; + long rr3 = (*xcos++ + *ysin++)>>16; + if (himg[rr0][t++] > best_val) + { + best_val = himg[rr0][t-1]; + best_point.x() = t-1; + best_point.y() = rr0; + } + if (himg[rr1][t++] > best_val) + { + best_val = himg[rr1][t-1]; + best_point.x() = t-1; + best_point.y() = rr1; + } + if (himg[rr2][t++] > best_val) + { + best_val = himg[rr2][t-1]; + best_point.x() = t-1; + best_point.y() = rr2; + } + if (himg[rr3][t++] > best_val) + { + best_val = himg[rr3][t-1]; + best_point.x() = t-1; + best_point.y() = rr3; + } + } + while(t < himg.nc()) + { + long rr0 = (*xcos++ + *ysin++)>>16; + if (himg[rr0][t++] > best_val) + { + best_val = himg[rr0][t-1]; + best_point.x() = t-1; + best_point.y() = rr0; + } + } + + return best_point; + } + + template < + typename in_image_type, + typename out_image_type + > + void operator() ( + const in_image_type& img_, + const rectangle& box, + out_image_type& himg_ + ) const + { + typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + + DLIB_CASSERT(box.width() == size() && box.height() == size(), + "\t hough_transform::hough_transform(size_)" + << "\n\t Invalid arguments given to this function." 
+ << "\n\t box.width(): " << box.width() + << "\n\t box.height(): " << box.height() + << "\n\t size(): " << size() + ); + + COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale == true); + COMPILE_TIME_ASSERT(pixel_traits<out_pixel_type>::grayscale == true); + + const_image_view<in_image_type> img(img_); + image_view<out_image_type> himg(himg_); + + himg.set_size(size(), size()); + assign_all_pixels(himg, 0); + + const rectangle area = box.intersect(get_rect(img)); + + const long max_n8 = (himg.nc()/8)*8; + const long max_n4 = (himg.nc()/4)*4; + for (long r = area.top(); r <= area.bottom(); ++r) + { + const int32* ysin_base = &ysin_theta(r-box.top(),0); + for (long c = area.left(); c <= area.right(); ++c) + { + const out_pixel_type val = static_cast<out_pixel_type>(img[r][c]); + if (val != 0) + { + /* + // The code in this comment is equivalent to the more complex but + // faster code below. We keep this simple version of the Hough + // transform implementation here just to document what it's doing + // more clearly. + const point cent = center(box); + const long x = c - cent.x(); + const long y = r - cent.y(); + for (long t = 0; t < himg.nc(); ++t) + { + double theta = t*pi/even_size; + double radius = (x*std::cos(theta) + y*std::sin(theta))/sqrt_2 + even_size/2 + 0.5; + long rr = static_cast<long>(radius); + himg[rr][t] += val; + } + continue; + */ + + // Run the speed optimized version of the code in the above + // comment. 
+ const int32* ysin = ysin_base; + const int32* xcos = &xcos_theta(c-box.left(),0); + long t = 0; + while(t < max_n8) + { + long rr0 = (*xcos++ + *ysin++)>>16; + long rr1 = (*xcos++ + *ysin++)>>16; + long rr2 = (*xcos++ + *ysin++)>>16; + long rr3 = (*xcos++ + *ysin++)>>16; + long rr4 = (*xcos++ + *ysin++)>>16; + long rr5 = (*xcos++ + *ysin++)>>16; + long rr6 = (*xcos++ + *ysin++)>>16; + long rr7 = (*xcos++ + *ysin++)>>16; + + himg[rr0][t++] += val; + himg[rr1][t++] += val; + himg[rr2][t++] += val; + himg[rr3][t++] += val; + himg[rr4][t++] += val; + himg[rr5][t++] += val; + himg[rr6][t++] += val; + himg[rr7][t++] += val; + } + while(t < max_n4) + { + long rr0 = (*xcos++ + *ysin++)>>16; + long rr1 = (*xcos++ + *ysin++)>>16; + long rr2 = (*xcos++ + *ysin++)>>16; + long rr3 = (*xcos++ + *ysin++)>>16; + himg[rr0][t++] += val; + himg[rr1][t++] += val; + himg[rr2][t++] += val; + himg[rr3][t++] += val; + } + while(t < himg.nc()) + { + long rr0 = (*xcos++ + *ysin++)>>16; + himg[rr0][t++] += val; + } + } + } + } + } + + private: + + unsigned long _size; + unsigned long even_size; // equal to _size if _size is even, otherwise equal to _size-1. + matrix<int32> xcos_theta, ysin_theta; + }; +} + +#endif // DLIB_HOUGH_tRANSFORM_Hh_ + diff --git a/ml/dlib/dlib/image_transforms/hough_transform_abstract.h b/ml/dlib/dlib/image_transforms/hough_transform_abstract.h new file mode 100644 index 000000000..f0ff2b550 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/hough_transform_abstract.h @@ -0,0 +1,145 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_HOUGH_tRANSFORM_ABSTRACT_Hh_ +#ifdef DLIB_HOUGH_tRANSFORM_ABSTRACT_Hh_ + +#include "../geometry.h" +#include "../image_processing/generic_image.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class hough_transform + { + /*! 
+ WHAT THIS OBJECT REPRESENTS + This object is a tool for computing the line finding version of the Hough + transform given some kind of edge detection image as input. It also allows + the edge pixels to be weighted such that higher weighted edge pixels + contribute correspondingly more to the output of the Hough transform, + allowing stronger edges to create correspondingly stronger line detections + in the final Hough transform. + + THREAD SAFETY + It is safe for multiple threads to make concurrent accesses to this object + without synchronization. + !*/ + + public: + + explicit hough_transform ( + unsigned long size_ + ); + /*! + requires + - size_ > 0 + ensures + - This object will compute Hough transforms that are size_ by size_ pixels. + This is in terms of both the Hough accumulator array size as well as the + input image size. + - #size() == size_ + !*/ + + unsigned long size( + ) const; + /*! + ensures + - returns the size of the Hough transforms generated by this object. In + particular, this object creates Hough transform images that are size() by + size() pixels in size. + !*/ + + long nr( + ) const; + /*! + ensures + - returns size() + !*/ + + long nc( + ) const; + /*! + ensures + - returns size() + !*/ + + std::pair<point, point> get_line ( + const point& p + ) const; + /*! + requires + - rectangle(0,0,size()-1,size()-1).contains(p) == true + (i.e. p must be a point inside the Hough accumulator array) + ensures + - returns the line segment in the original image space corresponding + to Hough transform point p. + - The returned points are inside rectangle(0,0,size()-1,size()-1). + !*/ + + template < + typename image_type + > + point get_best_hough_point ( + const point& p, + const image_type& himg + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h and it must contain grayscale pixels. 
+ - himg.nr() == size() + - himg.nc() == size() + - rectangle(0,0,size()-1,size()-1).contains(p) == true + ensures + - This function interprets himg as a Hough image and p as a point in the + original image space. Given this, it finds the maximum scoring line that + passes through p. That is, it checks all the Hough accumulator bins in + himg corresponding to lines through p and returns the location with the + largest score. + - returns a point X such that get_rect(himg).contains(X) == true + !*/ + + template < + typename in_image_type, + typename out_image_type + > + void operator() ( + const in_image_type& img, + const rectangle& box, + out_image_type& himg + ) const; + /*! + requires + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h and it must contain grayscale pixels. + - out_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h and it must contain grayscale pixels. + - box.width() == size() + - box.height() == size() + ensures + - Computes the Hough transform of the part of img contained within box. + In particular, we do a grayscale version of the Hough transform where any + non-zero pixel in img is treated as a potential component of a line and + accumulated into the Hough accumulator #himg. However, rather than + adding 1 to each relevant accumulator bin we add the value of the pixel + in img to each Hough accumulator bin. This means that, if all the + pixels in img are 0 or 1 then this routine performs a normal Hough + transform. However, if some pixels have larger values then they will be + weighted correspondingly more in the resulting Hough transform. + - #himg.nr() == size() + - #himg.nc() == size() + - #himg is the Hough transform of the part of img contained in box. Each + point in #himg corresponds to a line in the input box. In particular, + the line for #himg[y][x] is given by get_line(point(x,y)). 
Also, when + viewing the #himg image, the x-axis gives the angle of the line and the + y-axis the distance of the line from the center of the box. + !*/ + + }; +} + +#endif // DLIB_HOUGH_tRANSFORM_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/image_transforms/image_pyramid.h b/ml/dlib/dlib/image_transforms/image_pyramid.h new file mode 100644 index 000000000..3efed30d8 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/image_pyramid.h @@ -0,0 +1,1238 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_IMAGE_PYRaMID_Hh_ +#define DLIB_IMAGE_PYRaMID_Hh_ + +#include "image_pyramid_abstract.h" +#include "../pixel.h" +#include "../array2d.h" +#include "../geometry.h" +#include "spatial_filtering.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class pyramid_disable : noncopyable + { + public: + + template <typename T> + vector<double,2> point_down ( + const vector<T,2>& + ) const + { + return vector<double,2>(0,0); + } + + template <typename T> + vector<double,2> point_up ( + const vector<T,2>& + ) const + { + return vector<double,2>(0,0); + } + + // ----------------------------- + + template <typename T> + vector<double,2> point_down ( + const vector<T,2>& p, + unsigned int levels + ) const + { + if (levels == 0) + return p; + else + return vector<double,2>(0,0); + } + + template <typename T> + vector<double,2> point_up ( + const vector<T,2>& p, + unsigned int levels + ) const + { + if (levels == 0) + return p; + else + return vector<double,2>(0,0); + } + + // ----------------------------- + + drectangle rect_up ( + const drectangle& rect + ) const + { + return drectangle(point_up(rect.tl_corner()), point_up(rect.br_corner())); + } + + drectangle rect_up ( + const drectangle& rect, + unsigned int levels + ) const + { + return drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels)); + } 
+ + // ----------------------------- + + drectangle rect_down ( + const drectangle& rect + ) const + { + return drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner())); + } + + drectangle rect_down ( + const drectangle& rect, + unsigned int levels + ) const + { + return drectangle(point_down(rect.tl_corner(),levels), point_down(rect.br_corner(),levels)); + } + + // ----------------------------- + + public: + + template < + typename in_image_type, + typename out_image_type + > + void operator() ( + // we do this #ifdef stuff to avoid compiler warnings about unused variables. +#ifdef ENABLE_ASSERTS + const in_image_type& original, +#else + const in_image_type& , +#endif + out_image_type& down + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(is_same_object(original, down) == false, + "\t void pyramid_disable::operator()" + << "\n\t is_same_object(original, down): " << is_same_object(original, down) + << "\n\t this: " << this + ); + + typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); + + set_image_size(down, 0, 0); + } + + template < + typename image_type + > + void operator() ( + image_type& img + ) const + { + typedef typename image_traits<image_type>::pixel_type pixel_type; + COMPILE_TIME_ASSERT( pixel_traits<pixel_type>::has_alpha == false ); + set_image_size(img, 0, 0); + } + }; + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + namespace impl + { + + class pyramid_down_2_1 : noncopyable + { + public: + + template <typename T> + vector<double,2> 
point_down ( + const vector<T,2>& p + ) const + { + return p/2.0 - vector<double,2>(1.25,0.75); + } + + template <typename T> + vector<double,2> point_up ( + const vector<T,2>& p + ) const + { + // The offset must be a vector<double,2>: vector<T,2>(1.25,0.75) truncates to (1,0) for integral T, which breaks point_down(point_up(P)) == P. + return (p + vector<double,2>(1.25,0.75))*2; + } + + // ----------------------------- + + template <typename T> + vector<double,2> point_down ( + const vector<T,2>& p, + unsigned int levels + ) const + { + vector<double,2> temp = p; + for (unsigned int i = 0; i < levels; ++i) + temp = point_down(temp); + return temp; + } + + template <typename T> + vector<double,2> point_up ( + const vector<T,2>& p, + unsigned int levels + ) const + { + vector<double,2> temp = p; + for (unsigned int i = 0; i < levels; ++i) + temp = point_up(temp); + return temp; + } + + // ----------------------------- + + drectangle rect_up ( + const drectangle& rect + ) const + { + return drectangle(point_up(rect.tl_corner()), point_up(rect.br_corner())); + } + + drectangle rect_up ( + const drectangle& rect, + unsigned int levels + ) const + { + return drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels)); + } + + // ----------------------------- + + drectangle rect_down ( + const drectangle& rect + ) const + { + return drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner())); + } + + drectangle rect_down ( + const drectangle& rect, + unsigned int levels + ) const + { + return drectangle(point_down(rect.tl_corner(),levels), point_down(rect.br_corner(),levels)); + } + + // ----------------------------- + + private: + template <typename T, typename U> + struct both_images_rgb + { + typedef typename image_traits<T>::pixel_type T_pix; + typedef typename image_traits<U>::pixel_type U_pix; + const static bool value = pixel_traits<T_pix>::rgb && pixel_traits<U_pix>::rgb; + }; + public: + + template < + typename in_image_type, + typename out_image_type + > + typename disable_if<both_images_rgb<in_image_type,out_image_type> >::type operator() ( + const in_image_type& original_, 
+ out_image_type& down_ + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(original_, down_) == false, + "\t void pyramid_down_2_1::operator()" + << "\n\t is_same_object(original_, down_): " << is_same_object(original_, down_) + << "\n\t this: " << this + ); + + typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); + + const_image_view<in_image_type> original(original_); + image_view<out_image_type> down(down_); + + if (original.nr() <= 8 || original.nc() <= 8) + { + down.clear(); + return; + } + + typedef typename pixel_traits<in_pixel_type>::basic_pixel_type bp_type; + typedef typename promote<bp_type>::type ptype; + array2d<ptype> temp_img; + temp_img.set_size(original.nr(), (original.nc()-3)/2); + down.set_size((original.nr()-3)/2, (original.nc()-3)/2); + + + // This function applies a 5x5 Gaussian filter to the image. It + // does this by separating the filter into its horizontal and vertical + // components and then downsamples the image by dropping every other + // row and column. Note that we can do these things all together in + // one step. 
+ + // apply row filter + for (long r = 0; r < temp_img.nr(); ++r) + { + long oc = 0; + for (long c = 0; c < temp_img.nc(); ++c) + { + ptype pix1; + ptype pix2; + ptype pix3; + ptype pix4; + ptype pix5; + + assign_pixel(pix1, original[r][oc]); + assign_pixel(pix2, original[r][oc+1]); + assign_pixel(pix3, original[r][oc+2]); + assign_pixel(pix4, original[r][oc+3]); + assign_pixel(pix5, original[r][oc+4]); + + pix2 *= 4; + pix3 *= 6; + pix4 *= 4; + + assign_pixel(temp_img[r][c], pix1 + pix2 + pix3 + pix4 + pix5); + oc += 2; + } + } + + + // apply column filter + long dr = 0; + for (long r = 2; r < temp_img.nr()-2; r += 2) + { + for (long c = 0; c < temp_img.nc(); ++c) + { + ptype temp = temp_img[r-2][c] + + temp_img[r-1][c]*4 + + temp_img[r ][c]*6 + + temp_img[r+1][c]*4 + + temp_img[r+2][c]; + + assign_pixel(down[dr][c],temp/256); + } + ++dr; + } + + } + + private: + struct rgbptype + { + uint16 red; + uint16 green; + uint16 blue; + }; + public: + // ------------------------------------------ + // OVERLOAD FOR RGB TO RGB IMAGES + // ------------------------------------------ + template < + typename in_image_type, + typename out_image_type + > + typename enable_if<both_images_rgb<in_image_type,out_image_type> >::type operator() ( + const in_image_type& original_, + out_image_type& down_ + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(original_, down_) == false, + "\t void pyramid_down_2_1::operator()" + << "\n\t is_same_object(original_, down_): " << is_same_object(original_, down_) + << "\n\t this: " << this + ); + + typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); + + const_image_view<in_image_type> original(original_); + image_view<out_image_type> down(down_); + + if (original.nr() 
<= 8 || original.nc() <= 8) + { + down.clear(); + return; + } + + array2d<rgbptype> temp_img; + temp_img.set_size(original.nr(), (original.nc()-3)/2); + down.set_size((original.nr()-3)/2, (original.nc()-3)/2); + + + // This function applies a 5x5 Gaussian filter to the image. It + // does this by separating the filter into its horizontal and vertical + // components and then downsamples the image by dropping every other + // row and column. Note that we can do these things all together in + // one step. + + // apply row filter + for (long r = 0; r < temp_img.nr(); ++r) + { + long oc = 0; + for (long c = 0; c < temp_img.nc(); ++c) + { + rgbptype pix1; + rgbptype pix2; + rgbptype pix3; + rgbptype pix4; + rgbptype pix5; + + pix1.red = original[r][oc].red; + pix2.red = original[r][oc+1].red; + pix3.red = original[r][oc+2].red; + pix4.red = original[r][oc+3].red; + pix5.red = original[r][oc+4].red; + pix1.green = original[r][oc].green; + pix2.green = original[r][oc+1].green; + pix3.green = original[r][oc+2].green; + pix4.green = original[r][oc+3].green; + pix5.green = original[r][oc+4].green; + pix1.blue = original[r][oc].blue; + pix2.blue = original[r][oc+1].blue; + pix3.blue = original[r][oc+2].blue; + pix4.blue = original[r][oc+3].blue; + pix5.blue = original[r][oc+4].blue; + + pix2.red *= 4; + pix3.red *= 6; + pix4.red *= 4; + + pix2.green *= 4; + pix3.green *= 6; + pix4.green *= 4; + + pix2.blue *= 4; + pix3.blue *= 6; + pix4.blue *= 4; + + rgbptype temp; + temp.red = pix1.red + pix2.red + pix3.red + pix4.red + pix5.red; + temp.green = pix1.green + pix2.green + pix3.green + pix4.green + pix5.green; + temp.blue = pix1.blue + pix2.blue + pix3.blue + pix4.blue + pix5.blue; + + temp_img[r][c] = temp; + + oc += 2; + } + } + + + // apply column filter + long dr = 0; + for (long r = 2; r < temp_img.nr()-2; r += 2) + { + for (long c = 0; c < temp_img.nc(); ++c) + { + rgbptype temp; + temp.red = temp_img[r-2][c].red + + temp_img[r-1][c].red*4 + + temp_img[r ][c].red*6 + + 
temp_img[r+1][c].red*4 + + temp_img[r+2][c].red; + temp.green = temp_img[r-2][c].green + + temp_img[r-1][c].green*4 + + temp_img[r ][c].green*6 + + temp_img[r+1][c].green*4 + + temp_img[r+2][c].green; + temp.blue = temp_img[r-2][c].blue + + temp_img[r-1][c].blue*4 + + temp_img[r ][c].blue*6 + + temp_img[r+1][c].blue*4 + + temp_img[r+2][c].blue; + + down[dr][c].red = temp.red/256; + down[dr][c].green = temp.green/256; + down[dr][c].blue = temp.blue/256; + } + ++dr; + } + + } + + template < + typename image_type + > + void operator() ( + image_type& img + ) const + { + image_type temp; + (*this)(img, temp); + swap(temp, img); + } + + private: + + + }; + + // ---------------------------------------------------------------------------------------- + // ---------------------------------------------------------------------------------------- + // ---------------------------------------------------------------------------------------- + + class pyramid_down_3_2 : noncopyable + { + public: + + template <typename T> + vector<double,2> point_down ( + const vector<T,2>& p + ) const + { + const double ratio = 2.0/3.0; + return p*ratio - vector<double,2>(1,1); + } + + template <typename T> + vector<double,2> point_up ( + const vector<T,2>& p + ) const + { + const double ratio = 3.0/2.0; + // The offset must be a vector<double,2>: vector<T,2>(ratio,ratio) truncates 1.5 to 1 for integral T, which breaks point_down(point_up(P)) == P. + return p*ratio + vector<double,2>(ratio,ratio); + } + + // ----------------------------- + + template <typename T> + vector<double,2> point_down ( + const vector<T,2>& p, + unsigned int levels + ) const + { + vector<double,2> temp = p; + for (unsigned int i = 0; i < levels; ++i) + temp = point_down(temp); + return temp; + } + + template <typename T> + vector<double,2> point_up ( + const vector<T,2>& p, + unsigned int levels + ) const + { + vector<double,2> temp = p; + for (unsigned int i = 0; i < levels; ++i) + temp = point_up(temp); + return temp; + } + + // ----------------------------- + + drectangle rect_up ( + const drectangle& rect + ) const + { + return drectangle(point_up(rect.tl_corner()), 
point_up(rect.br_corner())); + } + + drectangle rect_up ( + const drectangle& rect, + unsigned int levels + ) const + { + return drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels)); + } + + // ----------------------------- + + drectangle rect_down ( + const drectangle& rect + ) const + { + return drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner())); + } + + drectangle rect_down ( + const drectangle& rect, + unsigned int levels + ) const + { + return drectangle(point_down(rect.tl_corner(),levels), point_down(rect.br_corner(),levels)); + } + + // ----------------------------- + + private: + template <typename T, typename U> + struct both_images_rgb + { + typedef typename image_traits<T>::pixel_type T_pix; + typedef typename image_traits<U>::pixel_type U_pix; + const static bool value = pixel_traits<T_pix>::rgb && pixel_traits<U_pix>::rgb; + }; + public: + + template < + typename in_image_type, + typename out_image_type + > + typename disable_if<both_images_rgb<in_image_type,out_image_type> >::type operator() ( + const in_image_type& original_, + out_image_type& down_ + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(is_same_object(original_, down_) == false, + "\t void pyramid_down_3_2::operator()" + << "\n\t is_same_object(original_, down_): " << is_same_object(original_, down_) + << "\n\t this: " << this + ); + + typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); + + const_image_view<in_image_type> original(original_); + image_view<out_image_type> down(down_); + + if (original.nr() <= 8 || original.nc() <= 8) + { + down.clear(); + return; + } + + const long size_in = 3; + const long size_out = 2; + + typedef typename 
pixel_traits<in_pixel_type>::basic_pixel_type bp_type; + typedef typename promote<bp_type>::type ptype; + const long full_nr = size_out*((original.nr()-2)/size_in); + const long part_nr = (size_out*(original.nr()-2))/size_in; + const long full_nc = size_out*((original.nc()-2)/size_in); + const long part_nc = (size_out*(original.nc()-2))/size_in; + down.set_size(part_nr, part_nc); + + + long rr = 1; + long r; + for (r = 0; r < full_nr; r+=size_out) + { + long cc = 1; + long c; + for (c = 0; c < full_nc; c+=size_out) + { + ptype block[size_in][size_in]; + separable_3x3_filter_block_grayscale(block, original_, rr, cc, 2, 12, 2); + + // bi-linearly interpolate block + assign_pixel(down[r][c] , (block[0][0]*9 + block[1][0]*3 + block[0][1]*3 + block[1][1])/(16*256)); + assign_pixel(down[r][c+1] , (block[0][2]*9 + block[1][2]*3 + block[0][1]*3 + block[1][1])/(16*256)); + assign_pixel(down[r+1][c] , (block[2][0]*9 + block[1][0]*3 + block[2][1]*3 + block[1][1])/(16*256)); + assign_pixel(down[r+1][c+1] , (block[2][2]*9 + block[1][2]*3 + block[2][1]*3 + block[1][1])/(16*256)); + + cc += size_in; + } + if (part_nc - full_nc == 1) + { + ptype block[size_in][2]; + separable_3x3_filter_block_grayscale(block, original_, rr, cc, 2, 12, 2); + + // bi-linearly interpolate partial block + assign_pixel(down[r][c] , (block[0][0]*9 + block[1][0]*3 + block[0][1]*3 + block[1][1])/(16*256)); + assign_pixel(down[r+1][c] , (block[2][0]*9 + block[1][0]*3 + block[2][1]*3 + block[1][1])/(16*256)); + } + rr += size_in; + } + if (part_nr - full_nr == 1) + { + long cc = 1; + long c; + for (c = 0; c < full_nc; c+=size_out) + { + ptype block[2][size_in]; + separable_3x3_filter_block_grayscale(block, original_, rr, cc, 2, 12, 2); + + // bi-linearly interpolate partial block + assign_pixel(down[r][c] , (block[0][0]*9 + block[1][0]*3 + block[0][1]*3 + block[1][1])/(16*256)); + assign_pixel(down[r][c+1] , (block[0][2]*9 + block[1][2]*3 + block[0][1]*3 + block[1][1])/(16*256)); + + cc += size_in; + } + if 
(part_nc - full_nc == 1) + { + ptype block[2][2]; + separable_3x3_filter_block_grayscale(block, original_, rr, cc, 2, 12, 2); + + // bi-linearly interpolate partial block + assign_pixel(down[r][c] , (block[0][0]*9 + block[1][0]*3 + block[0][1]*3 + block[1][1])/(16*256)); + } + } + + } + + private: + struct rgbptype + { + uint32 red; + uint32 green; + uint32 blue; + }; + + public: + // ------------------------------------------ + // OVERLOAD FOR RGB TO RGB IMAGES + // ------------------------------------------ + template < + typename in_image_type, + typename out_image_type + > + typename enable_if<both_images_rgb<in_image_type,out_image_type> >::type operator() ( + const in_image_type& original_, + out_image_type& down_ + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(original_, down_) == false, + "\t void pyramid_down_3_2::operator()" + << "\n\t is_same_object(original_, down_): " << is_same_object(original_, down_) + << "\n\t this: " << this + ); + + typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); + + const_image_view<in_image_type> original(original_); + image_view<out_image_type> down(down_); + + if (original.nr() <= 8 || original.nc() <= 8) + { + down.clear(); + return; + } + + const long size_in = 3; + const long size_out = 2; + + const long full_nr = size_out*((original.nr()-2)/size_in); + const long part_nr = (size_out*(original.nr()-2))/size_in; + const long full_nc = size_out*((original.nc()-2)/size_in); + const long part_nc = (size_out*(original.nc()-2))/size_in; + down.set_size(part_nr, part_nc); + + + long rr = 1; + long r; + for (r = 0; r < full_nr; r+=size_out) + { + long cc = 1; + long c; + for (c = 0; c < full_nc; c+=size_out) + { + rgbptype 
block[size_in][size_in]; + separable_3x3_filter_block_rgb(block, original_, rr, cc, 2, 12, 2); + + // bi-linearly interpolate block + down[r][c].red = (block[0][0].red*9 + block[1][0].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256); + down[r][c].green = (block[0][0].green*9 + block[1][0].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256); + down[r][c].blue = (block[0][0].blue*9 + block[1][0].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256); + + down[r][c+1].red = (block[0][2].red*9 + block[1][2].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256); + down[r][c+1].green = (block[0][2].green*9 + block[1][2].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256); + down[r][c+1].blue = (block[0][2].blue*9 + block[1][2].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256); + + down[r+1][c].red = (block[2][0].red*9 + block[1][0].red*3 + block[2][1].red*3 + block[1][1].red)/(16*256); + down[r+1][c].green = (block[2][0].green*9 + block[1][0].green*3 + block[2][1].green*3 + block[1][1].green)/(16*256); + down[r+1][c].blue = (block[2][0].blue*9 + block[1][0].blue*3 + block[2][1].blue*3 + block[1][1].blue)/(16*256); + + down[r+1][c+1].red = (block[2][2].red*9 + block[1][2].red*3 + block[2][1].red*3 + block[1][1].red)/(16*256); + down[r+1][c+1].green = (block[2][2].green*9 + block[1][2].green*3 + block[2][1].green*3 + block[1][1].green)/(16*256); + down[r+1][c+1].blue = (block[2][2].blue*9 + block[1][2].blue*3 + block[2][1].blue*3 + block[1][1].blue)/(16*256); + + cc += size_in; + } + if (part_nc - full_nc == 1) + { + rgbptype block[size_in][2]; + separable_3x3_filter_block_rgb(block, original_, rr, cc, 2, 12, 2); + + // bi-linearly interpolate partial block + down[r][c].red = (block[0][0].red*9 + block[1][0].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256); + down[r][c].green = (block[0][0].green*9 + block[1][0].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256); + down[r][c].blue = (block[0][0].blue*9 + block[1][0].blue*3 + 
block[0][1].blue*3 + block[1][1].blue)/(16*256); + + down[r+1][c].red = (block[2][0].red*9 + block[1][0].red*3 + block[2][1].red*3 + block[1][1].red)/(16*256); + down[r+1][c].green = (block[2][0].green*9 + block[1][0].green*3 + block[2][1].green*3 + block[1][1].green)/(16*256); + down[r+1][c].blue = (block[2][0].blue*9 + block[1][0].blue*3 + block[2][1].blue*3 + block[1][1].blue)/(16*256); + } + rr += size_in; + } + if (part_nr - full_nr == 1) + { + long cc = 1; + long c; + for (c = 0; c < full_nc; c+=size_out) + { + rgbptype block[2][size_in]; + separable_3x3_filter_block_rgb(block, original_, rr, cc, 2, 12, 2); + + // bi-linearly interpolate partial block + down[r][c].red = (block[0][0].red*9 + block[1][0].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256); + down[r][c].green = (block[0][0].green*9 + block[1][0].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256); + down[r][c].blue = (block[0][0].blue*9 + block[1][0].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256); + + down[r][c+1].red = (block[0][2].red*9 + block[1][2].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256); + down[r][c+1].green = (block[0][2].green*9 + block[1][2].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256); + down[r][c+1].blue = (block[0][2].blue*9 + block[1][2].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256); + + cc += size_in; + } + if (part_nc - full_nc == 1) + { + rgbptype block[2][2]; + separable_3x3_filter_block_rgb(block, original_, rr, cc, 2, 12, 2); + + // bi-linearly interpolate partial block + down[r][c].red = (block[0][0].red*9 + block[1][0].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256); + down[r][c].green = (block[0][0].green*9 + block[1][0].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256); + down[r][c].blue = (block[0][0].blue*9 + block[1][0].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256); + } + } + } + + template < + typename image_type + > + void operator() ( + image_type& img + ) const + { + image_type 
temp; + (*this)(img, temp); + swap(temp, img); + } + private: + + + }; + + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + unsigned int N + > + class pyramid_down : noncopyable + { + public: + + COMPILE_TIME_ASSERT(N > 0); + + template <typename T> + vector<double,2> point_down ( + const vector<T,2>& p + ) const + { + const double ratio = (N-1.0)/N; + return (p - 0.3)*ratio; + } + + template <typename T> + vector<double,2> point_up ( + const vector<T,2>& p + ) const + { + const double ratio = N/(N-1.0); + return p*ratio + 0.3; + } + + // ----------------------------- + + template <typename T> + vector<double,2> point_down ( + const vector<T,2>& p, + unsigned int levels + ) const + { + vector<double,2> temp = p; + for (unsigned int i = 0; i < levels; ++i) + temp = point_down(temp); + return temp; + } + + template <typename T> + vector<double,2> point_up ( + const vector<T,2>& p, + unsigned int levels + ) const + { + vector<double,2> temp = p; + for (unsigned int i = 0; i < levels; ++i) + temp = point_up(temp); + return temp; + } + + // ----------------------------- + + drectangle rect_up ( + const drectangle& rect + ) const + { + return drectangle(point_up(rect.tl_corner()), point_up(rect.br_corner())); + } + + drectangle rect_up ( + const drectangle& rect, + unsigned int levels + ) const + { + return drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels)); + } + + // ----------------------------- + + drectangle rect_down ( + const drectangle& rect + ) const + { + return drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner())); + } + + drectangle rect_down ( + const drectangle& rect, + unsigned int levels + ) const + { + return drectangle(point_down(rect.tl_corner(),levels), 
point_down(rect.br_corner(),levels)); + } + + template < + typename in_image_type, + typename out_image_type + > + void operator() ( + const in_image_type& original, + out_image_type& down + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(is_same_object(original, down) == false, + "\t void pyramid_down::operator()" + << "\n\t is_same_object(original, down): " << is_same_object(original, down) + << "\n\t this: " << this + ); + + typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); + + + set_image_size(down, ((N-1)*num_rows(original))/N+0.5, ((N-1)*num_columns(original))/N+0.5); + resize_image(original, down); + } + + template < + typename image_type + > + void operator() ( + image_type& img + ) const + { + image_type temp; + (*this)(img, temp); + swap(temp, img); + } + }; + + template <> + class pyramid_down<1> : public pyramid_disable {}; + + template <> + class pyramid_down<2> : public dlib::impl::pyramid_down_2_1 {}; + + template <> + class pyramid_down<3> : public dlib::impl::pyramid_down_3_2 {}; + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template <unsigned int N> + double pyramid_rate(const pyramid_down<N>&) + { + return (N-1.0)/N; + } + +// ---------------------------------------------------------------------------------------- + + template <unsigned int N> + void find_pyramid_down_output_image_size( + const pyramid_down<N>& pyr, + long& nr, + long& nc + ) + { + const double rate = pyramid_rate(pyr); + nr = std::floor(rate*nr); + nc = std::floor(rate*nc); + } + 
+ inline void find_pyramid_down_output_image_size( + const pyramid_down<3>& /*pyr*/, + long& nr, + long& nc + ) + { + nr = 2*(nr-2)/3; + nc = 2*(nc-2)/3; + } + + inline void find_pyramid_down_output_image_size( + const pyramid_down<2>& /*pyr*/, + long& nr, + long& nc + ) + { + nr = (nr-3)/2; + nc = (nc-3)/2; + } + + inline void find_pyramid_down_output_image_size( + const pyramid_down<1>& /*pyr*/, + long& nr, + long& nc + ) + { + nr = 0; + nc = 0; + } + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template <typename pyramid_type> + void compute_tiled_image_pyramid_details ( + const pyramid_type& pyr, + long nr, + long nc, + const unsigned long padding, + const unsigned long outer_padding, + std::vector<rectangle>& rects, + long& pyramid_image_nr, + long& pyramid_image_nc + ) + { + rects.clear(); + if (nr*nc == 0) + { + pyramid_image_nr = 0; + pyramid_image_nc = 0; + return; + } + + const long min_height = 5; + rects.reserve(100); + rects.push_back(rectangle(nc,nr)); + // build the whole pyramid + while(true) + { + find_pyramid_down_output_image_size(pyr, nr, nc); + if (nr*nc == 0 || nr < min_height) + break; + rects.push_back(rectangle(nc,nr)); + } + + // figure out output image size + long total_height = 0; + for (auto&& i : rects) + total_height += i.height()+padding; + total_height -= padding*2; // don't add unnecessary padding to the very right side. + long height = 0; + long prev_width = 0; + for (auto&& i : rects) + { + // Figure out how far we go on the first column. We go until the next image can + // fit next to the previous one, which means we can double back for the second + // column of images. + if (i.width() <= rects[0].width()-prev_width-(long)padding && + (height-rects[0].height())*2 >= (total_height-rects[0].height())) + { + break; + } + height += i.height() + padding; + prev_width = i.width(); + } + height -= padding; // don't add unnecessary padding to the very right side. 
+ + const long width = rects[0].width(); + pyramid_image_nr = height+outer_padding*2; + pyramid_image_nc = width+outer_padding*2; + + + long y = outer_padding; + size_t i = 0; + while(y < height+(long)outer_padding && i < rects.size()) + { + rects[i] = translate_rect(rects[i],point(outer_padding,y)); + DLIB_ASSERT(rectangle(pyramid_image_nc,pyramid_image_nr).contains(rects[i])); + y += rects[i].height()+padding; + ++i; + } + y -= padding; + while (i < rects.size()) + { + point p1(outer_padding+width-1,y-1); + point p2 = p1 - rects[i].br_corner(); + rectangle rect(p1,p2); + DLIB_ASSERT(rectangle(pyramid_image_nc,pyramid_image_nr).contains(rect)); + // don't keep going on the last row if it would intersect the original image. + if (!rects[0].intersect(rect).is_empty()) + break; + + rects[i] = rect; + y -= rects[i].height()+padding; + ++i; + } + + // Delete any extraneous rectangles if we broke out of the above loop early due to + // intersection with the original image. + rects.resize(i); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename pyramid_type, + typename image_type1, + typename image_type2 + > + void create_tiled_pyramid ( + const image_type1& img, + image_type2& out_img, + std::vector<rectangle>& rects, + const unsigned long padding = 10, + const unsigned long outer_padding = 0 + ) + { + DLIB_ASSERT(!is_same_object(img, out_img)); + + long out_nr, out_nc; + pyramid_type pyr; + impl::compute_tiled_image_pyramid_details(pyr, img.nr(), img.nc(), padding, outer_padding, rects, out_nr, out_nc); + + set_image_size(out_img, out_nr, out_nc); + assign_all_pixels(out_img, 0); + + if (rects.size() == 0) + return; + + // now build the image pyramid into out_img + auto si = sub_image(out_img, rects[0]); + assign_image(si, img); + for (size_t i = 1; i < rects.size(); ++i) + { + auto s1 = sub_image(out_img, rects[i-1]); + auto s2 = sub_image(out_img, rects[i]); + pyr(s1,s2); + } + } + +// 
---------------------------------------------------------------------------------------- + + template < + typename pyramid_type + > + dpoint image_to_tiled_pyramid ( + const std::vector<rectangle>& rects, + double scale, + dpoint p + ) + { + DLIB_CASSERT(rects.size() > 0); + DLIB_CASSERT(0 < scale && scale <= 1); + pyramid_type pyr; + // This scale factor maps this many levels down the pyramid + long pyramid_down_iter = static_cast<long>(std::log(scale)/std::log(pyramid_rate(pyr))+0.5); + pyramid_down_iter = put_in_range(0, (long)rects.size()-1, pyramid_down_iter); + + return rects[pyramid_down_iter].tl_corner() + pyr.point_down(p, pyramid_down_iter); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename pyramid_type + > + drectangle image_to_tiled_pyramid ( + const std::vector<rectangle>& rects, + double scale, + drectangle r + ) + { + DLIB_ASSERT(rects.size() > 0); + DLIB_ASSERT(0 < scale && scale <= 1); + return drectangle(image_to_tiled_pyramid<pyramid_type>(rects, scale, r.tl_corner()), + image_to_tiled_pyramid<pyramid_type>(rects, scale, r.br_corner())); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename pyramid_type + > + dpoint tiled_pyramid_to_image ( + const std::vector<rectangle>& rects, + dpoint p + ) + { + DLIB_CASSERT(rects.size() > 0); + + size_t pyramid_down_iter = nearest_rect(rects, p); + + p -= rects[pyramid_down_iter].tl_corner(); + pyramid_type pyr; + return pyr.point_up(p, pyramid_down_iter); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename pyramid_type + > + drectangle tiled_pyramid_to_image ( + const std::vector<rectangle>& rects, + drectangle r + ) + { + DLIB_CASSERT(rects.size() > 0); + + size_t pyramid_down_iter = nearest_rect(rects, dcenter(r)); + + dpoint origin = rects[pyramid_down_iter].tl_corner(); + r = 
drectangle(r.tl_corner()-origin, r.br_corner()-origin); + pyramid_type pyr; + return pyr.rect_up(r, pyramid_down_iter); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_IMAGE_PYRaMID_Hh_ + diff --git a/ml/dlib/dlib/image_transforms/image_pyramid_abstract.h b/ml/dlib/dlib/image_transforms/image_pyramid_abstract.h new file mode 100644 index 000000000..a61b275fd --- /dev/null +++ b/ml/dlib/dlib/image_transforms/image_pyramid_abstract.h @@ -0,0 +1,384 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_IMAGE_PYRaMID_ABSTRACT_Hh_ +#ifdef DLIB_IMAGE_PYRaMID_ABSTRACT_Hh_ + +#include "../pixel.h" +#include "../array2d.h" +#include "../geometry.h" +#include "../image_processing/generic_image.h" + +namespace dlib +{ + + template < + unsigned int N + > + class pyramid_down : noncopyable + { + /*! + REQUIREMENTS ON N + N > 0 + + WHAT THIS OBJECT REPRESENTS + This is a simple functor to help create image pyramids. In particular, it + downsamples images at a ratio of N to N-1. + + Note that setting N to 1 means that this object functions like + pyramid_disable (defined at the bottom of this file). + + WARNING, when mapping rectangles from one layer of a pyramid + to another you might end up with rectangles which extend slightly + outside your images. This is because points on the border of an + image at a higher pyramid layer might correspond to points outside + images at lower layers. So just keep this in mind. Note also + that it's easy to deal with. Just say something like this: + rect = rect.intersect(get_rect(my_image)); // keep rect inside my_image + !*/ + public: + + template < + typename in_image_type, + typename out_image_type + > + void operator() ( + const in_image_type& original, + out_image_type& down + ) const; + /*! 
+ requires + - is_same_object(original, down) == false + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - out_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - for both pixel types P in the input and output images, we require: + - pixel_traits<P>::has_alpha == false + ensures + - #down will contain an image that is roughly (N-1)/N times the size of the + original image. + - If both input and output images contain RGB pixels then the downsampled image will + be in color. Otherwise, the downsampling will be performed in a grayscale mode. + - The location of a point P in original image will show up at point point_down(P) + in the #down image. + - Note that some points on the border of the original image might correspond to + points outside the #down image. + !*/ + + template < + typename image_type + > + void operator() ( + image_type& img + ) const; + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false + ensures + - This function downsamples the given image and stores the results in #img. + In particular, it is equivalent to performing: + (*this)(img, temp); + swap(img, temp); + !*/ + + // ------------------------------- + + template <typename T> + vector<double,2> point_down ( + const vector<T,2>& p + ) const; + /*! + ensures + - interprets p as a point in a parent image and returns the + point in a downsampled image which corresponds to p. + - This function is the inverse of point_up(). I.e. for a point P: + point_down(point_up(P)) == P + !*/ + + template <typename T> + vector<double,2> point_up ( + const vector<T,2>& p + ) const; + /*! + ensures + - interprets p as a point in a downsampled image and returns the + point in a parent image which corresponds to p. 
+ - This function is the inverse of point_down(). I.e. for a point P: + point_up(point_down(P)) == P + !*/ + + drectangle rect_down ( + const drectangle& rect + ) const; + /*! + ensures + - returns drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner())); + (i.e. maps rect into a downsampled) + !*/ + + drectangle rect_up ( + const drectangle& rect + ) const; + /*! + ensures + - returns drectangle(point_up(rect.tl_corner()), point_up(rect.br_corner())); + (i.e. maps rect into a parent image) + !*/ + + // ------------------------------- + + template <typename T> + vector<double,2> point_down ( + const vector<T,2>& p, + unsigned int levels + ) const; + /*! + ensures + - applies point_down() to p levels times and returns the result. + (i.e. point_down(p,2) == point_down(point_down(p)), + point_down(p,1) == point_down(p), + point_down(p,0) == p, etc. ) + !*/ + + template <typename T> + vector<double,2> point_up ( + const vector<T,2>& p, + unsigned int levels + ) const; + /*! + ensures + - applies point_up() to p levels times and returns the result. + (i.e. point_up(p,2) == point_up(point_up(p)), + point_up(p,1) == point_up(p), + point_up(p,0) == p, etc. ) + !*/ + + drectangle rect_down ( + const drectangle& rect, + unsigned int levels + ) const; + /*! + ensures + - returns drectangle(point_down(rect.tl_corner(),levels), point_down(rect.br_corner(),levels)); + (i.e. Basically applies rect_down() to rect levels times and returns the result.) + !*/ + + drectangle rect_up ( + const drectangle& rect, + unsigned int levels + ) const; + /*! + ensures + - returns drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels)); + (i.e. Basically applies rect_up() to rect levels times and returns the result.) + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + class pyramid_disable : noncopyable + { + /*! 
+ WHAT THIS OBJECT REPRESENTS + This is a function object with an interface identical to pyramid_down (defined + at the top of this file) except that it downsamples images at a ratio of infinity + to 1. That means it always outputs images of size 0 regardless of the size + of the inputs. + + This is useful because it can be supplied to routines which take a pyramid_down + function object and it will essentially disable pyramid processing. This way, + a pyramid oriented function can be turned into a regular routine which processes + just the original undownsampled image. + !*/ + }; + +// ---------------------------------------------------------------------------------------- + + template < + unsigned int N + > + double pyramid_rate( + const pyramid_down<N>& pyr + ); + /*! + ensures + - returns (N-1.0)/N + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + unsigned int N + > + void find_pyramid_down_output_image_size( + const pyramid_down<N>& pyr, + long& nr, + long& nc + ); + /*! + requires + - nr >= 0 + - nc >= 0 + ensures + - If pyr() were called on an image with nr by nc rows and columns, what would + be the size of the output image? This function finds the size of the output + image and stores it back into #nr and #nc. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename pyramid_type, + typename image_type1, + typename image_type2 + > + void create_tiled_pyramid ( + const image_type1& img, + image_type2& out_img, + std::vector<rectangle>& rects, + const unsigned long padding = 10, + const unsigned long outer_padding = 0 + ); + /*! + requires + - pyramid_type == one of the dlib::pyramid_down template instances defined above. 
+ - is_same_object(img, out_img) == false + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - for both pixel types P in the input and output images, we require: + - pixel_traits<P>::has_alpha == false + ensures + - Creates an image pyramid from the input image img. The pyramid is made using + pyramid_type. The highest resolution image is img and then all further + pyramid levels are generated from pyramid_type's downsampling. The entire + resulting pyramid is packed into a single image and stored in out_img. + - When packing pyramid levels into out_img, there will be padding pixels of + space between each sub-image. There will also be outer_padding pixels of + padding around the edge of the image. All padding pixels have a value of 0. + - The resulting pyramid will be composed of #rects.size() images packed into + out_img. Moreover, #rects[i] is the location inside out_img of the i-th + pyramid level. + - #rects.size() > 0 + - #rects[0] == get_rect(img). I.e. the first rectangle is the highest + resolution pyramid layer. Subsequent elements of #rects correspond to + smaller and smaller pyramid layers inside out_img. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename pyramid_type + > + dpoint image_to_tiled_pyramid ( + const std::vector<rectangle>& rects, + double scale, + dpoint p + ); + /*! + requires + - pyramid_type == one of the dlib::pyramid_down template instances defined above. + - 0 < scale <= 1 + - rects.size() > 0 + ensures + - The function create_tiled_pyramid() converts an image, img, to a "tiled + pyramid" called out_img. It also outputs a vector of rectangles, rect, that + show where each pyramid layer appears in out_img. 
Therefore, + image_to_tiled_pyramid() allows you to map from coordinates in img (i.e. p) + to coordinates in the tiled pyramid out_img, when given the rects metadata. + + So given a point p in img, you can ask, what coordinate in out_img + corresponds to img[p.y()][p.x()] when things are scale times smaller? This + new coordinate is a location in out_img and is what is returned by this + function. + - A scale of 1 means we don't move anywhere in the pyramid scale space relative + to the input image while smaller values of scale mean we move down the + pyramid. + - Assumes pyramid_type is the pyramid class used to produce the tiled image. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename pyramid_type + > + drectangle image_to_tiled_pyramid ( + const std::vector<rectangle>& rects, + double scale, + drectangle r + ); + /*! + requires + - pyramid_type == one of the dlib::pyramid_down template instances defined above. + - 0 < scale <= 1 + - rects.size() > 0 + ensures + - This function maps from input image space to tiled pyramid coordinate space + just as the above image_to_tiled_pyramid() does, except it operates on + rectangle objects instead of points. + - Assumes pyramid_type is the pyramid class used to produce the tiled image. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename pyramid_type + > + dpoint tiled_pyramid_to_image ( + const std::vector<rectangle>& rects, + dpoint p + ); + /*! + requires + - pyramid_type == one of the dlib::pyramid_down template instances defined above. + - rects.size() > 0 + ensures + - This function maps from a coordinate in a tiled pyramid to the corresponding + input image coordinate. Therefore, it is essentially the inverse of + image_to_tiled_pyramid(). + - It should be noted that this function isn't always an inverse of + image_to_tiled_pyramid(). 
This is because you can ask + image_to_tiled_pyramid() for the coordinates of points outside the input + image and they will be mapped to somewhere that doesn't have an inverse. But + for points actually inside the image this function performs an approximate + inverse mapping. + - Assumes pyramid_type is the pyramid class used to produce the tiled image. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename pyramid_type + > + drectangle tiled_pyramid_to_image ( + const std::vector<rectangle>& rects, + drectangle r + ); + /*! + requires + - pyramid_type == one of the dlib::pyramid_down template instances defined above. + - rects.size() > 0 + ensures + - This function maps from a coordinate in a tiled pyramid to the corresponding + input image coordinate. Therefore, it is essentially the inverse of + image_to_tiled_pyramid(). + - It should be noted that this function isn't always an inverse of + image_to_tiled_pyramid(). This is because you can ask + image_to_tiled_pyramid() for the coordinates of points outside the input + image and they will be mapped to somewhere that doesn't have an inverse. But + for points actually inside the image this function performs an approximate + inverse mapping. + - Assumes pyramid_type is the pyramid class used to produce the tiled image. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_IMAGE_PYRaMID_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/image_transforms/integral_image.h b/ml/dlib/dlib/image_transforms/integral_image.h new file mode 100644 index 000000000..2ae47d921 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/integral_image.h @@ -0,0 +1,190 @@ +// Copyright (C) 2009 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_INTEGRAL_IMAGE +#define DLIB_INTEGRAL_IMAGE + +#include "integral_image_abstract.h" + +#include "../algs.h" +#include "../assert.h" +#include "../geometry.h" +#include "../array2d.h" +#include "../matrix.h" +#include "../pixel.h" +#include "../noncopyable.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + class integral_image_generic : noncopyable + { + public: + typedef T value_type; + + long nr() const { return int_img.nr(); } + long nc() const { return int_img.nc(); } + + template <typename image_type> + void load ( + const image_type& img_ + ) + { + const_image_view<image_type> img(img_); + T pixel; + int_img.set_size(img.nr(), img.nc()); + + // compute the first row of the integral image + T temp = 0; + for (long c = 0; c < img.nc(); ++c) + { + assign_pixel(pixel, img[0][c]); + temp += pixel; + int_img[0][c] = temp; + } + + // now compute the rest of the integral image + for (long r = 1; r < img.nr(); ++r) + { + temp = 0; + for (long c = 0; c < img.nc(); ++c) + { + assign_pixel(pixel, img[r][c]); + temp += pixel; + int_img[r][c] = temp + int_img[r-1][c]; + } + } + + } + + value_type get_sum_of_area ( + const rectangle& rect + ) const + { + DLIB_ASSERT(get_rect(*this).contains(rect) == true && rect.is_empty() == false, + "\tvalue_type get_sum_of_area(rect)" + << "\n\tYou have given a rectangle that goes outside the image" + << "\n\tthis: " << this + << "\n\trect.is_empty(): " << rect.is_empty() + << "\n\trect: " << rect + << "\n\tget_rect(*this): " << get_rect(*this) + ); + + T top_left = 0, top_right = 0, bottom_left = 0, bottom_right = 0; + + bottom_right = int_img[rect.bottom()][rect.right()]; + if (rect.left()-1 >= 0 && rect.top()-1 >= 0) + { + top_left = int_img[rect.top()-1][rect.left()-1]; + bottom_left = int_img[rect.bottom()][rect.left()-1]; + top_right = int_img[rect.top()-1][rect.right()]; + } + else if (rect.left()-1 >= 0) + { + 
bottom_left = int_img[rect.bottom()][rect.left()-1]; + } + else if (rect.top()-1 >= 0) + { + top_right = int_img[rect.top()-1][rect.right()]; + } + + return bottom_right - bottom_left - top_right + top_left; + } + + void swap(integral_image_generic& item) + { + int_img.swap(item.int_img); + } + + private: + + array2d<T> int_img; + }; + + + template < + typename T + > + void swap ( + integral_image_generic<T>& a, + integral_image_generic<T>& b + ) { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + + typedef integral_image_generic<long> integral_image; + +// ---------------------------------------------------------------------------------------- + + template <typename integral_image_type> + typename integral_image_type::value_type haar_x ( + const integral_image_type& img, + const point& p, + long width + ) + { + DLIB_ASSERT(get_rect(img).contains(centered_rect(p,width,width)) == true, + "\tlong haar_x(img,p,width)" + << "\n\tYou have given a point and with that goes outside the image" + << "\n\tget_rect(img): " << get_rect(img) + << "\n\tp: " << p + << "\n\twidth: " << width + ); + + rectangle left_rect; + left_rect.set_left ( p.x() - width / 2 ); + left_rect.set_top ( p.y() - width / 2 ); + left_rect.set_right ( p.x()-1 ); + left_rect.set_bottom ( left_rect.top() + width - 1 ); + + rectangle right_rect; + right_rect.set_left ( p.x() ); + right_rect.set_top ( left_rect.top() ); + right_rect.set_right ( left_rect.left() + width -1 ); + right_rect.set_bottom ( left_rect.bottom() ); + + return img.get_sum_of_area(right_rect) - img.get_sum_of_area(left_rect); + } + + // ---------------------------------------------------------------------------- + + template <typename integral_image_type> + typename integral_image_type::value_type haar_y ( + const integral_image_type& img, + const point& p, + long width + ) + { + DLIB_ASSERT(get_rect(img).contains(centered_rect(p,width,width)) == true, + "\tlong 
haar_y(img,p,width)" + << "\n\tYou have given a point and with that goes outside the image" + << "\n\tget_rect(img): " << get_rect(img) + << "\n\tp: " << p + << "\n\twidth: " << width + ); + + rectangle top_rect; + top_rect.set_left ( p.x() - width / 2 ); + top_rect.set_top ( p.y() - width / 2 ); + top_rect.set_right ( top_rect.left() + width - 1 ); + top_rect.set_bottom ( p.y()-1 ); + + rectangle bottom_rect; + bottom_rect.set_left ( top_rect.left() ); + bottom_rect.set_top ( p.y() ); + bottom_rect.set_right ( top_rect.right() ); + bottom_rect.set_bottom ( top_rect.top() + width - 1 ); + + return img.get_sum_of_area(bottom_rect) - img.get_sum_of_area(top_rect); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_INTEGRAL_IMAGE + diff --git a/ml/dlib/dlib/image_transforms/integral_image_abstract.h b/ml/dlib/dlib/image_transforms/integral_image_abstract.h new file mode 100644 index 000000000..583fa0375 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/integral_image_abstract.h @@ -0,0 +1,169 @@ +// Copyright (C) 2009 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_INTEGRAL_IMAGe_ABSTRACT_ +#ifdef DLIB_INTEGRAL_IMAGe_ABSTRACT_ + +#include "../geometry/rectangle_abstract.h" +#include "../array2d/array2d_kernel_abstract.h" +#include "../pixel.h" +#include "../noncopyable.h" +#include "../image_processing/generic_image.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + class integral_image_generic : noncopyable + { + /*! + REQUIREMENTS ON T + T should be a built in scalar type. Moreover, it should + be capable of storing sums of whatever kind of pixel + you will be dealing with. 
+ + INITIAL VALUE + - nr() == 0 + - nc() == 0 + + WHAT THIS OBJECT REPRESENTS + This object is an alternate way of representing image data + that allows for very fast computations of sums of pixels in + rectangular regions. To use this object you load it with a + normal image and then you can use the get_sum_of_area() + function to compute sums of pixels in a given area in + constant time. + !*/ + public: + typedef T value_type; + + const long nr( + ) const; + /*! + ensures + - returns the number of rows in this integral image object + !*/ + + const long nc( + ) const; + /*! + ensures + - returns the number of columns in this integral image object + !*/ + + template <typename image_type> + void load ( + const image_type& img + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - Let P denote the type of pixel in img, then we require: + - pixel_traits<P>::has_alpha == false + ensures + - #nr() == img.nr() + - #nc() == img.nc() + - #*this will now contain an "integral image" representation of the + given input image. + !*/ + + value_type get_sum_of_area ( + const rectangle& rect + ) const; + /*! + requires + - rect.is_empty() == false + - get_rect(*this).contains(rect) == true + (i.e. rect must not be outside the integral image) + ensures + - Let O denote the image this integral image was generated from. + Then this function returns sum(subm(mat(O),rect)). + That is, this function returns the sum of the pixels in O that + are contained within the given rectangle. + !*/ + + void swap( + integral_image_generic& item + ); + /*! + ensures + - swaps *this and item + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + template < typename T > + void swap ( + integral_image_generic<T>& a, + integral_image_generic<T>& b + ) { a.swap(b); } + /*! 
+ provides a global swap function + !*/ + +// ---------------------------------------------------------------------------------------- + + typedef integral_image_generic<long> integral_image; + +// ---------------------------------------------------------------------------------------- + + template <typename integral_image_type> + typename integral_image_type::value_type haar_x ( + const integral_image_type& img, + const point& p, + long width + ) + /*! + requires + - get_rect(img).contains(centered_rect(p,width,width)) == true + - integral_image_type == a type that implements the integral_image_generic + interface defined above + ensures + - returns the response of a Haar wavelet centered at the point p + with the given width. The wavelet is oriented along the X axis + and has the following shape: + ----++++ + ----++++ + ----++++ + ----++++ + That is, the wavelet is square and computes the sum of pixels on the + right minus the sum of pixels on the left. + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename integral_image_type> + typename integral_image_type::value_type haar_y ( + const integral_image_type& img, + const point& p, + long width + ) + /*! + requires + - get_rect(img).contains(centered_rect(p,width,width)) == true + - integral_image_type == a type that implements the integral_image_generic + interface defined above + ensures + - returns the response of a Haar wavelet centered at the point p + with the given width in the given image. The wavelet is oriented + along the Y axis and has the following shape: + -------- + -------- + ++++++++ + ++++++++ + That is, the wavelet is square and computes the sum of pixels on the + bottom minus the sum of pixels on the top. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_INTEGRAL_IMAGe_ABSTRACT_ + diff --git a/ml/dlib/dlib/image_transforms/interpolation.h b/ml/dlib/dlib/image_transforms/interpolation.h new file mode 100644 index 000000000..11c561e2d --- /dev/null +++ b/ml/dlib/dlib/image_transforms/interpolation.h @@ -0,0 +1,2193 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_INTERPOlATIONh_ +#define DLIB_INTERPOlATIONh_ + +#include "interpolation_abstract.h" +#include "../pixel.h" +#include "../matrix.h" +#include "assign_image.h" +#include "image_pyramid.h" +#include "../simd.h" +#include "../image_processing/full_object_detection.h" +#include <limits> +#include "../rand.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template <typename T> + struct sub_image_proxy + { + sub_image_proxy() = default; + + sub_image_proxy ( + T& img, + rectangle rect + ) + { + rect = rect.intersect(get_rect(img)); + typedef typename image_traits<T>::pixel_type pixel_type; + + _nr = rect.height(); + _nc = rect.width(); + _width_step = width_step(img); + _data = (char*)image_data(img) + sizeof(pixel_type)*rect.left() + rect.top()*_width_step; + } + + void* _data = 0; + long _width_step = 0; + long _nr = 0; + long _nc = 0; + }; + + template <typename T> + struct const_sub_image_proxy + { + const_sub_image_proxy() = default; + + const_sub_image_proxy ( + const T& img, + rectangle rect + ) + { + rect = rect.intersect(get_rect(img)); + typedef typename image_traits<T>::pixel_type pixel_type; + + _nr = rect.height(); + _nc = rect.width(); + _width_step = width_step(img); + _data = (const char*)image_data(img) + sizeof(pixel_type)*rect.left() + rect.top()*_width_step; + } + + const void* _data = 0; + long _width_step = 0; + long _nr = 0; + long _nc = 0; + }; + + 
template <typename T> + struct image_traits<sub_image_proxy<T> > + { + typedef typename image_traits<T>::pixel_type pixel_type; + }; + template <typename T> + struct image_traits<const sub_image_proxy<T> > + { + typedef typename image_traits<T>::pixel_type pixel_type; + }; + template <typename T> + struct image_traits<const_sub_image_proxy<T> > + { + typedef typename image_traits<T>::pixel_type pixel_type; + }; + template <typename T> + struct image_traits<const const_sub_image_proxy<T> > + { + typedef typename image_traits<T>::pixel_type pixel_type; + }; + + template <typename T> + inline long num_rows( const sub_image_proxy<T>& img) { return img._nr; } + template <typename T> + inline long num_columns( const sub_image_proxy<T>& img) { return img._nc; } + + template <typename T> + inline long num_rows( const const_sub_image_proxy<T>& img) { return img._nr; } + template <typename T> + inline long num_columns( const const_sub_image_proxy<T>& img) { return img._nc; } + + template <typename T> + inline void* image_data( sub_image_proxy<T>& img) + { + return img._data; + } + template <typename T> + inline const void* image_data( const sub_image_proxy<T>& img) + { + return img._data; + } + + template <typename T> + inline const void* image_data( const const_sub_image_proxy<T>& img) + { + return img._data; + } + + template <typename T> + inline long width_step( + const sub_image_proxy<T>& img + ) { return img._width_step; } + + template <typename T> + inline long width_step( + const const_sub_image_proxy<T>& img + ) { return img._width_step; } + + template <typename T> + void set_image_size(sub_image_proxy<T>& img, long rows, long cols) + { + DLIB_CASSERT(img._nr == rows && img._nc == cols, "A sub_image can't be resized." 
+ << "\n\t img._nr: "<< img._nr + << "\n\t img._nc: "<< img._nc + << "\n\t rows: "<< rows + << "\n\t cols: "<< cols + ); + } + + template < + typename image_type + > + sub_image_proxy<image_type> sub_image ( + image_type& img, + const rectangle& rect + ) + { + return sub_image_proxy<image_type>(img,rect); + } + + template < + typename image_type + > + const const_sub_image_proxy<image_type> sub_image ( + const image_type& img, + const rectangle& rect + ) + { + return const_sub_image_proxy<image_type>(img,rect); + } + + template <typename T> + inline sub_image_proxy<matrix<T>> sub_image ( + T* img, + long nr, + long nc, + long row_stride + ) + { + sub_image_proxy<matrix<T>> tmp; + tmp._data = img; + tmp._nr = nr; + tmp._nc = nc; + tmp._width_step = row_stride*sizeof(T); + return tmp; + } + + template <typename T> + inline const const_sub_image_proxy<matrix<T>> sub_image ( + const T* img, + long nr, + long nc, + long row_stride + ) + { + const_sub_image_proxy<matrix<T>> tmp; + tmp._data = img; + tmp._nr = nr; + tmp._nc = nc; + tmp._width_step = row_stride*sizeof(T); + return tmp; + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + class interpolate_nearest_neighbor + { + public: + + template <typename image_view_type, typename pixel_type> + bool operator() ( + const image_view_type& img, + const dlib::point& p, + pixel_type& result + ) const + { + COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false); + + if (get_rect(img).contains(p)) + { + assign_pixel(result, img[p.y()][p.x()]); + return true; + } + else + { + return false; + } + } + + }; + +// ---------------------------------------------------------------------------------------- + + class interpolate_bilinear + { + template <typename T> + struct is_rgb_image + { + const static bool value = pixel_traits<typename T::pixel_type>::rgb; 
+ }; + + public: + + template <typename T, typename image_view_type, typename pixel_type> + typename disable_if<is_rgb_image<image_view_type>,bool>::type operator() ( + const image_view_type& img, + const dlib::vector<T,2>& p, + pixel_type& result + ) const + { + COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false); + + const long left = static_cast<long>(std::floor(p.x())); + const long top = static_cast<long>(std::floor(p.y())); + const long right = left+1; + const long bottom = top+1; + + + // if the interpolation goes outside img + if (!(left >= 0 && top >= 0 && right < img.nc() && bottom < img.nr())) + return false; + + const double lr_frac = p.x() - left; + const double tb_frac = p.y() - top; + + double tl = 0, tr = 0, bl = 0, br = 0; + + assign_pixel(tl, img[top][left]); + assign_pixel(tr, img[top][right]); + assign_pixel(bl, img[bottom][left]); + assign_pixel(br, img[bottom][right]); + + double temp = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + + tb_frac*((1-lr_frac)*bl + lr_frac*br); + + assign_pixel(result, temp); + return true; + } + + template <typename T, typename image_view_type, typename pixel_type> + typename enable_if<is_rgb_image<image_view_type>,bool>::type operator() ( + const image_view_type& img, + const dlib::vector<T,2>& p, + pixel_type& result + ) const + { + COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false); + + const long left = static_cast<long>(std::floor(p.x())); + const long top = static_cast<long>(std::floor(p.y())); + const long right = left+1; + const long bottom = top+1; + + + // if the interpolation goes outside img + if (!(left >= 0 && top >= 0 && right < img.nc() && bottom < img.nr())) + return false; + + const double lr_frac = p.x() - left; + const double tb_frac = p.y() - top; + + double tl, tr, bl, br; + + tl = img[top][left].red; + tr = img[top][right].red; + bl = img[bottom][left].red; + br = img[bottom][right].red; + const double red = 
(1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + + tb_frac*((1-lr_frac)*bl + lr_frac*br); + + tl = img[top][left].green; + tr = img[top][right].green; + bl = img[bottom][left].green; + br = img[bottom][right].green; + const double green = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + + tb_frac*((1-lr_frac)*bl + lr_frac*br); + + tl = img[top][left].blue; + tr = img[top][right].blue; + bl = img[bottom][left].blue; + br = img[bottom][right].blue; + const double blue = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + + tb_frac*((1-lr_frac)*bl + lr_frac*br); + + rgb_pixel temp; + assign_pixel(temp.red, red); + assign_pixel(temp.green, green); + assign_pixel(temp.blue, blue); + assign_pixel(result, temp); + return true; + } + }; + +// ---------------------------------------------------------------------------------------- + + class interpolate_quadratic + { + template <typename T> + struct is_rgb_image + { + const static bool value = pixel_traits<typename T::pixel_type>::rgb; + }; + + public: + + template <typename T, typename image_view_type, typename pixel_type> + typename disable_if<is_rgb_image<image_view_type>,bool>::type operator() ( + const image_view_type& img, + const dlib::vector<T,2>& p, + pixel_type& result + ) const + { + COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false); + + const point pp(p); + + // if the interpolation goes outside img + if (!get_rect(img).contains(grow_rect(pp,1))) + return false; + + const long r = pp.y(); + const long c = pp.x(); + + const double temp = interpolate(p-pp, + img[r-1][c-1], + img[r-1][c ], + img[r-1][c+1], + img[r ][c-1], + img[r ][c ], + img[r ][c+1], + img[r+1][c-1], + img[r+1][c ], + img[r+1][c+1]); + + assign_pixel(result, temp); + return true; + } + + template <typename T, typename image_view_type, typename pixel_type> + typename enable_if<is_rgb_image<image_view_type>,bool>::type operator() ( + const image_view_type& img, + const dlib::vector<T,2>& p, + pixel_type& result + ) const + { 
+ COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false); + + const point pp(p); + + // if the interpolation goes outside img + if (!get_rect(img).contains(grow_rect(pp,1))) + return false; + + const long r = pp.y(); + const long c = pp.x(); + + const double red = interpolate(p-pp, + img[r-1][c-1].red, + img[r-1][c ].red, + img[r-1][c+1].red, + img[r ][c-1].red, + img[r ][c ].red, + img[r ][c+1].red, + img[r+1][c-1].red, + img[r+1][c ].red, + img[r+1][c+1].red); + const double green = interpolate(p-pp, + img[r-1][c-1].green, + img[r-1][c ].green, + img[r-1][c+1].green, + img[r ][c-1].green, + img[r ][c ].green, + img[r ][c+1].green, + img[r+1][c-1].green, + img[r+1][c ].green, + img[r+1][c+1].green); + const double blue = interpolate(p-pp, + img[r-1][c-1].blue, + img[r-1][c ].blue, + img[r-1][c+1].blue, + img[r ][c-1].blue, + img[r ][c ].blue, + img[r ][c+1].blue, + img[r+1][c-1].blue, + img[r+1][c ].blue, + img[r+1][c+1].blue); + + + rgb_pixel temp; + assign_pixel(temp.red, red); + assign_pixel(temp.green, green); + assign_pixel(temp.blue, blue); + assign_pixel(result, temp); + + return true; + } + + private: + + /* tl tm tr + ml mm mr + bl bm br + */ + // The above is the pixel layout in our little 3x3 neighborhood. interpolate() will + // fit a quadratic to these 9 pixels and then use that quadratic to find the interpolated + // value at point p. 
+ inline double interpolate( + const dlib::vector<double,2>& p, + double tl, double tm, double tr, + double ml, double mm, double mr, + double bl, double bm, double br + ) const + { + matrix<double,6,1> w; + // x + w(0) = (tr + mr + br - tl - ml - bl)*0.16666666666; + // y + w(1) = (bl + bm + br - tl - tm - tr)*0.16666666666; + // x^2 + w(2) = (tl + tr + ml + mr + bl + br)*0.16666666666 - (tm + mm + bm)*0.333333333; + // x*y + w(3) = (tl - tr - bl + br)*0.25; + // y^2 + w(4) = (tl + tm + tr + bl + bm + br)*0.16666666666 - (ml + mm + mr)*0.333333333; + // 1 (constant term) + w(5) = (tm + ml + mr + bm)*0.222222222 - (tl + tr + bl + br)*0.11111111 + (mm)*0.55555556; + + const double x = p.x(); + const double y = p.y(); + + matrix<double,6,1> z; + z = x, y, x*x, x*y, y*y, 1.0; + + return dot(w,z); + } + }; + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + class black_background + { + public: + template <typename pixel_type> + void operator() ( pixel_type& p) const { assign_pixel(p, 0); } + }; + + class white_background + { + public: + template <typename pixel_type> + void operator() ( pixel_type& p) const { assign_pixel(p, 255); } + }; + + class no_background + { + public: + template <typename pixel_type> + void operator() ( pixel_type& ) const { } + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename interpolation_type, + typename point_mapping_type, + typename background_type + > + void transform_image ( + const image_type1& in_img, + image_type2& out_img, + const interpolation_type& interp, + const point_mapping_type& map_point, + const background_type& set_background, + const rectangle& area + ) + { + // make sure 
requires clause is not broken + DLIB_ASSERT( get_rect(out_img).contains(area) == true && + is_same_object(in_img, out_img) == false , + "\t void transform_image()" + << "\n\t Invalid inputs were given to this function." + << "\n\t get_rect(out_img).contains(area): " << get_rect(out_img).contains(area) + << "\n\t get_rect(out_img): " << get_rect(out_img) + << "\n\t area: " << area + << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) + ); + + const_image_view<image_type1> imgv(in_img); + image_view<image_type2> out_imgv(out_img); + + for (long r = area.top(); r <= area.bottom(); ++r) + { + for (long c = area.left(); c <= area.right(); ++c) + { + if (!interp(imgv, map_point(dlib::vector<double,2>(c,r)), out_imgv[r][c])) + set_background(out_imgv[r][c]); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename interpolation_type, + typename point_mapping_type, + typename background_type + > + void transform_image ( + const image_type1& in_img, + image_type2& out_img, + const interpolation_type& interp, + const point_mapping_type& map_point, + const background_type& set_background + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(in_img, out_img) == false , + "\t void transform_image()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) + ); + + transform_image(in_img, out_img, interp, map_point, set_background, get_rect(out_img)); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename interpolation_type, + typename point_mapping_type + > + void transform_image ( + const image_type1& in_img, + image_type2& out_img, + const interpolation_type& interp, + const point_mapping_type& map_point + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(in_img, out_img) == false , + "\t void transform_image()" + << "\n\t Invalid inputs were given to this function." + << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) + ); + + + transform_image(in_img, out_img, interp, map_point, black_background(), get_rect(out_img)); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename interpolation_type + > + point_transform_affine rotate_image ( + const image_type1& in_img, + image_type2& out_img, + double angle, + const interpolation_type& interp + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(in_img, out_img) == false , + "\t point_transform_affine rotate_image()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) + ); + + const rectangle rimg = get_rect(in_img); + + + // figure out bounding box for rotated rectangle + rectangle rect; + rect += rotate_point(center(rimg), rimg.tl_corner(), -angle); + rect += rotate_point(center(rimg), rimg.tr_corner(), -angle); + rect += rotate_point(center(rimg), rimg.bl_corner(), -angle); + rect += rotate_point(center(rimg), rimg.br_corner(), -angle); + set_image_size(out_img, rect.height(), rect.width()); + + const matrix<double,2,2> R = rotation_matrix(angle); + + point_transform_affine trans = point_transform_affine(R, -R*dcenter(get_rect(out_img)) + dcenter(rimg)); + transform_image(in_img, out_img, interp, trans); + return inv(trans); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2 + > + point_transform_affine rotate_image ( + const image_type1& in_img, + image_type2& out_img, + double angle + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(in_img, out_img) == false , + "\t point_transform_affine rotate_image()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) + ); + + return rotate_image(in_img, out_img, angle, interpolate_quadratic()); + } + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + class helper_resize_image + { + public: + helper_resize_image( + double x_scale_, + double y_scale_ + ): + x_scale(x_scale_), + y_scale(y_scale_) + {} + + dlib::vector<double,2> operator() ( + const dlib::vector<double,2>& p + ) const + { + return dlib::vector<double,2>(p.x()*x_scale, p.y()*y_scale); + } + + private: + const double x_scale; + const double y_scale; + }; + } + + template < + typename image_type1, + typename image_type2, + typename interpolation_type + > + void resize_image ( + const image_type1& in_img, + image_type2& out_img, + const interpolation_type& interp + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(in_img, out_img) == false , + "\t void resize_image()" + << "\n\t Invalid inputs were given to this function." + << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) + ); + + const double x_scale = (num_columns(in_img)-1)/(double)std::max<long>((num_columns(out_img)-1),1); + const double y_scale = (num_rows(in_img)-1)/(double)std::max<long>((num_rows(out_img)-1),1); + transform_image(in_img, out_img, interp, + dlib::impl::helper_resize_image(x_scale,y_scale)); + } + +// ---------------------------------------------------------------------------------------- + + template <typename image_type> + struct is_rgb_image { const static bool value = pixel_traits<typename image_traits<image_type>::pixel_type>::rgb; }; + template <typename image_type> + struct is_grayscale_image { const static bool value = pixel_traits<typename image_traits<image_type>::pixel_type>::grayscale; }; + + // This is an optimized version of resize_image for the case where bilinear + // interpolation is used. 
+ template < + typename image_type1, + typename image_type2 + > + typename disable_if_c<(is_rgb_image<image_type1>::value&&is_rgb_image<image_type2>::value) || + (is_grayscale_image<image_type1>::value&&is_grayscale_image<image_type2>::value)>::type + resize_image ( + const image_type1& in_img_, + image_type2& out_img_, + interpolate_bilinear + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(in_img_, out_img_) == false , + "\t void resize_image()" + << "\n\t Invalid inputs were given to this function." + << "\n\t is_same_object(in_img_, out_img_): " << is_same_object(in_img_, out_img_) + ); + + const_image_view<image_type1> in_img(in_img_); + image_view<image_type2> out_img(out_img_); + + if (out_img.size() == 0 || in_img.size() == 0) + return; + + + typedef typename image_traits<image_type1>::pixel_type T; + typedef typename image_traits<image_type2>::pixel_type U; + const double x_scale = (in_img.nc()-1)/(double)std::max<long>((out_img.nc()-1),1); + const double y_scale = (in_img.nr()-1)/(double)std::max<long>((out_img.nr()-1),1); + double y = -y_scale; + for (long r = 0; r < out_img.nr(); ++r) + { + y += y_scale; + const long top = static_cast<long>(std::floor(y)); + const long bottom = std::min(top+1, in_img.nr()-1); + const double tb_frac = y - top; + double x = -x_scale; + if (pixel_traits<U>::grayscale) + { + for (long c = 0; c < out_img.nc(); ++c) + { + x += x_scale; + const long left = static_cast<long>(std::floor(x)); + const long right = std::min(left+1, in_img.nc()-1); + const double lr_frac = x - left; + + double tl = 0, tr = 0, bl = 0, br = 0; + + assign_pixel(tl, in_img[top][left]); + assign_pixel(tr, in_img[top][right]); + assign_pixel(bl, in_img[bottom][left]); + assign_pixel(br, in_img[bottom][right]); + + double temp = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + + tb_frac*((1-lr_frac)*bl + lr_frac*br); + + assign_pixel(out_img[r][c], temp); + } + } + else + { + for (long c = 0; c < out_img.nc(); ++c) + { + x += 
x_scale; + const long left = static_cast<long>(std::floor(x)); + const long right = std::min(left+1, in_img.nc()-1); + const double lr_frac = x - left; + + const T tl = in_img[top][left]; + const T tr = in_img[top][right]; + const T bl = in_img[bottom][left]; + const T br = in_img[bottom][right]; + + T temp; + assign_pixel(temp, 0); + vector_to_pixel(temp, + (1-tb_frac)*((1-lr_frac)*pixel_to_vector<double>(tl) + lr_frac*pixel_to_vector<double>(tr)) + + tb_frac*((1-lr_frac)*pixel_to_vector<double>(bl) + lr_frac*pixel_to_vector<double>(br))); + assign_pixel(out_img[r][c], temp); + } + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2 + > + struct images_have_same_pixel_types + { + typedef typename image_traits<image_type1>::pixel_type ptype1; + typedef typename image_traits<image_type2>::pixel_type ptype2; + const static bool value = is_same_type<ptype1, ptype2>::value; + }; + + template < + typename image_type, + typename image_type2 + > + typename enable_if_c<is_grayscale_image<image_type>::value && is_grayscale_image<image_type2>::value && images_have_same_pixel_types<image_type,image_type2>::value>::type + resize_image ( + const image_type& in_img_, + image_type2& out_img_, + interpolate_bilinear + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(in_img_, out_img_) == false , + "\t void resize_image()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t is_same_object(in_img_, out_img_): " << is_same_object(in_img_, out_img_) + ); + + const_image_view<image_type> in_img(in_img_); + image_view<image_type2> out_img(out_img_); + + if (out_img.size() == 0 || in_img.size() == 0) + return; + + typedef typename image_traits<image_type>::pixel_type T; + const double x_scale = (in_img.nc()-1)/(double)std::max<long>((out_img.nc()-1),1); + const double y_scale = (in_img.nr()-1)/(double)std::max<long>((out_img.nr()-1),1); + double y = -y_scale; + for (long r = 0; r < out_img.nr(); ++r) + { + y += y_scale; + const long top = static_cast<long>(std::floor(y)); + const long bottom = std::min(top+1, in_img.nr()-1); + const double tb_frac = y - top; + double x = -4*x_scale; + + const simd4f _tb_frac = tb_frac; + const simd4f _inv_tb_frac = 1-tb_frac; + const simd4f _x_scale = 4*x_scale; + simd4f _x(x, x+x_scale, x+2*x_scale, x+3*x_scale); + long c = 0; + for (;; c+=4) + { + _x += _x_scale; + simd4i left = simd4i(_x); + + simd4f _lr_frac = _x-left; + simd4f _inv_lr_frac = 1-_lr_frac; + simd4i right = left+1; + + simd4f tlf = _inv_tb_frac*_inv_lr_frac; + simd4f trf = _inv_tb_frac*_lr_frac; + simd4f blf = _tb_frac*_inv_lr_frac; + simd4f brf = _tb_frac*_lr_frac; + + int32 fleft[4]; + int32 fright[4]; + left.store(fleft); + right.store(fright); + + if (fright[3] >= in_img.nc()) + break; + simd4f tl(in_img[top][fleft[0]], in_img[top][fleft[1]], in_img[top][fleft[2]], in_img[top][fleft[3]]); + simd4f tr(in_img[top][fright[0]], in_img[top][fright[1]], in_img[top][fright[2]], in_img[top][fright[3]]); + simd4f bl(in_img[bottom][fleft[0]], in_img[bottom][fleft[1]], in_img[bottom][fleft[2]], in_img[bottom][fleft[3]]); + simd4f br(in_img[bottom][fright[0]], in_img[bottom][fright[1]], in_img[bottom][fright[2]], in_img[bottom][fright[3]]); + + simd4f out = simd4f(tlf*tl + trf*tr + blf*bl + brf*br); + float fout[4]; + out.store(fout); + + out_img[r][c] = static_cast<T>(fout[0]); + out_img[r][c+1] = static_cast<T>(fout[1]); + 
out_img[r][c+2] = static_cast<T>(fout[2]); + out_img[r][c+3] = static_cast<T>(fout[3]); + } + x = -x_scale + c*x_scale; + for (; c < out_img.nc(); ++c) + { + x += x_scale; + const long left = static_cast<long>(std::floor(x)); + const long right = std::min(left+1, in_img.nc()-1); + const float lr_frac = x - left; + + float tl = 0, tr = 0, bl = 0, br = 0; + + assign_pixel(tl, in_img[top][left]); + assign_pixel(tr, in_img[top][right]); + assign_pixel(bl, in_img[bottom][left]); + assign_pixel(br, in_img[bottom][right]); + + float temp = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + + tb_frac*((1-lr_frac)*bl + lr_frac*br); + + assign_pixel(out_img[r][c], temp); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + typename enable_if<is_rgb_image<image_type> >::type resize_image ( + const image_type& in_img_, + image_type& out_img_, + interpolate_bilinear + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(in_img_, out_img_) == false , + "\t void resize_image()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t is_same_object(in_img_, out_img_): " << is_same_object(in_img_, out_img_) + ); + + const_image_view<image_type> in_img(in_img_); + image_view<image_type> out_img(out_img_); + + if (out_img.size() == 0 || in_img.size() == 0) + return; + + + typedef typename image_traits<image_type>::pixel_type T; + const double x_scale = (in_img.nc()-1)/(double)std::max<long>((out_img.nc()-1),1); + const double y_scale = (in_img.nr()-1)/(double)std::max<long>((out_img.nr()-1),1); + double y = -y_scale; + for (long r = 0; r < out_img.nr(); ++r) + { + y += y_scale; + const long top = static_cast<long>(std::floor(y)); + const long bottom = std::min(top+1, in_img.nr()-1); + const double tb_frac = y - top; + double x = -4*x_scale; + + const simd4f _tb_frac = tb_frac; + const simd4f _inv_tb_frac = 1-tb_frac; + const simd4f _x_scale = 4*x_scale; + simd4f _x(x, x+x_scale, x+2*x_scale, x+3*x_scale); + long c = 0; + for (;; c+=4) + { + _x += _x_scale; + simd4i left = simd4i(_x); + simd4f lr_frac = _x-left; + simd4f _inv_lr_frac = 1-lr_frac; + simd4i right = left+1; + + simd4f tlf = _inv_tb_frac*_inv_lr_frac; + simd4f trf = _inv_tb_frac*lr_frac; + simd4f blf = _tb_frac*_inv_lr_frac; + simd4f brf = _tb_frac*lr_frac; + + int32 fleft[4]; + int32 fright[4]; + left.store(fleft); + right.store(fright); + + if (fright[3] >= in_img.nc()) + break; + simd4f tl(in_img[top][fleft[0]].red, in_img[top][fleft[1]].red, in_img[top][fleft[2]].red, in_img[top][fleft[3]].red); + simd4f tr(in_img[top][fright[0]].red, in_img[top][fright[1]].red, in_img[top][fright[2]].red, in_img[top][fright[3]].red); + simd4f bl(in_img[bottom][fleft[0]].red, in_img[bottom][fleft[1]].red, in_img[bottom][fleft[2]].red, in_img[bottom][fleft[3]].red); + simd4f br(in_img[bottom][fright[0]].red, in_img[bottom][fright[1]].red, in_img[bottom][fright[2]].red, in_img[bottom][fright[3]].red); + + simd4i out = simd4i(tlf*tl + trf*tr + blf*bl + brf*br); + int32 fout[4]; + out.store(fout); + + out_img[r][c].red = static_cast<unsigned 
char>(fout[0]); + out_img[r][c+1].red = static_cast<unsigned char>(fout[1]); + out_img[r][c+2].red = static_cast<unsigned char>(fout[2]); + out_img[r][c+3].red = static_cast<unsigned char>(fout[3]); + + + tl = simd4f(in_img[top][fleft[0]].green, in_img[top][fleft[1]].green, in_img[top][fleft[2]].green, in_img[top][fleft[3]].green); + tr = simd4f(in_img[top][fright[0]].green, in_img[top][fright[1]].green, in_img[top][fright[2]].green, in_img[top][fright[3]].green); + bl = simd4f(in_img[bottom][fleft[0]].green, in_img[bottom][fleft[1]].green, in_img[bottom][fleft[2]].green, in_img[bottom][fleft[3]].green); + br = simd4f(in_img[bottom][fright[0]].green, in_img[bottom][fright[1]].green, in_img[bottom][fright[2]].green, in_img[bottom][fright[3]].green); + out = simd4i(tlf*tl + trf*tr + blf*bl + brf*br); + out.store(fout); + out_img[r][c].green = static_cast<unsigned char>(fout[0]); + out_img[r][c+1].green = static_cast<unsigned char>(fout[1]); + out_img[r][c+2].green = static_cast<unsigned char>(fout[2]); + out_img[r][c+3].green = static_cast<unsigned char>(fout[3]); + + + tl = simd4f(in_img[top][fleft[0]].blue, in_img[top][fleft[1]].blue, in_img[top][fleft[2]].blue, in_img[top][fleft[3]].blue); + tr = simd4f(in_img[top][fright[0]].blue, in_img[top][fright[1]].blue, in_img[top][fright[2]].blue, in_img[top][fright[3]].blue); + bl = simd4f(in_img[bottom][fleft[0]].blue, in_img[bottom][fleft[1]].blue, in_img[bottom][fleft[2]].blue, in_img[bottom][fleft[3]].blue); + br = simd4f(in_img[bottom][fright[0]].blue, in_img[bottom][fright[1]].blue, in_img[bottom][fright[2]].blue, in_img[bottom][fright[3]].blue); + out = simd4i(tlf*tl + trf*tr + blf*bl + brf*br); + out.store(fout); + out_img[r][c].blue = static_cast<unsigned char>(fout[0]); + out_img[r][c+1].blue = static_cast<unsigned char>(fout[1]); + out_img[r][c+2].blue = static_cast<unsigned char>(fout[2]); + out_img[r][c+3].blue = static_cast<unsigned char>(fout[3]); + } + x = -x_scale + c*x_scale; + for (; c < out_img.nc(); 
++c) + { + x += x_scale; + const long left = static_cast<long>(std::floor(x)); + const long right = std::min(left+1, in_img.nc()-1); + const double lr_frac = x - left; + + const T tl = in_img[top][left]; + const T tr = in_img[top][right]; + const T bl = in_img[bottom][left]; + const T br = in_img[bottom][right]; + + T temp; + assign_pixel(temp, 0); + vector_to_pixel(temp, + (1-tb_frac)*((1-lr_frac)*pixel_to_vector<double>(tl) + lr_frac*pixel_to_vector<double>(tr)) + + tb_frac*((1-lr_frac)*pixel_to_vector<double>(bl) + lr_frac*pixel_to_vector<double>(br))); + assign_pixel(out_img[r][c], temp); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2 + > + void resize_image ( + const image_type1& in_img, + image_type2& out_img + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(in_img, out_img) == false , + "\t void resize_image()" + << "\n\t Invalid inputs were given to this function." + << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) + ); + + resize_image(in_img, out_img, interpolate_bilinear()); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void resize_image ( + double size_scale, + image_type& img + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( size_scale > 0 , + "\t void resize_image()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t size_scale: " << size_scale + ); + + image_type temp; + set_image_size(temp, std::round(size_scale*num_rows(img)), std::round(size_scale*num_columns(img))); + resize_image(img, temp); + swap(img, temp); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2 + > + point_transform_affine flip_image_left_right ( + const image_type1& in_img, + image_type2& out_img + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(in_img, out_img) == false , + "\t void flip_image_left_right()" + << "\n\t Invalid inputs were given to this function." + << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) + ); + + assign_image(out_img, fliplr(mat(in_img))); + std::vector<dlib::vector<double,2> > from, to; + rectangle r = get_rect(in_img); + from.push_back(r.tl_corner()); to.push_back(r.tr_corner()); + from.push_back(r.bl_corner()); to.push_back(r.br_corner()); + from.push_back(r.tr_corner()); to.push_back(r.tl_corner()); + from.push_back(r.br_corner()); to.push_back(r.bl_corner()); + return find_affine_transform(from,to); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + point_transform_affine flip_image_left_right ( + image_type& img + ) + { + image_type temp; + auto tform = flip_image_left_right(img, temp); + swap(temp,img); + return tform; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2 + > + void flip_image_up_down ( + const image_type1& in_img, + image_type2& out_img + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(in_img, out_img) == false , + "\t void flip_image_up_down()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) + ); + + assign_image(out_img, flipud(mat(in_img))); + } + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + inline rectangle flip_rect_left_right ( + const rectangle& rect, + const rectangle& window + ) + { + rectangle temp; + temp.top() = rect.top(); + temp.bottom() = rect.bottom(); + + const long left_dist = rect.left()-window.left(); + + temp.right() = window.right()-left_dist; + temp.left() = temp.right()-rect.width()+1; + return temp; + } + + inline rectangle tform_object ( + const point_transform_affine& tran, + const rectangle& rect + ) + { + return centered_rect(tran(center(rect)), rect.width(), rect.height()); + } + + inline mmod_rect tform_object ( + const point_transform_affine& tran, + mmod_rect rect + ) + { + rect.rect = tform_object(tran, rect.rect); + return rect; + } + + inline full_object_detection tform_object( + const point_transform_affine& tran, + const full_object_detection& obj + ) + { + std::vector<point> parts; + parts.reserve(obj.num_parts()); + for (unsigned long i = 0; i < obj.num_parts(); ++i) + { + if (obj.part(i) != OBJECT_PART_NOT_PRESENT) + parts.push_back(tran(obj.part(i))); + else + parts.push_back(OBJECT_PART_NOT_PRESENT); + } + return full_object_detection(tform_object(tran,obj.get_rect()), parts); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type, + typename T + > + void add_image_left_right_flips ( + image_array_type& images, + std::vector<std::vector<T> >& objects + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( images.size() == objects.size(), + "\t void add_image_left_right_flips()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t images.size(): " << images.size() + << "\n\t objects.size(): " << objects.size() + ); + + typename image_array_type::value_type temp; + std::vector<T> rects; + + const unsigned long num = images.size(); + for (unsigned long j = 0; j < num; ++j) + { + const point_transform_affine tran = flip_image_left_right(images[j], temp); + + rects.clear(); + for (unsigned long i = 0; i < objects[j].size(); ++i) + rects.push_back(impl::tform_object(tran, objects[j][i])); + + images.push_back(std::move(temp)); + objects.push_back(rects); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type, + typename T, + typename U + > + void add_image_left_right_flips ( + image_array_type& images, + std::vector<std::vector<T> >& objects, + std::vector<std::vector<U> >& objects2 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( images.size() == objects.size() && + images.size() == objects2.size(), + "\t void add_image_left_right_flips()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t images.size(): " << images.size() + << "\n\t objects.size(): " << objects.size() + << "\n\t objects2.size(): " << objects2.size() + ); + + typename image_array_type::value_type temp; + std::vector<T> rects; + std::vector<U> rects2; + + const unsigned long num = images.size(); + for (unsigned long j = 0; j < num; ++j) + { + const point_transform_affine tran = flip_image_left_right(images[j], temp); + images.push_back(std::move(temp)); + + rects.clear(); + for (unsigned long i = 0; i < objects[j].size(); ++i) + rects.push_back(impl::tform_object(tran, objects[j][i])); + objects.push_back(rects); + + rects2.clear(); + for (unsigned long i = 0; i < objects2[j].size(); ++i) + rects2.push_back(impl::tform_object(tran, objects2[j][i])); + objects2.push_back(rects2); + } + } + +// ---------------------------------------------------------------------------------------- + + template <typename image_array_type> + void flip_image_dataset_left_right ( + image_array_type& images, + std::vector<std::vector<rectangle> >& objects + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( images.size() == objects.size(), + "\t void flip_image_dataset_left_right()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t images.size(): " << images.size() + << "\n\t objects.size(): " << objects.size() + ); + + typename image_array_type::value_type temp; + for (unsigned long i = 0; i < images.size(); ++i) + { + flip_image_left_right(images[i], temp); + swap(temp,images[i]); + for (unsigned long j = 0; j < objects[i].size(); ++j) + { + objects[i][j] = impl::flip_rect_left_right(objects[i][j], get_rect(images[i])); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template <typename image_array_type> + void flip_image_dataset_left_right ( + image_array_type& images, + std::vector<std::vector<rectangle> >& objects, + std::vector<std::vector<rectangle> >& objects2 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( images.size() == objects.size() && + images.size() == objects2.size(), + "\t void flip_image_dataset_left_right()" + << "\n\t Invalid inputs were given to this function." + << "\n\t images.size(): " << images.size() + << "\n\t objects.size(): " << objects.size() + << "\n\t objects2.size(): " << objects2.size() + ); + + typename image_array_type::value_type temp; + for (unsigned long i = 0; i < images.size(); ++i) + { + flip_image_left_right(images[i], temp); + swap(temp, images[i]); + for (unsigned long j = 0; j < objects[i].size(); ++j) + { + objects[i][j] = impl::flip_rect_left_right(objects[i][j], get_rect(images[i])); + } + for (unsigned long j = 0; j < objects2[i].size(); ++j) + { + objects2[i][j] = impl::flip_rect_left_right(objects2[i][j], get_rect(images[i])); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename pyramid_type, + typename image_array_type + > + void upsample_image_dataset ( + image_array_type& images, + std::vector<std::vector<rectangle> >& objects, + unsigned long max_image_size = std::numeric_limits<unsigned long>::max() + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( 
images.size() == objects.size(), + "\t void upsample_image_dataset()" + << "\n\t Invalid inputs were given to this function." + << "\n\t images.size(): " << images.size() + << "\n\t objects.size(): " << objects.size() + ); + + typename image_array_type::value_type temp; + pyramid_type pyr; + for (unsigned long i = 0; i < images.size(); ++i) + { + const unsigned long img_size = num_rows(images[i])*num_columns(images[i]); + if (img_size <= max_image_size) + { + pyramid_up(images[i], temp, pyr); + swap(temp, images[i]); + for (unsigned long j = 0; j < objects[i].size(); ++j) + { + objects[i][j] = pyr.rect_up(objects[i][j]); + } + } + } + } + + template < + typename pyramid_type, + typename image_array_type + > + void upsample_image_dataset ( + image_array_type& images, + std::vector<std::vector<mmod_rect>>& objects, + unsigned long max_image_size = std::numeric_limits<unsigned long>::max() + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( images.size() == objects.size(), + "\t void upsample_image_dataset()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t images.size(): " << images.size() + << "\n\t objects.size(): " << objects.size() + ); + + typename image_array_type::value_type temp; + pyramid_type pyr; + for (unsigned long i = 0; i < images.size(); ++i) + { + const unsigned long img_size = num_rows(images[i])*num_columns(images[i]); + if (img_size <= max_image_size) + { + pyramid_up(images[i], temp, pyr); + swap(temp, images[i]); + for (unsigned long j = 0; j < objects[i].size(); ++j) + { + objects[i][j].rect = pyr.rect_up(objects[i][j].rect); + } + } + } + } + + template < + typename pyramid_type, + typename image_array_type + > + void upsample_image_dataset ( + image_array_type& images, + std::vector<std::vector<rectangle> >& objects, + std::vector<std::vector<rectangle> >& objects2, + unsigned long max_image_size = std::numeric_limits<unsigned long>::max() + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( images.size() == objects.size() && + images.size() == objects2.size(), + "\t void upsample_image_dataset()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t images.size(): " << images.size() + << "\n\t objects.size(): " << objects.size() + << "\n\t objects2.size(): " << objects2.size() + ); + + typename image_array_type::value_type temp; + pyramid_type pyr; + for (unsigned long i = 0; i < images.size(); ++i) + { + const unsigned long img_size = num_rows(images[i])*num_columns(images[i]); + if (img_size <= max_image_size) + { + pyramid_up(images[i], temp, pyr); + swap(temp, images[i]); + for (unsigned long j = 0; j < objects[i].size(); ++j) + { + objects[i][j] = pyr.rect_up(objects[i][j]); + } + for (unsigned long j = 0; j < objects2[i].size(); ++j) + { + objects2[i][j] = pyr.rect_up(objects2[i][j]); + } + } + } + } + +// ---------------------------------------------------------------------------------------- + + template <typename image_array_type> + void rotate_image_dataset ( + double angle, + image_array_type& images, + std::vector<std::vector<rectangle> >& objects + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( images.size() == objects.size(), + "\t void rotate_image_dataset()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t images.size(): " << images.size() + << "\n\t objects.size(): " << objects.size() + ); + + typename image_array_type::value_type temp; + for (unsigned long i = 0; i < images.size(); ++i) + { + const point_transform_affine tran = rotate_image(images[i], temp, angle); + swap(temp, images[i]); + for (unsigned long j = 0; j < objects[i].size(); ++j) + { + const rectangle rect = objects[i][j]; + objects[i][j] = centered_rect(tran(center(rect)), rect.width(), rect.height()); + } + } + } + + template <typename image_array_type> + void rotate_image_dataset ( + double angle, + image_array_type& images, + std::vector<std::vector<rectangle> >& objects, + std::vector<std::vector<rectangle> >& objects2 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( images.size() == objects.size() && + images.size() == objects2.size(), + "\t void rotate_image_dataset()" + << "\n\t Invalid inputs were given to this function." + << "\n\t images.size(): " << images.size() + << "\n\t objects.size(): " << objects.size() + << "\n\t objects2.size(): " << objects2.size() + ); + + typename image_array_type::value_type temp; + for (unsigned long i = 0; i < images.size(); ++i) + { + const point_transform_affine tran = rotate_image(images[i], temp, angle); + swap(temp, images[i]); + for (unsigned long j = 0; j < objects[i].size(); ++j) + { + const rectangle rect = objects[i][j]; + objects[i][j] = centered_rect(tran(center(rect)), rect.width(), rect.height()); + } + for (unsigned long j = 0; j < objects2[i].size(); ++j) + { + const rectangle rect = objects2[i][j]; + objects2[i][j] = centered_rect(tran(center(rect)), rect.width(), rect.height()); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type, + typename EXP, + typename T, + typename U + > + void add_image_rotations ( + const matrix_exp<EXP>& angles, + image_array_type& images, + std::vector<std::vector<T> >& objects, + 
std::vector<std::vector<U> >& objects2 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_vector(angles) && angles.size() > 0 && + images.size() == objects.size() && + images.size() == objects2.size(), + "\t void add_image_rotations()" + << "\n\t Invalid inputs were given to this function." + << "\n\t is_vector(angles): " << is_vector(angles) + << "\n\t angles.size(): " << angles.size() + << "\n\t images.size(): " << images.size() + << "\n\t objects.size(): " << objects.size() + << "\n\t objects2.size(): " << objects2.size() + ); + + image_array_type new_images; + std::vector<std::vector<T> > new_objects; + std::vector<std::vector<U> > new_objects2; + + using namespace impl; + + std::vector<T> objtemp; + std::vector<U> objtemp2; + typename image_array_type::value_type temp; + for (long i = 0; i < angles.size(); ++i) + { + for (unsigned long j = 0; j < images.size(); ++j) + { + const point_transform_affine tran = rotate_image(images[j], temp, angles(i)); + new_images.push_back(std::move(temp)); + + objtemp.clear(); + for (unsigned long k = 0; k < objects[j].size(); ++k) + objtemp.push_back(tform_object(tran, objects[j][k])); + new_objects.push_back(objtemp); + + objtemp2.clear(); + for (unsigned long k = 0; k < objects2[j].size(); ++k) + objtemp2.push_back(tform_object(tran, objects2[j][k])); + new_objects2.push_back(objtemp2); + } + } + + new_images.swap(images); + new_objects.swap(objects); + new_objects2.swap(objects2); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type, + typename EXP, + typename T + > + void add_image_rotations ( + const matrix_exp<EXP>& angles, + image_array_type& images, + std::vector<std::vector<T> >& objects + ) + { + std::vector<std::vector<T> > objects2(objects.size()); + add_image_rotations(angles, images, objects, objects2); + } + +// ---------------------------------------------------------------------------------------- +// 
---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename pyramid_type, + typename interpolation_type + > + void pyramid_up ( + const image_type1& in_img, + image_type2& out_img, + const pyramid_type& pyr, + const interpolation_type& interp + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(in_img, out_img) == false , + "\t void pyramid_up()" + << "\n\t Invalid inputs were given to this function." + << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) + ); + + if (image_size(in_img) == 0) + { + set_image_size(out_img, 0, 0); + return; + } + + rectangle rect = get_rect(in_img); + rectangle uprect = pyr.rect_up(rect); + if (uprect.is_empty()) + { + set_image_size(out_img, 0, 0); + return; + } + set_image_size(out_img, uprect.bottom()+1, uprect.right()+1); + + resize_image(in_img, out_img, interp); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename pyramid_type + > + void pyramid_up ( + const image_type1& in_img, + image_type2& out_img, + const pyramid_type& pyr + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_same_object(in_img, out_img) == false , + "\t void pyramid_up()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) + ); + + pyramid_up(in_img, out_img, pyr, interpolate_bilinear()); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename pyramid_type + > + void pyramid_up ( + image_type& img, + const pyramid_type& pyr + ) + { + image_type temp; + pyramid_up(img, temp, pyr); + swap(temp, img); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void pyramid_up ( + image_type& img + ) + { + pyramid_down<2> pyr; + pyramid_up(img, pyr); + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + struct chip_dims + { + chip_dims ( + unsigned long rows_, + unsigned long cols_ + ) : rows(rows_), cols(cols_) { } + + unsigned long rows; + unsigned long cols; + }; + + struct chip_details + { + chip_details() : angle(0), rows(0), cols(0) {} + chip_details(const rectangle& rect_) : rect(rect_),angle(0), rows(rect_.height()), cols(rect_.width()) {} + chip_details(const drectangle& rect_) : rect(rect_),angle(0), + rows((unsigned long)(rect_.height()+0.5)), cols((unsigned long)(rect_.width()+0.5)) {} + chip_details(const drectangle& rect_, unsigned long size) : rect(rect_),angle(0) + { compute_dims_from_size(size); } + chip_details(const drectangle& rect_, unsigned long size, double angle_) : rect(rect_),angle(angle_) + { compute_dims_from_size(size); } + + chip_details(const drectangle& rect_, const chip_dims& dims) : + rect(rect_),angle(0),rows(dims.rows), cols(dims.cols) {} + chip_details(const drectangle& rect_, const chip_dims& dims, double angle_) : + rect(rect_),angle(angle_),rows(dims.rows), cols(dims.cols) {} + + template <typename T> + chip_details( + const std::vector<dlib::vector<T,2> >& 
chip_points, + const std::vector<dlib::vector<T,2> >& img_points, + const chip_dims& dims + ) : + rows(dims.rows), cols(dims.cols) + { + DLIB_CASSERT( chip_points.size() == img_points.size() && chip_points.size() >= 2, + "\t chip_details::chip_details(chip_points,img_points,dims)" + << "\n\t Invalid inputs were given to this function." + << "\n\t chip_points.size(): " << chip_points.size() + << "\n\t img_points.size(): " << img_points.size() + ); + + const point_transform_affine tform = find_similarity_transform(chip_points,img_points); + dlib::vector<double,2> p(1,0); + p = tform.get_m()*p; + + // There are only 3 things happening in a similarity transform. There is a + // rescaling, a rotation, and a translation. So here we pick out the scale and + // rotation parameters. + angle = std::atan2(p.y(),p.x()); + // Note that the translation and scale part are represented by the extraction + // rectangle. So here we build the appropriate rectangle. + const double scale = length(p); + rect = centered_drect(tform(point(dims.cols,dims.rows)/2.0), + dims.cols*scale, + dims.rows*scale); + } + + + drectangle rect; + double angle; + unsigned long rows; + unsigned long cols; + + inline unsigned long size() const + { + return rows*cols; + } + + private: + void compute_dims_from_size ( + unsigned long size + ) + { + const double relative_size = std::sqrt(size/(double)rect.area()); + rows = static_cast<unsigned long>(rect.height()*relative_size + 0.5); + cols = static_cast<unsigned long>(size/(double)rows + 0.5); + rows = std::max(1ul,rows); + cols = std::max(1ul,cols); + } + }; + +// ---------------------------------------------------------------------------------------- + + inline point_transform_affine get_mapping_to_chip ( + const chip_details& details + ) + { + std::vector<dlib::vector<double,2> > from, to; + point p1(0,0); + point p2(details.cols-1,0); + point p3(details.cols-1, details.rows-1); + to.push_back(p1); + 
from.push_back(rotate_point<double>(center(details.rect),details.rect.tl_corner(),details.angle)); + to.push_back(p2); + from.push_back(rotate_point<double>(center(details.rect),details.rect.tr_corner(),details.angle)); + to.push_back(p3); + from.push_back(rotate_point<double>(center(details.rect),details.rect.br_corner(),details.angle)); + return find_affine_transform(from, to); + } + +// ---------------------------------------------------------------------------------------- + + inline full_object_detection map_det_to_chip( + const full_object_detection& det, + const chip_details& details + ) + { + point_transform_affine tform = get_mapping_to_chip(details); + full_object_detection res(det); + // map the parts + for (unsigned long l = 0; l < det.num_parts(); ++l) + { + if (det.part(l) != OBJECT_PART_NOT_PRESENT) + res.part(l) = tform(det.part(l)); + else + res.part(l) = OBJECT_PART_NOT_PRESENT; + } + // map the main rectangle + rectangle rect; + rect += tform(det.get_rect().tl_corner()); + rect += tform(det.get_rect().tr_corner()); + rect += tform(det.get_rect().bl_corner()); + rect += tform(det.get_rect().br_corner()); + res.get_rect() = rect; + return res; + } + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template < + typename image_type1, + typename image_type2 + > + void basic_extract_image_chip ( + const image_type1& img, + const rectangle& location, + image_type2& chip + ) + /*! + ensures + - This function doesn't do any scaling or rotating. It just pulls out the + chip in the given rectangle. This also means the output image has the + same dimensions as the location rectangle. 
+ !*/ + { + const_image_view<image_type1> vimg(img); + image_view<image_type2> vchip(chip); + + vchip.set_size(location.height(), location.width()); + + // location might go outside img so clip it + rectangle area = location.intersect(get_rect(img)); + + // find the part of the chip that corresponds to area in img. + rectangle chip_area = translate_rect(area, -location.tl_corner()); + + zero_border_pixels(chip, chip_area); + // now pull out the contents of area/chip_area. + for (long r = chip_area.top(), rr = area.top(); r <= chip_area.bottom(); ++r,++rr) + { + for (long c = chip_area.left(), cc = area.left(); c <= chip_area.right(); ++c,++cc) + { + assign_pixel(vchip[r][c], vimg[rr][cc]); + } + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename interpolation_type + > + void extract_image_chips ( + const image_type1& img, + const std::vector<chip_details>& chip_locations, + dlib::array<image_type2>& chips, + const interpolation_type& interp + ) + { + // make sure requires clause is not broken +#ifdef ENABLE_ASSERTS + for (unsigned long i = 0; i < chip_locations.size(); ++i) + { + DLIB_CASSERT(chip_locations[i].size() != 0 && + chip_locations[i].rect.is_empty() == false, + "\t void extract_image_chips()" + << "\n\t Invalid inputs were given to this function." + << "\n\t chip_locations["<<i<<"].size(): " << chip_locations[i].size() + << "\n\t chip_locations["<<i<<"].rect.is_empty(): " << chip_locations[i].rect.is_empty() + ); + } +#endif + + pyramid_down<2> pyr; + long max_depth = 0; + // If the chip is supposed to be much smaller than the source subwindow then you + // can't just extract it using bilinear interpolation since at a high enough + // downsampling amount it would effectively turn into nearest neighbor + // interpolation. So we use an image pyramid to make sure the interpolation is + // fast but also high quality. 
The first thing we do is figure out how deep the + // image pyramid needs to be. + rectangle bounding_box; + for (unsigned long i = 0; i < chip_locations.size(); ++i) + { + long depth = 0; + double grow = 2; + drectangle rect = pyr.rect_down(chip_locations[i].rect); + while (rect.area() > chip_locations[i].size()) + { + rect = pyr.rect_down(rect); + ++depth; + // We drop the image size by a factor of 2 each iteration and then assume a + // border of 2 pixels is needed to avoid any border effects of the crop. + grow = grow*2 + 2; + } + drectangle rot_rect; + const vector<double,2> cent = center(chip_locations[i].rect); + rot_rect += rotate_point<double>(cent,chip_locations[i].rect.tl_corner(),chip_locations[i].angle); + rot_rect += rotate_point<double>(cent,chip_locations[i].rect.tr_corner(),chip_locations[i].angle); + rot_rect += rotate_point<double>(cent,chip_locations[i].rect.bl_corner(),chip_locations[i].angle); + rot_rect += rotate_point<double>(cent,chip_locations[i].rect.br_corner(),chip_locations[i].angle); + bounding_box += grow_rect(rot_rect, grow).intersect(get_rect(img)); + max_depth = std::max(depth,max_depth); + } + //std::cout << "max_depth: " << max_depth << std::endl; + //std::cout << "crop amount: " << bounding_box.area()/(double)get_rect(img).area() << std::endl; + + // now make an image pyramid + dlib::array<array2d<typename image_traits<image_type1>::pixel_type> > levels(max_depth); + if (levels.size() != 0) + pyr(sub_image(img,bounding_box),levels[0]); + for (unsigned long i = 1; i < levels.size(); ++i) + pyr(levels[i-1],levels[i]); + + std::vector<dlib::vector<double,2> > from, to; + + // now pull out the chips + chips.resize(chip_locations.size()); + for (unsigned long i = 0; i < chips.size(); ++i) + { + // If the chip doesn't have any rotation or scaling then use the basic version + // of chip extraction that just does a fast copy. 
+ if (chip_locations[i].angle == 0 && + chip_locations[i].rows == chip_locations[i].rect.height() && + chip_locations[i].cols == chip_locations[i].rect.width()) + { + impl::basic_extract_image_chip(img, chip_locations[i].rect, chips[i]); + } + else + { + set_image_size(chips[i], chip_locations[i].rows, chip_locations[i].cols); + + // figure out which level in the pyramid to use to extract the chip + int level = -1; + drectangle rect = translate_rect(chip_locations[i].rect, -bounding_box.tl_corner()); + while (pyr.rect_down(rect).area() > chip_locations[i].size()) + { + ++level; + rect = pyr.rect_down(rect); + } + + // find the appropriate transformation that maps from the chip to the input + // image + from.clear(); + to.clear(); + from.push_back(get_rect(chips[i]).tl_corner()); to.push_back(rotate_point<double>(center(rect),rect.tl_corner(),chip_locations[i].angle)); + from.push_back(get_rect(chips[i]).tr_corner()); to.push_back(rotate_point<double>(center(rect),rect.tr_corner(),chip_locations[i].angle)); + from.push_back(get_rect(chips[i]).bl_corner()); to.push_back(rotate_point<double>(center(rect),rect.bl_corner(),chip_locations[i].angle)); + point_transform_affine trns = find_affine_transform(from,to); + + // now extract the actual chip + if (level == -1) + transform_image(sub_image(img,bounding_box),chips[i],interp,trns); + else + transform_image(levels[level],chips[i],interp,trns); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2 + > + void extract_image_chips( + const image_type1& img, + const std::vector<chip_details>& chip_locations, + dlib::array<image_type2>& chips + ) + { + extract_image_chips(img, chip_locations, chips, interpolate_bilinear()); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename interpolation_type 
+ > + void extract_image_chip ( + const image_type1& img, + const chip_details& location, + image_type2& chip, + const interpolation_type& interp + ) + { + // If the chip doesn't have any rotation or scaling then use the basic version of + // chip extraction that just does a fast copy. + if (location.angle == 0 && + location.rows == location.rect.height() && + location.cols == location.rect.width()) + { + impl::basic_extract_image_chip(img, location.rect, chip); + } + else + { + std::vector<chip_details> chip_locations(1,location); + dlib::array<image_type2> chips; + extract_image_chips(img, chip_locations, chips, interp); + swap(chips[0], chip); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2 + > + void extract_image_chip ( + const image_type1& img, + const chip_details& location, + image_type2& chip + ) + { + extract_image_chip(img, location, chip, interpolate_bilinear()); + } + +// ---------------------------------------------------------------------------------------- + + inline chip_details get_face_chip_details ( + const full_object_detection& det, + const unsigned long size = 200, + const double padding = 0.2 + ) + { + DLIB_CASSERT(det.num_parts() == 68 || det.num_parts() == 5, + "\t chip_details get_face_chip_details()" + << "\n\t You have to give either a 5 point or 68 point face landmarking output to this function. " + << "\n\t det.num_parts(): " << det.num_parts() + ); + DLIB_CASSERT(padding >= 0 && size > 0, + "\t chip_details get_face_chip_details()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t padding: " << padding + << "\n\t size: " << size + ); + + + std::vector<dpoint> from_points, to_points; + if (det.num_parts() == 5) + { + dpoint p0(0.8595674595992, 0.2134981538014); + dpoint p1(0.6460604764104, 0.2289674387677); + dpoint p2(0.1205750620789, 0.2137274526848); + dpoint p3(0.3340850613712, 0.2290642403242); + dpoint p4(0.4901123135679, 0.6277975316475); + + + p0 = (padding+p0)/(2*padding+1); + p1 = (padding+p1)/(2*padding+1); + p2 = (padding+p2)/(2*padding+1); + p3 = (padding+p3)/(2*padding+1); + p4 = (padding+p4)/(2*padding+1); + + from_points.push_back(p0*size); + to_points.push_back(det.part(0)); + + from_points.push_back(p1*size); + to_points.push_back(det.part(1)); + + from_points.push_back(p2*size); + to_points.push_back(det.part(2)); + + from_points.push_back(p3*size); + to_points.push_back(det.part(3)); + + from_points.push_back(p4*size); + to_points.push_back(det.part(4)); + } + else + { + // Average positions of face points 17-67 + const double mean_face_shape_x[] = { + 0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124, + 0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036, + 0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918, + 0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149, + 0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721, + 0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874, + 0.553364, 0.490127, 0.42689 + }; + const double mean_face_shape_y[] = { + 0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891, + 0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326, + 0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733, + 0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099, + 0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805, + 
0.902192, 0.909281, 0.902192, 0.864805, 0.784792, 0.778746, 0.785343, 0.778746, + 0.784792, 0.824182, 0.831803, 0.824182 + }; + + COMPILE_TIME_ASSERT(sizeof(mean_face_shape_x)/sizeof(double) == 68-17); + + for (unsigned long i = 17; i < det.num_parts(); ++i) + { + // Ignore the lower lip + if ((55 <= i && i <= 59) || (65 <= i && i <= 67)) + continue; + // Ignore the eyebrows + if (17 <= i && i <= 26) + continue; + + dpoint p; + p.x() = (padding+mean_face_shape_x[i-17])/(2*padding+1); + p.y() = (padding+mean_face_shape_y[i-17])/(2*padding+1); + from_points.push_back(p*size); + to_points.push_back(det.part(i)); + } + } + + return chip_details(from_points, to_points, chip_dims(size,size)); + } + +// ---------------------------------------------------------------------------------------- + + inline std::vector<chip_details> get_face_chip_details ( + const std::vector<full_object_detection>& dets, + const unsigned long size = 200, + const double padding = 0.2 + ) + { + std::vector<chip_details> res; + res.reserve(dets.size()); + for (unsigned long i = 0; i < dets.size(); ++i) + res.push_back(get_face_chip_details(dets[i], size, padding)); + return res; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + image_type jitter_image( + const image_type& img, + dlib::rand& rnd + ) + { + DLIB_CASSERT(num_rows(img)*num_columns(img) != 0); + DLIB_CASSERT(num_rows(img)==num_columns(img)); + + const double max_rotation_degrees = 3; + const double min_object_height = 0.97; + const double max_object_height = 0.99999; + const double translate_amount = 0.02; + + + const auto rect = shrink_rect(get_rect(img),3); + + // perturb the location of the crop by a small fraction of the object's size. 
+ const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*rect.width(), + rnd.get_double_in_range(-translate_amount,translate_amount)*rect.height()); + + // perturb the scale of the crop by a fraction of the object's size + const double rand_scale_perturb = rnd.get_double_in_range(min_object_height, max_object_height); + + const long box_size = rect.height()/rand_scale_perturb; + const auto crop_rect = centered_rect(center(rect)+rand_translate, box_size, box_size); + const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180; + image_type crop; + // The output chip has the same dimensions as the input. The size is read through + // num_rows()/num_columns() so image types that only provide the generic image + // interface (and no nr()/nc() members) work here too. + extract_image_chip(img, chip_details(crop_rect, chip_dims(num_rows(img),num_columns(img)), angle), crop); + if (rnd.get_random_double() > 0.5) + flip_image_left_right(crop); + + return crop; + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_INTERPOlATIONh_ + diff --git a/ml/dlib/dlib/image_transforms/interpolation_abstract.h b/ml/dlib/dlib/image_transforms/interpolation_abstract.h new file mode 100644 index 000000000..f2da2fb02 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/interpolation_abstract.h @@ -0,0 +1,1480 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_INTERPOlATION_ABSTRACT_ +#ifdef DLIB_INTERPOlATION_ABSTRACT_ + +#include "../pixel.h" +#include "../image_processing/full_object_detection_abstract.h" +#include "../image_processing/generic_image.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class interpolate_nearest_neighbor + { + /*! + WHAT THIS OBJECT REPRESENTS + This object is a tool for performing nearest neighbor interpolation + on an image. 
+ !*/ + + public: + + template < + typename image_view_type, + typename pixel_type + > + bool operator() ( + const image_view_type& img, + const dlib::point& p, + pixel_type& result + ) const; + /*! + requires + - image_view_type == an image_view or const_image_view object. + - pixel_traits<typename image_view_type::pixel_type>::has_alpha == false + - pixel_traits<pixel_type> is defined + ensures + - if (p is located inside img) then + - #result == img[p.y()][p.x()] + (This assignment is done using assign_pixel(#result, img[p.y()][p.x()]), + therefore any necessary color space conversion will be performed) + - returns true + - else + - returns false + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + class interpolate_bilinear + { + + /*! + WHAT THIS OBJECT REPRESENTS + This object is a tool for performing bilinear interpolation + on an image. This is performed by looking at the 4 pixels + nearest to a point and deriving an interpolated value from them. + !*/ + + public: + + template < + typename T, + typename image_view_type, + typename pixel_type + > + bool operator() ( + const image_view_type& img, + const dlib::vector<T,2>& p, + pixel_type& result + ) const; + /*! + requires + - image_view_type == an image_view or const_image_view object + - pixel_traits<typename image_view_type::pixel_type>::has_alpha == false + - pixel_traits<pixel_type> is defined + ensures + - if (there is an interpolatable image location at point p in img) then + - #result == the interpolated pixel value from img at point p. + - assign_pixel() will be used to write to #result, therefore any + necessary color space conversion will be performed. + - returns true + - if img contains RGB pixels then the interpolation will be in color. + Otherwise, the interpolation will be performed in a grayscale mode. 
+ - else + - returns false + !*/ + }; + +// ---------------------------------------------------------------------------------------- + + class interpolate_quadratic + { + /*! + WHAT THIS OBJECT REPRESENTS + This object is a tool for performing quadratic interpolation + on an image. This is performed by looking at the 9 pixels + nearest to a point and deriving an interpolated value from them. + !*/ + + public: + + template < + typename T, + typename image_view_type, + typename pixel_type + > + bool operator() ( + const image_view_type& img, + const dlib::vector<T,2>& p, + pixel_type& result + ) const; + /*! + requires + - image_view_type == an image_view or const_image_view object. + - pixel_traits<typename image_view_type::pixel_type>::has_alpha == false + - pixel_traits<pixel_type> is defined + ensures + - if (there is an interpolatable image location at point p in img) then + - #result == the interpolated pixel value from img at point p + - assign_pixel() will be used to write to #result, therefore any + necessary color space conversion will be performed. + - returns true + - if img contains RGB pixels then the interpolation will be in color. + Otherwise, the interpolation will be performed in a grayscale mode. + - else + - returns false + !*/ + }; + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + class black_background + { + /*! + WHAT THIS OBJECT REPRESENTS + This is a function object which simply sets a pixel + to have a black value. + !*/ + + public: + template <typename pixel_type> + void operator() ( pixel_type& p) const { assign_pixel(p, 0); } + }; + +// ---------------------------------------------------------------------------------------- + + class white_background + { + /*! 
+ WHAT THIS OBJECT REPRESENTS + This is a function object which simply sets a pixel + to have a white value. + !*/ + + public: + template <typename pixel_type> + void operator() ( pixel_type& p) const { assign_pixel(p, 255); } + }; + +// ---------------------------------------------------------------------------------------- + + class no_background + { + /*! + WHAT THIS OBJECT REPRESENTS + This is a function object which does nothing. It is useful + when used with the transform_image() routine defined below + if no modification of uninterpolated output pixels is desired. + !*/ + public: + template <typename pixel_type> + void operator() ( pixel_type& ) const { } + }; + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename interpolation_type, + typename point_mapping_type, + typename background_type + > + void transform_image ( + const image_type1& in_img, + image_type2& out_img, + const interpolation_type& interp, + const point_mapping_type& map_point, + const background_type& set_background, + const rectangle& area + ); + /*! + requires + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear, + interpolate_quadratic, or a type with a compatible interface. + - map_point should be a function which takes dlib::vector<T,2> objects and + returns dlib::vector<T,2> objects. An example is point_transform_affine. + - set_background should be a function which can take a single argument of + type image_traits<image_type2>::pixel_type. Examples are black_background, + white_background, and no_background. 
+ - get_rect(out_img).contains(area) == true + - is_same_object(in_img, out_img) == false + ensures + - The map_point function defines a mapping from pixels in out_img to pixels + in in_img. transform_image() uses this mapping, along with the supplied + interpolation routine interp, to fill the region of out_img defined by + area with an interpolated copy of in_img. + - This function does not change the size of out_img. + - Only pixels inside the region defined by area in out_img are modified. + - For all locations r and c such that area.contains(c,r) but have no corresponding + locations in in_img: + - set_background(out_img[r][c]) is invoked + (i.e. some parts of out_img might correspond to areas outside in_img and + therefore can't supply interpolated values. In these cases, these + pixels can be assigned a value by the supplied set_background() routine) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename interpolation_type, + typename point_mapping_type, + typename background_type + > + void transform_image ( + const image_type1& in_img, + image_type2& out_img, + const interpolation_type& interp, + const point_mapping_type& map_point, + const background_type& set_background + ); + /*! + requires + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear, + interpolate_quadratic, or a type with a compatible interface. + - map_point should be a function which takes dlib::vector<T,2> objects and + returns dlib::vector<T,2> objects. An example is point_transform_affine. + - set_background should be a function which can take a single argument of + type image_traits<image_type2>::pixel_type. 
Examples are black_background, white_background, + and no_background. + - is_same_object(in_img, out_img) == false + ensures + - performs: + transform_image(in_img, out_img, interp, map_point, set_background, get_rect(out_img)); + (i.e. runs transform_image() on the entire out_img) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename interpolation_type, + typename point_mapping_type + > + void transform_image ( + const image_type1& in_img, + image_type2& out_img, + const interpolation_type& interp, + const point_mapping_type& map_point + ); + /*! + requires + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear, + interpolate_quadratic, or a type with a compatible interface. + - map_point should be a function which takes dlib::vector<T,2> objects and + returns dlib::vector<T,2> objects. An example is point_transform_affine. + - is_same_object(in_img, out_img) == false + ensures + - performs: + transform_image(in_img, out_img, interp, map_point, black_background(), get_rect(out_img)); + (i.e. runs transform_image() on the entire out_img and sets non-interpolated + pixels to black) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename interpolation_type + > + point_transform_affine rotate_image ( + const image_type1& in_img, + image_type2& out_img, + double angle, + const interpolation_type& interp + ); + /*! 
+ requires + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear, + interpolate_quadratic, or a type with a compatible interface. + - is_same_object(in_img, out_img) == false + ensures + - #out_img == a copy of in_img which has been rotated angle radians counter clockwise. + The rotation is performed with respect to the center of the image. + - Parts of #out_img which have no corresponding locations in in_img are set to black. + - uses the supplied interpolation routine interp to perform the necessary + pixel interpolation. + - returns a transformation object that maps points in in_img into their corresponding + location in #out_img. + !*/ + +// ---------------------------------------------------------------------------------------- + + + template < + typename image_type1, + typename image_type2 + > + point_transform_affine rotate_image ( + const image_type1& in_img, + image_type2& out_img, + double angle + ); + /*! + requires + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - pixel_traits<typename image_traits<image_type1>::pixel_type>::has_alpha == false + - is_same_object(in_img, out_img) == false + ensures + - #out_img == a copy of in_img which has been rotated angle radians counter clockwise. + The rotation is performed with respect to the center of the image. + - Parts of #out_img which have no corresponding locations in in_img are set to black. + - uses the interpolate_quadratic object to perform the necessary pixel interpolation. 
+ - returns a transformation object that maps points in in_img into their corresponding + location in #out_img. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename interpolation_type + > + void resize_image ( + const image_type1& in_img, + image_type2& out_img, + const interpolation_type& interp + ); + /*! + requires + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear, + interpolate_quadratic, or a type with a compatible interface. + - is_same_object(in_img, out_img) == false + ensures + - #out_img == A copy of in_img which has been stretched so that it + fits exactly into out_img. + - The size of out_img is not modified. I.e. + - #out_img.nr() == out_img.nr() + - #out_img.nc() == out_img.nc() + - uses the supplied interpolation routine interp to perform the necessary + pixel interpolation. + !*/ + +// ---------------------------------------------------------------------------------------- + + + template < + typename image_type1, + typename image_type2 + > + void resize_image ( + const image_type1& in_img, + image_type2& out_img + ); + /*! + requires + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - pixel_traits<typename image_traits<image_type1>::pixel_type>::has_alpha == false + - is_same_object(in_img, out_img) == false + ensures + - #out_img == A copy of in_img which has been stretched so that it + fits exactly into out_img. + - The size of out_img is not modified. I.e. 
+ - #out_img.nr() == out_img.nr() + - #out_img.nc() == out_img.nc() + - Uses bilinear interpolation to perform the necessary pixel interpolation. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void resize_image ( + double size_scale, + image_type& img + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false + ensures + - Resizes img so that each of its dimensions is size_scale times larger than img. + In particular, we will have: + - #img.nr() == std::round(size_scale*img.nr()) + - #img.nc() == std::round(size_scale*img.nc()) + - #img == a bilinearly interpolated copy of the input image. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2 + > + point_transform_affine flip_image_left_right ( + const image_type1& in_img, + image_type2& out_img + ); + /*! + requires + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - is_same_object(in_img, out_img) == false + ensures + - #out_img.nr() == in_img.nr() + - #out_img.nc() == in_img.nc() + - #out_img == a copy of in_img which has been flipped from left to right. + (i.e. it is flipped as if viewed through a mirror) + - returns a transformation object that maps points in in_img into their + corresponding location in #out_img. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + point_transform_affine flip_image_left_right ( + image_type& img + ); + /*! 
+ requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + ensures + - This function is identical to the above version of flip_image_left_right() + except that it operates in-place. + - #img.nr() == img.nr() + - #img.nc() == img.nc() + - #img == a copy of img which has been flipped from left to right. + (i.e. it is flipped as if viewed through a mirror) + - returns a transformation object that maps points in img into their + corresponding location in #img. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type, + typename T + > + void add_image_left_right_flips ( + image_array_type& images, + std::vector<std::vector<T> >& objects + ); + /*! + requires + - image_array_type == a dlib::array or std::vector of image objects that each + implement the interface defined in dlib/image_processing/generic_image.h + - T == rectangle, full_object_detection, or mmod_rect + - images.size() == objects.size() + ensures + - This function computes all the left/right flips of the contents of images and + then appends them onto the end of the images array. It also finds the + left/right flips of the rectangles in objects and similarly appends them into + objects. That is, we assume objects[i] is the set of bounding boxes in + images[i] and we flip the bounding boxes so that they still bound the same + objects in the new flipped images. + - #images.size() == images.size()*2 + - #objects.size() == objects.size()*2 + - All the original elements of images and objects are left unmodified. That + is, this function only appends new elements to each of these containers. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type, + typename T, + typename U + > + void add_image_left_right_flips ( + image_array_type& images, + std::vector<std::vector<T> >& objects, + std::vector<std::vector<U> >& objects2 + ); + /*! + requires + - image_array_type == a dlib::array or std::vector of image objects that each + implement the interface defined in dlib/image_processing/generic_image.h + - images.size() == objects.size() + - images.size() == objects2.size() + - T == rectangle, full_object_detection, or mmod_rect + - U == rectangle, full_object_detection, or mmod_rect + ensures + - This function computes all the left/right flips of the contents of images and + then appends them onto the end of the images array. It also finds the + left/right flips of the rectangles in objects and objects2 and similarly + appends them into objects and objects2 respectively. That is, we assume + objects[i] is the set of bounding boxes in images[i] and we flip the bounding + boxes so that they still bound the same objects in the new flipped images. + We similarly flip the boxes in objects2. + - #images.size() == images.size()*2 + - #objects.size() == objects.size()*2 + - #objects2.size() == objects2.size()*2 + - All the original elements of images, objects, and objects2 are left unmodified. + That is, this function only appends new elements to each of these containers. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type, + typename EXP, + typename T, + typename U + > + void add_image_rotations ( + const matrix_exp<EXP>& angles, + image_array_type& images, + std::vector<std::vector<T> >& objects, + std::vector<std::vector<U> >& objects2 + ); + /*! 
+ requires + - image_array_type == a dlib::array or std::vector of image objects that each + implement the interface defined in dlib/image_processing/generic_image.h + - is_vector(angles) == true + - angles.size() > 0 + - images.size() == objects.size() + - images.size() == objects2.size() + - T == rectangle, full_object_detection, or mmod_rect + - U == rectangle, full_object_detection, or mmod_rect + ensures + - This function computes angles.size() different rotations of all the given + images and then replaces the contents of images with those rotations of the + input dataset. We will also adjust the rectangles inside objects and + objects2 so that they still bound the same objects in the new rotated images. + That is, we assume objects[i] and objects2[i] are bounding boxes for things + in images[i]. So we will adjust the positions of the boxes in objects and + objects2 accordingly. + - The elements of angles are interpreted as angles in radians and we will + rotate the images around their center using the values in angles. Moreover, + the rotation is done counter clockwise. + - #images.size() == images.size()*angles.size() + - #objects.size() == objects.size()*angles.size() + - #objects2.size() == objects2.size()*angles.size() + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type, + typename EXP, + typename T + > + void add_image_rotations ( + const matrix_exp<EXP>& angles, + image_array_type& images, + std::vector<std::vector<T> >& objects + ); + /*! 
+ requires + - image_array_type == a dlib::array or std::vector of image objects that each + implement the interface defined in dlib/image_processing/generic_image.h + - is_vector(angles) == true + - angles.size() > 0 + - images.size() == objects.size() + - T == rectangle, full_object_detection, or mmod_rect + ensures + - This function is identical to the add_image_rotations() defined above except + that it doesn't have objects2 as an argument. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type + > + void flip_image_dataset_left_right ( + image_array_type& images, + std::vector<std::vector<rectangle> >& objects + ); + /*! + requires + - image_array_type == a dlib::array or std::vector of image objects that each + implement the interface defined in dlib/image_processing/generic_image.h + - images.size() == objects.size() + ensures + - This function replaces each image in images with the left/right flipped + version of the image. Therefore, #images[i] will contain the left/right + flipped version of images[i]. It also flips all the rectangles in objects so + that they still bound the same visual objects in each image. + - #images.size() == images.size() + - #objects.size() == objects.size() + - for all valid i: + #objects[i].size() == objects[i].size() + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type + > + void flip_image_dataset_left_right ( + image_array_type& images, + std::vector<std::vector<rectangle> >& objects, + std::vector<std::vector<rectangle> >& objects2 + ); + /*! 
+ requires + - image_array_type == a dlib::array or std::vector of image objects that each + implement the interface defined in dlib/image_processing/generic_image.h + - images.size() == objects.size() + - images.size() == objects2.size() + ensures + - This function replaces each image in images with the left/right flipped + version of the image. Therefore, #images[i] will contain the left/right + flipped version of images[i]. It also flips all the rectangles in objects + and objects2 so that they still bound the same visual objects in each image. + - #images.size() == images.size() + - #objects.size() == objects.size() + - #objects2.size() == objects2.size() + - for all valid i: + #objects[i].size() == objects[i].size() + - for all valid i: + #objects2[i].size() == objects2[i].size() + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename pyramid_type, + typename image_array_type + > + void upsample_image_dataset ( + image_array_type& images, + std::vector<std::vector<rectangle> >& objects, + unsigned long max_image_size = std::numeric_limits<unsigned long>::max() + ); + /*! + requires + - image_array_type == a dlib::array or std::vector of image objects that each + implement the interface defined in dlib/image_processing/generic_image.h + - images.size() == objects.size() + ensures + - This function replaces each image in images with an upsampled version of that + image. Each image is upsampled using pyramid_up() and the given + pyramid_type. Therefore, #images[i] will contain the larger upsampled + version of images[i]. It also adjusts all the rectangles in objects so that + they still bound the same visual objects in each image. + - Input images already containing more than max_image_size pixels are not upsampled. 
+ - #images.size() == images.size() + - #objects.size() == objects.size() + - for all valid i: + #objects[i].size() == objects[i].size() + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename pyramid_type, + typename image_array_type + > + void upsample_image_dataset ( + image_array_type& images, + std::vector<std::vector<mmod_rect>>& objects, + unsigned long max_image_size = std::numeric_limits<unsigned long>::max() + ); + /*! + requires + - image_array_type == a dlib::array or std::vector of image objects that each + implement the interface defined in dlib/image_processing/generic_image.h + - images.size() == objects.size() + ensures + - This function replaces each image in images with an upsampled version of that + image. Each image is upsampled using pyramid_up() and the given + pyramid_type. Therefore, #images[i] will contain the larger upsampled + version of images[i]. It also adjusts all the rectangles in objects so that + they still bound the same visual objects in each image. + - Input images already containing more than max_image_size pixels are not upsampled. + - #images.size() == images.size() + - #objects.size() == objects.size() + - for all valid i: + #objects[i].size() == objects[i].size() + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename pyramid_type, + typename image_array_type + > + void upsample_image_dataset ( + image_array_type& images, + std::vector<std::vector<rectangle> >& objects, + std::vector<std::vector<rectangle> >& objects2, + unsigned long max_image_size = std::numeric_limits<unsigned long>::max() + ); + /*! 
+ requires + - image_array_type == a dlib::array or std::vector of image objects that each + implement the interface defined in dlib/image_processing/generic_image.h + - images.size() == objects.size() + - images.size() == objects2.size() + ensures + - This function replaces each image in images with an upsampled version of that + image. Each image is upsampled using pyramid_up() and the given + pyramid_type. Therefore, #images[i] will contain the larger upsampled + version of images[i]. It also adjusts all the rectangles in objects and + objects2 so that they still bound the same visual objects in each image. + - Input images already containing more than max_image_size pixels are not upsampled. + - #images.size() == images.size() + - #objects.size() == objects.size() + - #objects2.size() == objects2.size() + - for all valid i: + #objects[i].size() == objects[i].size() + - for all valid i: + #objects2[i].size() == objects2[i].size() + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename image_array_type> + void rotate_image_dataset ( + double angle, + image_array_type& images, + std::vector<std::vector<rectangle> >& objects + ); + /*! + requires + - image_array_type == a dlib::array or std::vector of image objects that each + implement the interface defined in dlib/image_processing/generic_image.h + - images.size() == objects.size() + ensures + - This function replaces each image in images with a rotated version of that + image. In particular, each image is rotated using + rotate_image(original,rotated,angle). Therefore, the images are rotated + angle radians counter clockwise around their centers. That is, #images[i] + will contain the rotated version of images[i]. It also adjusts all + the rectangles in objects so that they still bound the same visual objects in + each image. + - All the rectangles will still have the same sizes and aspect ratios after + rotation. 
They will simply have had their positions adjusted so they still + fall on the same objects. + - #images.size() == images.size() + - #objects.size() == objects.size() + - for all valid i: + #objects[i].size() == objects[i].size() + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename image_array_type> + void rotate_image_dataset ( + double angle, + image_array_type& images, + std::vector<std::vector<rectangle> >& objects, + std::vector<std::vector<rectangle> >& objects2 + ); + /*! + requires + - image_array_type == a dlib::array or std::vector of image objects that each + implement the interface defined in dlib/image_processing/generic_image.h + - images.size() == objects.size() + - images.size() == objects2.size() + ensures + - This function replaces each image in images with a rotated version of that + image. In particular, each image is rotated using + rotate_image(original,rotated,angle). Therefore, the images are rotated + angle radians counter clockwise around their centers. That is, #images[i] + will contain the rotated version of images[i]. It also adjusts all + the rectangles in objects and objects2 so that they still bound the same + visual objects in each image. + - All the rectangles will still have the same sizes and aspect ratios after + rotation. They will simply have had their positions adjusted so they still + fall on the same objects. + - #images.size() == images.size() + - #objects.size() == objects.size() + - #objects2.size() == objects2.size() + - for all valid i: + #objects[i].size() == objects[i].size() + - for all valid i: + #objects2[i].size() == objects2[i].size() + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2 + > + void flip_image_up_down ( + const image_type1& in_img, + image_type2& out_img + ); + /*! 
+ requires + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - is_same_object(in_img, out_img) == false + ensures + - #out_img.nr() == in_img.nr() + - #out_img.nc() == in_img.nc() + - #out_img == a copy of in_img which has been flipped upside down. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename pyramid_type, + typename interpolation_type + > + void pyramid_up ( + const image_type1& in_img, + image_type2& out_img, + const pyramid_type& pyr, + const interpolation_type& interp + ); + /*! + requires + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - pyramid_type == a type compatible with the image pyramid objects defined + in dlib/image_transforms/image_pyramid_abstract.h + - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear, + interpolate_quadratic, or a type with a compatible interface. + - is_same_object(in_img, out_img) == false + ensures + - This function inverts the downsampling transformation performed by pyr(). + In particular, it attempts to make an image, out_img, which would result + in in_img when downsampled with pyr(). + - #out_img == An upsampled copy of in_img. In particular, downsampling + #out_img 1 time with pyr() should result in a final image which looks like + in_img. + - Uses the supplied interpolation routine interp to perform the necessary + pixel interpolation. 
+ - Note that downsampling an image with pyr() and then upsampling it with + pyramid_up() will not necessarily result in a final image which is + the same size as the original. This is because the exact size of the + original image cannot be determined based on the downsampled image. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename pyramid_type + > + void pyramid_up ( + const image_type1& in_img, + image_type2& out_img, + const pyramid_type& pyr + ); + /*! + requires + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - pyramid_type == a type compatible with the image pyramid objects defined + in dlib/image_transforms/image_pyramid_abstract.h + - is_same_object(in_img, out_img) == false + ensures + - performs: pyramid_up(in_img, out_img, pyr, interpolate_bilinear()); + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename pyramid_type + > + void pyramid_up ( + image_type& img, + const pyramid_type& pyr + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - pyramid_type == a type compatible with the image pyramid objects defined + in dlib/image_transforms/image_pyramid_abstract.h + ensures + - Performs an in-place version of pyramid_up() on the given image. In + particular, this function is equivalent to: + pyramid_up(img, temp, pyr); + temp.swap(img); + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void pyramid_up ( + image_type& img + ); + /*! 
+ requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + ensures + - performs: pyramid_up(img, pyramid_down<2>()); + (i.e. it upsamples the given image and doubles it in size.) + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + struct chip_dims + { + /*! + WHAT THIS OBJECT REPRESENTS + This is a simple tool for passing in a pair of row and column values to the + chip_details constructor. + !*/ + + chip_dims ( + unsigned long rows_, + unsigned long cols_ + ) : rows(rows_), cols(cols_) { } + + unsigned long rows; + unsigned long cols; + }; + +// ---------------------------------------------------------------------------------------- + + struct chip_details + { + /*! + WHAT THIS OBJECT REPRESENTS + This object describes where an image chip is to be extracted from within + another image. In particular, it specifies that the image chip is + contained within the rectangle this->rect and that prior to extraction the + image should be rotated counter-clockwise by this->angle radians. Finally, + the extracted chip should have this->rows rows and this->cols columns in it + regardless of the shape of this->rect. This means that the extracted chip + will be stretched to fit via bilinear interpolation when necessary. + !*/ + + chip_details( + ); + /*! + ensures + - #rect.is_empty() == true + - #size() == 0 + - #angle == 0 + - #rows == 0 + - #cols == 0 + !*/ + + chip_details( + const drectangle& rect_ + ); + /*! + ensures + - #rect == rect_ + - #size() == rect_.area() + - #angle == 0 + - #rows == rect_.height() + - #cols == rect_.width() + !*/ + + chip_details( + const rectangle& rect_ + ); + /*! 
+ ensures + - #rect == rect_ + - #size() == rect_.area() + - #angle == 0 + - #rows == rect_.height() + - #cols == rect_.width() + !*/ + + chip_details( + const drectangle& rect_, + unsigned long size_ + ); + /*! + ensures + - #rect == rect_ + - #size() == size_ + - #angle == 0 + - #rows and #cols are set such that the total size of the chip is as close + to size_ as possible but still matches the aspect ratio of rect_. + - As long as size_ and the aspect ratio of rect_ stay constant then + #rows and #cols will always have the same values. This means that, for + example, if you want all your chips to have the same dimensions then + ensure that size_ is always the same and also that rect_ always has the + same aspect ratio. Otherwise the calculated values of #rows and #cols + may be different for different chips. Alternatively, you can use the + chip_details constructor below that lets you specify the exact values for + rows and cols. + !*/ + + chip_details( + const drectangle& rect_, + unsigned long size_, + double angle_ + ); + /*! + ensures + - #rect == rect_ + - #size() == size_ + - #angle == angle_ + - #rows and #cols are set such that the total size of the chip is as close + to size_ as possible but still matches the aspect ratio of rect_. + - As long as size_ and the aspect ratio of rect_ stay constant then + #rows and #cols will always have the same values. This means that, for + example, if you want all your chips to have the same dimensions then + ensure that size_ is always the same and also that rect_ always has the + same aspect ratio. Otherwise the calculated values of #rows and #cols + may be different for different chips. Alternatively, you can use the + chip_details constructor below that lets you specify the exact values for + rows and cols. + !*/ + + chip_details( + const drectangle& rect_, + const chip_dims& dims + ); + /*! 
+ ensures + - #rect == rect_ + - #size() == dims.rows*dims.cols + - #angle == 0 + - #rows == dims.rows + - #cols == dims.cols + !*/ + + chip_details( + const drectangle& rect_, + const chip_dims& dims, + double angle_ + ); + /*! + ensures + - #rect == rect_ + - #size() == dims.rows*dims.cols + - #angle == angle_ + - #rows == dims.rows + - #cols == dims.cols + !*/ + + template <typename T> + chip_details( + const std::vector<dlib::vector<T,2> >& chip_points, + const std::vector<dlib::vector<T,2> >& img_points, + const chip_dims& dims + ); + /*! + requires + - chip_points.size() == img_points.size() + - chip_points.size() >= 2 + ensures + - The chip will be extracted such that the pixel locations chip_points[i] + in the chip are mapped to img_points[i] in the original image by a + similarity transform. That is, if you know the pixelwise mapping you + want between the chip and the original image then you use this version + of the chip_details constructor to define the mapping. + - #rows == dims.rows + - #cols == dims.cols + - #size() == dims.rows*dims.cols + - #rect and #angle are computed based on the given size of the output chip + (specified by dims) and the similarity transform between the chip and + image (specified by chip_points and img_points). + !*/ + + inline unsigned long size() const { return rows*cols; } + /*! + ensures + - returns the number of pixels in this chip. This is just rows*cols. + !*/ + + drectangle rect; + double angle; + unsigned long rows; + unsigned long cols; + }; + +// ---------------------------------------------------------------------------------------- + + point_transform_affine get_mapping_to_chip ( + const chip_details& details + ); + /*! + ensures + - returns a transformation that maps from the pixels in the original image + to the pixels in the cropped image defined by the given details object. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + + full_object_detection map_det_to_chip ( + const full_object_detection& det, + const chip_details& details + ); + /*! + ensures + - Maps the given detection into the pixel space of the image chip defined by + the given details object. That is, this function returns an object D such + that: + - D.get_rect() == a box that bounds the same thing in the image chip as + det.get_rect() bounds in the original image the chip is extracted from. + - for all valid i: + - D.part(i) == the location in the image chip corresponding to + det.part(i) in the original image. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename interpolation_type + > + void extract_image_chips ( + const image_type1& img, + const std::vector<chip_details>& chip_locations, + dlib::array<image_type2>& chips, + const interpolation_type& interp + ); + /*! + requires + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - pixel_traits<typename image_traits<image_type1>::pixel_type>::has_alpha == false + - for all valid i: + - chip_locations[i].rect.is_empty() == false + - chip_locations[i].size() != 0 + - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear, + interpolate_quadratic, or a type with a compatible interface. + ensures + - This function extracts "chips" from an image. That is, it takes a list of + rectangular sub-windows (i.e. chips) within an image and extracts those + sub-windows, storing each into its own image. It also scales and rotates the + image chips according to the instructions inside each chip_details object. 
+ It uses the interpolation method supplied as a parameter. + - #chips == the extracted image chips + - #chips.size() == chip_locations.size() + - for all valid i: + - #chips[i] == The image chip extracted from the position + chip_locations[i].rect in img. + - #chips[i].nr() == chip_locations[i].rows + - #chips[i].nc() == chip_locations[i].cols + - The image will have been rotated counter-clockwise by + chip_locations[i].angle radians, around the center of + chip_locations[i].rect, before the chip was extracted. + - Any pixels in an image chip that go outside img are set to 0 (i.e. black). + !*/ + + template < + typename image_type1, + typename image_type2 + > + void extract_image_chips ( + const image_type1& img, + const std::vector<chip_details>& chip_locations, + dlib::array<image_type2>& chips + ); + /*! + ensures + - This function is a simple convenience / compatibility wrapper that calls the + above-defined extract_image_chips() function using bilinear interpolation. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2, + typename interpolation_type + > + void extract_image_chip ( + const image_type1& img, + const chip_details& chip_location, + image_type2& chip, + const interpolation_type& interp + ); + /*! + ensures + - This function simply calls extract_image_chips() with a single chip location + and stores the single output chip into #chip. It uses the provided + interpolation method. + !*/ + + template < + typename image_type1, + typename image_type2 + > + void extract_image_chip ( + const image_type1& img, + const chip_details& chip_location, + image_type2& chip + ); + /*! + ensures + - This function is a simple convenience / compatibility wrapper that calls the + above-defined extract_image_chip() function using bilinear interpolation. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + struct sub_image_proxy + { + /*! + REQUIREMENTS ON image_type + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + + WHAT THIS OBJECT REPRESENTS + This is a lightweight image object for referencing a subwindow of an image. + It implements the generic image interface and can therefore be used with + any function that expects a generic image, excepting that you cannot change + the size of a sub_image_proxy. + + Note that it only stores a pointer to the image data given to its + constructor and therefore does not perform a copy. Moreover, this means + that an instance of this object becomes invalid after the underlying image + data it references is destroyed. + !*/ + sub_image_proxy ( + image_type& img, + const rectangle& rect + ); + /*! + ensures + - This object is an image that represents the part of img contained within + rect. If rect is larger than img then rect is cropped so that it does + not go outside img. + !*/ + }; + + template < + typename image_type + > + sub_image_proxy<image_type> sub_image ( + image_type& img, + const rectangle& rect + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + ensures + - returns sub_image_proxy<image_type>(img,rect) + !*/ + + template <typename T> + sub_image_proxy<some_appropriate_type> sub_image ( + T* img, + long nr, + long nc, + long row_stride + ); + /*! + requires + - img == a pointer to at least nr*row_stride T objects + - nr >= 0 + - nc >= 0 + - row_stride >= 0 + ensures + - This function returns an image that is just a thin wrapper around the given + pointer. It will have the dimensions defined by the supplied longs. 
To be + precise, this function returns an image object IMG such that: + - image_data(IMG) == img + - num_rows(IMG) == nr + - num_columns(IMG) == nc + - width_step(IMG) == row_stride*sizeof(T) + - IMG contains pixels of type T. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + struct const_sub_image_proxy + { + /*! + REQUIREMENTS ON image_type + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + + WHAT THIS OBJECT REPRESENTS + This object is just like sub_image_proxy except that it does not allow the + pixel data to be modified. + !*/ + const_sub_image_proxy ( + const image_type& img, + const rectangle& rect + ); + /*! + ensures + - This object is an image that represents the part of img contained within + rect. If rect is larger than img then rect is cropped so that it does + not go outside img. + !*/ + }; + + template < + typename image_type + > + const const_sub_image_proxy<image_type> sub_image ( + const image_type& img, + const rectangle& rect + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + ensures + - returns const_sub_image_proxy<image_type>(img,rect) + !*/ + + template <typename T> + const const_sub_image_proxy<some_appropriate_type> sub_image ( + const T* img, + long nr, + long nc, + long row_stride + ); + /*! + requires + - img == a pointer to at least nr*row_stride T objects + - nr >= 0 + - nc >= 0 + - row_stride >= 0 + ensures + - This function returns an image that is just a thin wrapper around the given + pointer. It will have the dimensions defined by the supplied longs. 
To be + precise, this function returns an image object IMG such that: + - image_data(IMG) == img + - num_rows(IMG) == nr + - num_columns(IMG) == nc + - width_step(IMG) == row_stride*sizeof(T) + - IMG contains pixels of type T. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + + chip_details get_face_chip_details ( + const full_object_detection& det, + const unsigned long size = 200, + const double padding = 0.2 + ); + /*! + requires + - det.num_parts() == 68 || det.num_parts() == 5 + - size > 0 + - padding >= 0 + ensures + - This function assumes det contains a human face detection with face parts + annotated using the annotation scheme from the iBUG 300-W face landmark + dataset or a 5 point face annotation. Given these assumptions, it creates a + chip_details object that will extract a copy of the face that has been + rotated upright, centered, and scaled to a standard size when given to + extract_image_chip(). + - This function is specifically calibrated to work with one of these models: + - http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2 + - http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 + - The extracted chips will have size rows and columns in them. + - if padding == 0 then the chip will be closely cropped around the face. + Setting larger padding values will result in a looser cropping. In particular, + a padding of 0.5 would double the width of the cropped area, a value of 1 + would triple it, and so forth. + - The 5 point face annotation scheme is assumed to be: + - det part 0 == left eye corner, outside part of eye. + - det part 1 == left eye corner, inside part of eye. + - det part 2 == right eye corner, outside part of eye. + - det part 3 == right eye corner, inside part of eye. + - det part 4 == immediately under the nose, right at the top of the philtrum. + !*/ + +// ---------------------------------------------------------------------------------------- + + std::vector<chip_details> get_face_chip_details ( + const std::vector<full_object_detection>& dets, + const unsigned long size = 200, + const double padding = 0.2 + ); + /*!
+ requires + - for all valid i: + - dets[i].num_parts() == 68 || dets[i].num_parts() == 5 + - size > 0 + - padding >= 0 + ensures + - This function is identical to the version of get_face_chip_details() defined + above except that it creates and returns an array of chip_details objects, + one for each input full_object_detection. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + image_type jitter_image( + const image_type& img, + dlib::rand& rnd + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false + - img.size() > 0 + - img.nr() == img.nc() + ensures + - Randomly jitters the image a little bit and returns this new jittered image. + To be specific, the returned image has the same size as img and will look + generally similar. The difference is that the returned image will have been + slightly rotated, zoomed, and translated. There is also a 50% chance it will + be mirrored left to right. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_INTERPOlATION_ABSTRACT_ + diff --git a/ml/dlib/dlib/image_transforms/label_connected_blobs.h b/ml/dlib/dlib/image_transforms/label_connected_blobs.h new file mode 100644 index 000000000..c25346c76 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/label_connected_blobs.h @@ -0,0 +1,188 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_LABEL_CONNeCTED_BLOBS_H_ +#define DLIB_LABEL_CONNeCTED_BLOBS_H_ + +#include "label_connected_blobs_abstract.h" +#include "../geometry.h" +#include <stack> +#include <vector> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + struct neighbors_8 + { + void operator() ( + const point& p, + std::vector<point>& neighbors + ) const + { + neighbors.push_back(point(p.x()+1,p.y()+1)); + neighbors.push_back(point(p.x()+1,p.y() )); + neighbors.push_back(point(p.x()+1,p.y()-1)); + + neighbors.push_back(point(p.x(),p.y()+1)); + neighbors.push_back(point(p.x(),p.y()-1)); + + neighbors.push_back(point(p.x()-1,p.y()+1)); + neighbors.push_back(point(p.x()-1,p.y() )); + neighbors.push_back(point(p.x()-1,p.y()-1)); + } + }; + + struct neighbors_4 + { + void operator() ( + const point& p, + std::vector<point>& neighbors + ) const + { + neighbors.push_back(point(p.x()+1,p.y())); + neighbors.push_back(point(p.x()-1,p.y())); + neighbors.push_back(point(p.x(),p.y()+1)); + neighbors.push_back(point(p.x(),p.y()-1)); + } + }; + +// ---------------------------------------------------------------------------------------- + + struct connected_if_both_not_zero + { + template <typename image_type> + bool operator() ( + const image_type& img, + const point& a, + const point& b + ) const + { + return (img[a.y()][a.x()] != 0 && img[b.y()][b.x()] != 0); + } + }; + + struct connected_if_equal + { + template <typename image_type> + bool operator() ( + const image_type& img, + const point& a, + const point& b + ) const + { + return (img[a.y()][a.x()] == img[b.y()][b.x()]); + } + }; + +// ---------------------------------------------------------------------------------------- + + struct zero_pixels_are_background + { + template <typename image_type> + bool operator() ( + const image_type& img, + const point& p + ) const + { + return img[p.y()][p.x()] == 0; + } + + }; + + struct nothing_is_background + { + template 
<typename image_type> + bool operator() ( + const image_type&, + const point& + ) const + { + return false; + } + + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename label_image_type, + typename background_functor_type, + typename neighbors_functor_type, + typename connected_functor_type + > + unsigned long label_connected_blobs ( + const image_type& img_, + const background_functor_type& is_background, + const neighbors_functor_type& get_neighbors, + const connected_functor_type& is_connected, + label_image_type& label_img_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_same_object(img_, label_img_) == false, + "\t unsigned long label_connected_blobs()" + << "\n\t The input image and output label image can't be the same object." + ); + + const_image_view<image_type> img(img_); + image_view<label_image_type> label_img(label_img_); + + std::stack<point> neighbors; + label_img.set_size(img.nr(), img.nc()); + assign_all_pixels(label_img, 0); + unsigned long next = 1; + + if (img.size() == 0) + return 0; + + const rectangle area = get_rect(img); + + std::vector<point> window; + + for (long r = 0; r < img.nr(); ++r) + { + for (long c = 0; c < img.nc(); ++c) + { + // skip already labeled pixels or background pixels + if (label_img[r][c] != 0 || is_background(img,point(c,r))) + continue; + + label_img[r][c] = next; + + // label all the neighbors of this point + neighbors.push(point(c,r)); + while (neighbors.size() > 0) + { + const point p = neighbors.top(); + neighbors.pop(); + + window.clear(); + get_neighbors(p, window); + + for (unsigned long i = 0; i < window.size(); ++i) + { + if (area.contains(window[i]) && // point in image. + !is_background(img,window[i]) && // isn't background. + label_img[window[i].y()][window[i].x()] == 0 && // haven't already labeled it. + is_connected(img, p, window[i])) // it's connected. 
+ { + label_img[window[i].y()][window[i].x()] = next; + neighbors.push(window[i]); + } + } + } + + ++next; + } + } + + return next; + } +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_LABEL_CONNeCTED_BLOBS_H_ + diff --git a/ml/dlib/dlib/image_transforms/label_connected_blobs_abstract.h b/ml/dlib/dlib/image_transforms/label_connected_blobs_abstract.h new file mode 100644 index 000000000..5dc984000 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/label_connected_blobs_abstract.h @@ -0,0 +1,199 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_LABEL_CONNeCTED_BLOBS_ABSTRACT_H_ +#ifdef DLIB_LABEL_CONNeCTED_BLOBS_ABSTRACT_H_ + +#include "../geometry.h" +#include <vector> +#include "../image_processing/generic_image.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + struct neighbors_8 + { + /*! + WHAT THIS OBJECT REPRESENTS + This object is a pixel neighborhood generating functor for + use with the label_connected_blobs() routine defined below. + !*/ + + void operator() ( + const point& p, + std::vector<point>& neighbors + ) const; + /*! + ensures + - adds the 8 neighboring pixels surrounding p into neighbors + !*/ + }; + + struct neighbors_4 + { + /*! + WHAT THIS OBJECT REPRESENTS + This object is a pixel neighborhood generating functor for + use with the label_connected_blobs() routine defined below. + !*/ + + void operator() ( + const point& p, + std::vector<point>& neighbors + ) const; + /*! + ensures + - adds the 4 neighboring pixels of p into neighbors. These + are the ones immediately to the left, top, right, and bottom. + !*/ + }; + +// ---------------------------------------------------------------------------------------- + + struct connected_if_both_not_zero + { + /*! 
+ WHAT THIS OBJECT REPRESENTS + This object is a pixel connection testing functor for use + with the label_connected_blobs() routine defined below. + !*/ + + template <typename image_view_type> + bool operator() ( + const image_view_type& img, + const point& a, + const point& b + ) const + { + return (img[a.y()][a.x()] != 0 && img[b.y()][b.x()] != 0); + } + }; + + struct connected_if_equal + { + /*! + WHAT THIS OBJECT REPRESENTS + This object is a pixel connection testing functor for use + with the label_connected_blobs() routine defined below. + !*/ + + template <typename image_view_type> + bool operator() ( + const image_view_type& img, + const point& a, + const point& b + ) const + { + return (img[a.y()][a.x()] == img[b.y()][b.x()]); + } + }; + +// ---------------------------------------------------------------------------------------- + + struct zero_pixels_are_background + { + /*! + WHAT THIS OBJECT REPRESENTS + This object is a background testing functor for use + with the label_connected_blobs() routine defined below. + !*/ + + template <typename image_view_type> + bool operator() ( + const image_view_type& img, + const point& p + ) const + { + return img[p.y()][p.x()] == 0; + } + + }; + + struct nothing_is_background + { + /*! + WHAT THIS OBJECT REPRESENTS + This object is a background testing functor for use + with the label_connected_blobs() routine defined below. 
+ !*/ + + template <typename image_view_type> + bool operator() ( + const image_view_type&, + const point& + ) const + { + return false; + } + + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename label_image_type, + typename background_functor_type, + typename neighbors_functor_type, + typename connected_functor_type + > + unsigned long label_connected_blobs ( + const image_type& img, + const background_functor_type& is_background, + const neighbors_functor_type& get_neighbors, + const connected_functor_type& is_connected, + label_image_type& label_img + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - label_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h and it must contain integer pixels. + - is_background(img, point(c,r)) is a legal expression that evaluates to a bool. + - is_connected(img, point(c,r), point(c2,r2)) is a legal expression that + evaluates to a bool. + - get_neighbors(point(c,r), neighbors) is a legal expression where neighbors + is of type std::vector<point>. + - is_same_object(img, label_img) == false + ensures + - This function labels each of the connected blobs in img with a unique integer + label. + - An image can be thought of as a graph where pixels A and B are connected if + and only if the following two statements are satisfied: + - is_connected(img,A,B) == true + - get_neighbors(A, neighbors) results in neighbors containing B or + get_neighbors(B, neighbors) results in neighbors containing A. + Then this function can be understood as labeling all the connected components + of this pixel graph such that all pixels in a component get the same label while + pixels in different components get different labels. Note that there is a + special "background" component determined by is_background(). 
Any pixels which + are "background" always get a blob id of 0 regardless of any other considerations. + - #label_img.nr() == img.nr() + - #label_img.nc() == img.nc() + - for all valid r and c: + - #label_img[r][c] == the blob label number for pixel img[r][c]. + - #label_img[r][c] >= 0 + - if (is_background(img, point(c,r))) then + - #label_img[r][c] == 0 + - else + - #label_img[r][c] != 0 + - if (img.size() != 0) then + - returns max(mat(#label_img))+1 + (i.e. returns a number one greater than the maximum blob id number, + this is the number of blobs found.) + - else + - returns 0 + - blob labels are contiguous, therefore, the number returned by this function is + the number of blobs in the image (including the background blob). + - It is guaranteed that is_connected() and is_background() will never be + called with points outside the image. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_LABEL_CONNeCTED_BLOBS_ABSTRACT_H_ + diff --git a/ml/dlib/dlib/image_transforms/lbp.h b/ml/dlib/dlib/image_transforms/lbp.h new file mode 100644 index 000000000..b6bbac9cf --- /dev/null +++ b/ml/dlib/dlib/image_transforms/lbp.h @@ -0,0 +1,307 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_LBP_Hh_ +#define DLIB_LBP_Hh_ + +#include "lbp_abstract.h" +#include "../image_processing/generic_image.h" +#include "assign_image.h" +#include "../pixel.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename image_type2 + > + void make_uniform_lbp_image ( + const image_type& img_, + image_type2& lbp_ + ) + { + const static unsigned char uniform_lbps[] = { + 0, 1, 2, 3, 4, 58, 5, 6, 7, 58, 58, 58, 8, 58, 9, 10, 11, 58, 58, 58, 58, 58, + 58, 58, 12, 58, 58, 58, 13, 58, 14, 15, 16, 58, 58, 58, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 17, 58, 58, 58, 58, 58, 58, 58, 18, 58, 58, 58, 19, 58, + 20, 21, 22, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 23, 58, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 24, 58, 58, 58, 58, 58, 58, 58, 25, 58, + 58, 58, 26, 58, 27, 28, 29, 30, 58, 31, 58, 58, 58, 32, 58, 58, 58, 58, 58, 58, + 58, 33, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 34, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58, 35, 36, 37, 58, 38, 58, 58, 58, 39, 58, 58, + 58, 58, 58, 58, 58, 40, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, + 58, 41, 42, 43, 58, 44, 58, 58, 58, 45, 58, 58, 58, 58, 58, 58, 58, 46, 47, 48, + 58, 49, 58, 58, 58, 50, 51, 52, 58, 53, 54, 55, 56, 57 + }; + + COMPILE_TIME_ASSERT(sizeof(uniform_lbps) == 256); + + const_image_view<image_type> img(img_); + image_view<image_type2> lbp(lbp_); + + lbp.set_size(img.nr(), img.nc()); + + // set all the border pixels to the "non-uniform LBP value". 
+ assign_border_pixels(lbp, 1, 1, 58); + + typedef typename image_traits<image_type>::pixel_type pixel_type; + typedef typename pixel_traits<pixel_type>::basic_pixel_type basic_pixel_type; + + for (long r = 1; r+1 < img.nr(); ++r) + { + for (long c = 1; c+1 < img.nc(); ++c) + { + const basic_pixel_type pix = get_pixel_intensity(img[r][c]); + unsigned char b1 = 0; + unsigned char b2 = 0; + unsigned char b3 = 0; + unsigned char b4 = 0; + unsigned char b5 = 0; + unsigned char b6 = 0; + unsigned char b7 = 0; + unsigned char b8 = 0; + + unsigned char x = 0; + if (get_pixel_intensity(img[r-1][c-1]) > pix) b1 = 0x80; + if (get_pixel_intensity(img[r-1][c ]) > pix) b2 = 0x40; + if (get_pixel_intensity(img[r-1][c+1]) > pix) b3 = 0x20; + x |= b1; + if (get_pixel_intensity(img[r ][c-1]) > pix) b4 = 0x10; + x |= b2; + if (get_pixel_intensity(img[r ][c+1]) > pix) b5 = 0x08; + x |= b3; + if (get_pixel_intensity(img[r+1][c-1]) > pix) b6 = 0x04; + x |= b4; + if (get_pixel_intensity(img[r+1][c ]) > pix) b7 = 0x02; + x |= b5; + if (get_pixel_intensity(img[r+1][c+1]) > pix) b8 = 0x01; + + x |= b6; + x |= b7; + x |= b8; + + lbp[r][c] = uniform_lbps[x]; + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename T + > + void extract_histogram_descriptors ( + const image_type& img_, + const point& loc, + std::vector<T>& histograms, + const unsigned int cell_size = 10, + const unsigned int block_size = 4, + const unsigned int max_val = 58 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(cell_size >= 1 && block_size >= 1 && max_val < 256 && + (unsigned int)max(mat(img_)) <= max_val, + "\t void extract_histogram_descriptors()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t cell_size: " << cell_size + << "\n\t block_size: " << block_size + << "\n\t max_val: " << max_val + << "\n\t max(mat(img_)): " << max(mat(img_)) + ); + + typedef typename image_traits<image_type>::pixel_type pixel_type; + COMPILE_TIME_ASSERT((is_same_type<pixel_type, unsigned char>::value)); + + const_image_view<image_type> img(img_); + + const rectangle area = get_rect(img); + const rectangle window = centered_rect(loc, block_size*cell_size, block_size*cell_size); + unsigned int cell_top = window.top(); + for (unsigned int br = 0; br < block_size; ++br) + { + unsigned int cell_left = window.left(); + for (unsigned int bc = 0; bc < block_size; ++bc) + { + // figure out the cell boundaries + rectangle cell(cell_left, cell_top, cell_left+cell_size-1, cell_top+cell_size-1); + cell = cell.intersect(area); + + // make the actual histogram for this cell + unsigned int hist[256] = {0}; + for (long r = cell.top(); r <= cell.bottom(); ++r) + { + for (long c = cell.left(); c <= cell.right(); ++c) + { + hist[img[r][c]]++; + } + } + + // copy histogram into the output. + histograms.insert(histograms.end(), hist, hist + max_val+1); + + cell_left += cell_size; + } + cell_top += cell_size; + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename T + > + void extract_uniform_lbp_descriptors ( + const image_type& img, + std::vector<T>& feats, + const unsigned int cell_size = 10 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(cell_size >= 1, + "\t void extract_uniform_lbp_descriptors()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t cell_size: " << cell_size + ); + + feats.clear(); + array2d<unsigned char> lbp; + make_uniform_lbp_image(img, lbp); + for (long r = 0; r < lbp.nr(); r+=cell_size) + { + for (long c = 0; c < lbp.nc(); c+=cell_size) + { + const rectangle cell = rectangle(c,r,c+cell_size-1,r+cell_size-1).intersect(get_rect(lbp)); + // make the actual histogram for this cell + unsigned int hist[59] = {0}; + for (long r = cell.top(); r <= cell.bottom(); ++r) + { + for (long c = cell.left(); c <= cell.right(); ++c) + { + hist[lbp[r][c]]++; + } + } + + // copy histogram into the output. + feats.insert(feats.end(), hist, hist + 59); + } + } + + for (unsigned long i = 0; i < feats.size(); ++i) + feats[i] = std::sqrt(feats[i]); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename T + > + void extract_highdim_face_lbp_descriptors ( + const image_type& img, + const full_object_detection& det, + std::vector<T>& feats + ) + { + // make sure requires clause is not broken + DLIB_CASSERT(det.num_parts() == 68, + "\t void extract_highdim_face_lbp_descriptors()" + << "\n\t Invalid inputs were given to this function." + << "\n\t det.num_parts(): " << det.num_parts() + ); + + const unsigned long num_scales = 5; + feats.clear(); + dlib::vector<double,2> l, r; + double cnt = 0; + // Find the center of the left eye by averaging the points around + // the eye. + for (unsigned long i = 36; i <= 41; ++i) + { + l += det.part(i); + ++cnt; + } + l /= cnt; + + // Find the center of the right eye by averaging the points around + // the eye. + cnt = 0; + for (unsigned long i = 42; i <= 47; ++i) + { + r += det.part(i); + ++cnt; + } + r /= cnt; + + // We only do feature extraction from these face parts. These are things like the + // corners of the eyes and mouth and stuff like that. 
+ std::vector<point> parts; + parts.reserve(30); + parts.push_back(l); + parts.push_back(r); + parts.push_back(det.part(17)); + parts.push_back(det.part(21)); + parts.push_back(det.part(22)); + parts.push_back(det.part(26)); + parts.push_back(det.part(36)); + parts.push_back(det.part(39)); + parts.push_back(det.part(42)); + parts.push_back(det.part(45)); + parts.push_back(det.part(27)); + parts.push_back(det.part(28)); + parts.push_back(det.part(29)); + parts.push_back(det.part(30)); + parts.push_back(det.part(31)); + parts.push_back(det.part(35)); + parts.push_back(det.part(33)); + parts.push_back(det.part(48)); + parts.push_back(det.part(54)); + parts.push_back(det.part(51)); + parts.push_back(det.part(57)); + + array2d<unsigned char> lbp; + make_uniform_lbp_image(img, lbp); + for (unsigned long i = 0; i < parts.size(); ++i) + extract_histogram_descriptors(lbp, parts[i], feats); + + if (num_scales > 1) + { + pyramid_down<4> pyr; + image_type img_temp; + pyr(img, img_temp); + unsigned long num_pyr_calls = 1; + + // now pull the features out at coarser scales + for (unsigned long iter = 1; iter < num_scales; ++iter) + { + // now do the feature extraction + make_uniform_lbp_image(img_temp, lbp); + for (unsigned long i = 0; i < parts.size(); ++i) + extract_histogram_descriptors(lbp, pyr.point_down(parts[i],num_pyr_calls), feats); + + if (iter+1 < num_scales) + { + pyr(img_temp); + ++num_pyr_calls; + } + } + } + + for (unsigned long i = 0; i < feats.size(); ++i) + feats[i] = std::sqrt(feats[i]); + + DLIB_ASSERT(feats.size() == 99120, feats.size()); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_LBP_Hh_ + diff --git a/ml/dlib/dlib/image_transforms/lbp_abstract.h b/ml/dlib/dlib/image_transforms/lbp_abstract.h new file mode 100644 index 000000000..1a20082a2 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/lbp_abstract.h @@ -0,0 +1,139 @@ +// Copyright (C) 2014 Davis E. 
King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_LBP_ABSTRACT_Hh_ +#ifdef DLIB_LBP_ABSTRACT_Hh_ + +#include "../image_processing/generic_image.h" +#include "../pixel.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename image_type2 + > + void make_uniform_lbp_image ( + const image_type& img, + image_type2& lbp + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 should contain a grayscale pixel type such as unsigned char. + ensures + - #lbp.nr() == img.nr() + - #lbp.nc() == img.nc() + - This function extracts the uniform local-binary-pattern feature at every pixel + and stores it into #lbp. In particular, we have the following for all valid + r and c: + - #lbp[r][c] == the uniform LBP for the 3x3 pixel window centered on img[r][c]. + In particular, this is a value in the range 0 to 58 inclusive. + - We use the idea of uniform LBPs from the paper: + Face Description with Local Binary Patterns: Application to Face Recognition + by Ahonen, Hadid, and Pietikainen. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename T + > + void extract_histogram_descriptors ( + const image_type& img, + const point& loc, + std::vector<T>& histograms, + const unsigned int cell_size = 10, + const unsigned int block_size = 4, + const unsigned int max_val = 58 + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type contains unsigned char valued pixels. 
+ - T is some scalar type like int or double + - All pixel values in img are <= max_val + - cell_size >= 1 + - block_size >= 1 + - max_val < 256 + ensures + - This function extracts histograms of pixel values from block_size*block_size + windows in the area in img immediately around img[loc.y()][loc.x()]. The + histograms are appended onto the end of #histograms. Each window is + cell_size pixels wide and tall. Moreover, the windows do not overlap. + - #histograms.size() == histograms.size() + block_size*block_size*(max_val+1) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename T + > + void extract_uniform_lbp_descriptors ( + const image_type& img, + std::vector<T>& feats, + const unsigned int cell_size = 10 + ); + /*! + requires + - cell_size >= 1 + - T is some scalar type like int or double + ensures + - Extracts histograms of uniform local-binary-patterns from img. The + histograms are from densely tiled windows that are cell_size pixels wide and + tall. The windows do not overlap and cover all of img. + - #feats.size() == 59*(number of windows that fit into img) + (i.e. #feats contains the LBP histograms) + - We will have taken the square root of all the histogram elements. That is, + #feats[i] is the square root of the number of LBPs that appeared in its + corresponding window. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type, + typename T + > + void extract_highdim_face_lbp_descriptors ( + const image_type& img, + const full_object_detection& det, + std::vector<T>& feats + ); + /*! 
+ requires + - T is some scalar type like int or double + - det.num_parts() == 68 + ensures + - This function extracts the high-dimensional LBP feature described in the + paper: + Blessing of Dimensionality: High-dimensional Feature and Its Efficient + Compression for Face Verification by Dong Chen, Xudong Cao, Fang Wen, and + Jian Sun + - #feats == the high-dimensional LBP descriptor. It is the concatenation of + many LBP histograms, each extracted from different scales and from different + windows around different face landmarks. We also take the square root of + each histogram element before storing it into #feats. + - #feats.size() == 99120 + - This function assumes img has already been aligned and normalized to a + standard size. + - This function assumes det contains a human face detection with face parts + annotated using the annotation scheme from the iBUG 300-W face landmark + dataset. This means that det.part(i) gives the locations of different face + landmarks according to the iBUG 300-W annotation scheme. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_LBP_ABSTRACT_Hh_ + diff --git a/ml/dlib/dlib/image_transforms/morphological_operations.h b/ml/dlib/dlib/image_transforms/morphological_operations.h new file mode 100644 index 000000000..a659e4bdc --- /dev/null +++ b/ml/dlib/dlib/image_transforms/morphological_operations.h @@ -0,0 +1,846 @@ +// Copyright (C) 2006 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_MORPHOLOGICAL_OPERATIONs_ +#define DLIB_MORPHOLOGICAL_OPERATIONs_ + +#include "../pixel.h" +#include "thresholding.h" +#include "morphological_operations_abstract.h" +#include "assign_image.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + namespace morphological_operations_helpers + { + template <typename image_type> + bool is_binary_image ( + const image_type& img_ + ) + /*! + ensures + - returns true if img_ contains only on_pixel and off_pixel values. + - returns false otherwise + !*/ + { + const_image_view<image_type> img(img_); + for (long r = 0; r < img.nr(); ++r) + { + for (long c = 0; c < img.nc(); ++c) + { + if (img[r][c] != on_pixel && img[r][c] != off_pixel) + { + return false; + } + } + } + return true; + } + + template < + long M, + long N + > + bool is_binary_image ( + const unsigned char (&structuring_element)[M][N] + ) + /*! + ensures + - returns true if structuring_element contains only on_pixel and off_pixel values. 
+ - returns false otherwise + !*/ + { + for (long m = 0; m < M; ++m) + { + for (long n = 0; n < N; ++n) + { + if (structuring_element[m][n] != on_pixel && + structuring_element[m][n] != off_pixel) + { + return false; + } + } + } + return true; + } + + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + long M, + long N + > + void binary_dilation ( + const in_image_type& in_img_, + out_image_type& out_img_, + const unsigned char (&structuring_element)[M][N] + ) + { + typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); + + using namespace morphological_operations_helpers; + COMPILE_TIME_ASSERT(M%2 == 1); + COMPILE_TIME_ASSERT(N%2 == 1); + DLIB_ASSERT(is_same_object(in_img_,out_img_) == false, + "\tvoid binary_dilation()" + << "\n\tYou must give two different image objects" + ); + COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale); + DLIB_ASSERT(is_binary_image(in_img_) , + "\tvoid binary_dilation()" + << "\n\tin_img must be a binary image" + ); + DLIB_ASSERT(is_binary_image(structuring_element) , + "\tvoid binary_dilation()" + << "\n\tthe structuring_element must be a binary image" + ); + + + const_image_view<in_image_type> in_img(in_img_); + image_view<out_image_type> out_img(out_img_); + + // if there isn't any input image then don't do anything + if (in_img.size() == 0) + { + out_img.clear(); + return; + } + + out_img.set_size(in_img.nr(),in_img.nc()); + + // apply the filter to the image + for (long r = 0; r < in_img.nr(); ++r) + { + for (long c = 0; c < in_img.nc(); ++c) + { + unsigned char out_pixel = off_pixel; + for (long m = 0; m < M && out_pixel == off_pixel; ++m) + { + for (long n = 0; n < N && 
out_pixel == off_pixel; ++n) + { + if (structuring_element[m][n] == on_pixel) + { + // if this pixel is inside the image then get it from the image + // but if it isn't just pretend it was an off_pixel value + if (r+m >= M/2 && c+n >= N/2 && + r+m-M/2 < in_img.nr() && c+n-N/2 < in_img.nc()) + { + out_pixel = in_img[r+m-M/2][c+n-N/2]; + } + } + } + } + assign_pixel(out_img[r][c], out_pixel); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + long M, + long N + > + void binary_erosion ( + const in_image_type& in_img_, + out_image_type& out_img_, + const unsigned char (&structuring_element)[M][N] + ) + { + typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); + + using namespace morphological_operations_helpers; + COMPILE_TIME_ASSERT(M%2 == 1); + COMPILE_TIME_ASSERT(N%2 == 1); + DLIB_ASSERT(is_same_object(in_img_,out_img_) == false, + "\tvoid binary_erosion()" + << "\n\tYou must give two different image objects" + ); + COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale); + DLIB_ASSERT(is_binary_image(in_img_) , + "\tvoid binary_erosion()" + << "\n\tin_img must be a binary image" + ); + DLIB_ASSERT(is_binary_image(structuring_element) , + "\tvoid binary_erosion()" + << "\n\tthe structuring_element must be a binary image" + ); + + const_image_view<in_image_type> in_img(in_img_); + image_view<out_image_type> out_img(out_img_); + + + // if there isn't any input image then don't do anything + if (in_img.size() == 0) + { + out_img.clear(); + return; + } + + out_img.set_size(in_img.nr(),in_img.nc()); + + // apply the filter to the image + for (long r = 0; r < in_img.nr(); ++r) + { + for (long c = 0; c < 
in_img.nc(); ++c) + { + unsigned char out_pixel = on_pixel; + for (long m = 0; m < M && out_pixel == on_pixel; ++m) + { + for (long n = 0; n < N && out_pixel == on_pixel; ++n) + { + if (structuring_element[m][n] == on_pixel) + { + // if this pixel is inside the image then get it from the image + // but if it isn't just pretend it was an off_pixel value + if (r+m >= M/2 && c+n >= N/2 && + r+m-M/2 < in_img.nr() && c+n-N/2 < in_img.nc()) + { + out_pixel = in_img[r+m-M/2][c+n-N/2]; + } + else + { + out_pixel = off_pixel; + } + } + } + } + assign_pixel(out_img[r][c], out_pixel); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + long M, + long N + > + void binary_open ( + const in_image_type& in_img, + out_image_type& out_img, + const unsigned char (&structuring_element)[M][N], + const unsigned long iter = 1 + ) + { + typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); + + using namespace morphological_operations_helpers; + COMPILE_TIME_ASSERT(M%2 == 1); + COMPILE_TIME_ASSERT(N%2 == 1); + DLIB_ASSERT(is_same_object(in_img,out_img) == false, + "\tvoid binary_open()" + << "\n\tYou must give two different image objects" + ); + COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale); + DLIB_ASSERT(is_binary_image(in_img) , + "\tvoid binary_open()" + << "\n\tin_img must be a binary image" + ); + DLIB_ASSERT(is_binary_image(structuring_element) , + "\tvoid binary_open()" + << "\n\tthe structuring_element must be a binary image" + ); + + + // if there isn't any input image then don't do anything + if (num_rows(in_img)*num_columns(in_img) == 0) + { + set_image_size(out_img, 0,0); + return; + } + + 
set_image_size(out_img, num_rows(in_img), num_columns(in_img)); + + if (iter == 0) + { + // just copy the image over + assign_image(out_img, in_img); + } + else if (iter == 1) + { + in_image_type temp; + binary_erosion(in_img,temp,structuring_element); + binary_dilation(temp,out_img,structuring_element); + } + else + { + in_image_type temp1, temp2; + binary_erosion(in_img,temp1,structuring_element); + + // do the extra erosions + for (unsigned long i = 1; i < iter; ++i) + { + swap(temp1, temp2); + binary_erosion(temp2,temp1,structuring_element); + } + + // do the extra dilations + for (unsigned long i = 1; i < iter; ++i) + { + swap(temp1, temp2); + binary_dilation(temp2,temp1,structuring_element); + } + + binary_dilation(temp1,out_img,structuring_element); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + long M, + long N + > + void binary_close ( + const in_image_type& in_img, + out_image_type& out_img, + const unsigned char (&structuring_element)[M][N], + const unsigned long iter = 1 + ) + { + typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); + + + using namespace morphological_operations_helpers; + COMPILE_TIME_ASSERT(M%2 == 1); + COMPILE_TIME_ASSERT(N%2 == 1); + DLIB_ASSERT(is_same_object(in_img,out_img) == false, + "\tvoid binary_close()" + << "\n\tYou must give two different image objects" + ); + COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale); + DLIB_ASSERT(is_binary_image(in_img) , + "\tvoid binary_close()" + << "\n\tin_img must be a binary image" + ); + DLIB_ASSERT(is_binary_image(structuring_element) , + "\tvoid binary_close()" + << "\n\tthe structuring_element must be a binary 
image" + ); + + + // if there isn't any input image then don't do anything + if (num_rows(in_img)*num_columns(in_img) == 0) + { + set_image_size(out_img, 0,0); + return; + } + + set_image_size(out_img, num_rows(in_img), num_columns(in_img)); + + if (iter == 0) + { + // just copy the image over + assign_image(out_img, in_img); + } + else if (iter == 1) + { + in_image_type temp; + binary_dilation(in_img,temp,structuring_element); + binary_erosion(temp,out_img,structuring_element); + } + else + { + in_image_type temp1, temp2; + binary_dilation(in_img,temp1,structuring_element); + + // do the extra dilations + for (unsigned long i = 1; i < iter; ++i) + { + swap(temp1, temp2); + binary_dilation(temp2,temp1,structuring_element); + } + + // do the extra erosions + for (unsigned long i = 1; i < iter; ++i) + { + swap(temp1, temp2); + binary_erosion(temp2,temp1,structuring_element); + } + + binary_erosion(temp1,out_img,structuring_element); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type1, + typename in_image_type2, + typename out_image_type + > + void binary_intersection ( + const in_image_type1& in_img1_, + const in_image_type2& in_img2_, + out_image_type& out_img_ + ) + { + typedef typename image_traits<in_image_type1>::pixel_type in_pixel_type1; + typedef typename image_traits<in_image_type2>::pixel_type in_pixel_type2; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type1>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type2>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); + + using namespace morphological_operations_helpers; + COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type1>::grayscale); + COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type2>::grayscale); + DLIB_ASSERT(is_binary_image(in_img1_) , + "\tvoid binary_intersection()" + << 
"\n\tin_img1 must be a binary image" + ); + DLIB_ASSERT(is_binary_image(in_img2_) , + "\tvoid binary_intersection()" + << "\n\tin_img2 must be a binary image" + ); + + const_image_view<in_image_type1> in_img1(in_img1_); + const_image_view<in_image_type2> in_img2(in_img2_); + image_view<out_image_type> out_img(out_img_); + + DLIB_ASSERT(in_img1.nc() == in_img2.nc(), + "\tvoid binary_intersection()" + << "\n\tin_img1 and in_img2 must have the same ncs." + << "\n\tin_img1.nc(): " << in_img1.nc() + << "\n\tin_img2.nc(): " << in_img2.nc() + ); + DLIB_ASSERT(in_img1.nr() == in_img2.nr(), + "\tvoid binary_intersection()" + << "\n\tin_img1 and in_img2 must have the same nrs." + << "\n\tin_img1.nr(): " << in_img1.nr() + << "\n\tin_img2.nr(): " << in_img2.nr() + ); + + + + // if there isn't any input image then don't do anything + if (in_img1.size() == 0) + { + out_img.clear(); + return; + } + + out_img.set_size(in_img1.nr(),in_img1.nc()); + + for (long r = 0; r < in_img1.nr(); ++r) + { + for (long c = 0; c < in_img1.nc(); ++c) + { + if (in_img1[r][c] == on_pixel && in_img2[r][c] == on_pixel) + assign_pixel(out_img[r][c], on_pixel); + else + assign_pixel(out_img[r][c], off_pixel); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type1, + typename in_image_type2, + typename out_image_type + > + void binary_union ( + const in_image_type1& in_img1_, + const in_image_type2& in_img2_, + out_image_type& out_img_ + ) + { + typedef typename image_traits<in_image_type1>::pixel_type in_pixel_type1; + typedef typename image_traits<in_image_type2>::pixel_type in_pixel_type2; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type1>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type2>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); + + + using namespace 
morphological_operations_helpers; + COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type1>::grayscale); + COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type2>::grayscale); + DLIB_ASSERT(is_binary_image(in_img1_) , + "\tvoid binary_union()" + << "\n\tin_img1 must be a binary image" + ); + DLIB_ASSERT(is_binary_image(in_img2_) , + "\tvoid binary_union()" + << "\n\tin_img2 must be a binary image" + ); + + const_image_view<in_image_type1> in_img1(in_img1_); + const_image_view<in_image_type2> in_img2(in_img2_); + image_view<out_image_type> out_img(out_img_); + + DLIB_ASSERT(in_img1.nc() == in_img2.nc(), + "\tvoid binary_union()" + << "\n\tin_img1 and in_img2 must have the same ncs." + << "\n\tin_img1.nc(): " << in_img1.nc() + << "\n\tin_img2.nc(): " << in_img2.nc() + ); + DLIB_ASSERT(in_img1.nr() == in_img2.nr(), + "\tvoid binary_union()" + << "\n\tin_img1 and in_img2 must have the same nrs." + << "\n\tin_img1.nr(): " << in_img1.nr() + << "\n\tin_img2.nr(): " << in_img2.nr() + ); + + + + // if there isn't any input image then don't do anything + if (in_img1.size() == 0) + { + out_img.clear(); + return; + } + + out_img.set_size(in_img1.nr(),in_img1.nc()); + + for (long r = 0; r < in_img1.nr(); ++r) + { + for (long c = 0; c < in_img1.nc(); ++c) + { + if (in_img1[r][c] == on_pixel || in_img2[r][c] == on_pixel) + assign_pixel(out_img[r][c], on_pixel); + else + assign_pixel(out_img[r][c], off_pixel); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type1, + typename in_image_type2, + typename out_image_type + > + void binary_difference ( + const in_image_type1& in_img1_, + const in_image_type2& in_img2_, + out_image_type& out_img_ + ) + { + typedef typename image_traits<in_image_type1>::pixel_type in_pixel_type1; + typedef typename image_traits<in_image_type2>::pixel_type in_pixel_type2; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + 
COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type1>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type2>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); + + using namespace morphological_operations_helpers; + COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type1>::grayscale); + COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type2>::grayscale); + DLIB_ASSERT(is_binary_image(in_img1_) , + "\tvoid binary_difference()" + << "\n\tin_img1 must be a binary image" + ); + DLIB_ASSERT(is_binary_image(in_img2_) , + "\tvoid binary_difference()" + << "\n\tin_img2 must be a binary image" + ); + + const_image_view<in_image_type1> in_img1(in_img1_); + const_image_view<in_image_type2> in_img2(in_img2_); + image_view<out_image_type> out_img(out_img_); + + DLIB_ASSERT(in_img1.nc() == in_img2.nc(), + "\tvoid binary_difference()" + << "\n\tin_img1 and in_img2 must have the same ncs." + << "\n\tin_img1.nc(): " << in_img1.nc() + << "\n\tin_img2.nc(): " << in_img2.nc() + ); + DLIB_ASSERT(in_img1.nr() == in_img2.nr(), + "\tvoid binary_difference()" + << "\n\tin_img1 and in_img2 must have the same nrs." 
+ << "\n\tin_img1.nr(): " << in_img1.nr() + << "\n\tin_img2.nr(): " << in_img2.nr() + ); + + + + // if there isn't any input image then don't do anything + if (in_img1.size() == 0) + { + out_img.clear(); + return; + } + + out_img.set_size(in_img1.nr(),in_img1.nc()); + + for (long r = 0; r < in_img1.nr(); ++r) + { + for (long c = 0; c < in_img1.nc(); ++c) + { + if (in_img1[r][c] == on_pixel && in_img2[r][c] == off_pixel) + assign_pixel(out_img[r][c], on_pixel); + else + assign_pixel(out_img[r][c], off_pixel); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + void binary_complement ( + const in_image_type& in_img_, + out_image_type& out_img_ + ) + { + typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; + typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; + COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); + + + using namespace morphological_operations_helpers; + COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale); + DLIB_ASSERT(is_binary_image(in_img_) , + "\tvoid binary_complement()" + << "\n\tin_img must be a binary image" + ); + + const_image_view<in_image_type> in_img(in_img_); + image_view<out_image_type> out_img(out_img_); + + // if there isn't any input image then don't do anything + if (in_img.size() == 0) + { + out_img.clear(); + return; + } + + out_img.set_size(in_img.nr(),in_img.nc()); + + for (long r = 0; r < in_img.nr(); ++r) + { + for (long c = 0; c < in_img.nc(); ++c) + { + if (in_img[r][c] == on_pixel) + assign_pixel(out_img[r][c], off_pixel); + else + assign_pixel(out_img[r][c], on_pixel); + } + } + } + + template < + typename image_type + > + void binary_complement ( + image_type& img + ) + { + binary_complement(img,img); + } + +// 
---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template <typename image_type> + inline bool should_remove_pixel ( + const image_type& img, + long r, + long c, + int iter + ) + { + unsigned int p2 = img[r-1][c]; + unsigned int p3 = img[r-1][c+1]; + unsigned int p4 = img[r][c+1]; + unsigned int p5 = img[r+1][c+1]; + unsigned int p6 = img[r+1][c]; + unsigned int p7 = img[r+1][c-1]; + unsigned int p8 = img[r][c-1]; + unsigned int p9 = img[r-1][c-1]; + + int A = (p2 == 0 && p3 == 255) + (p3 == 0 && p4 == 255) + + (p4 == 0 && p5 == 255) + (p5 == 0 && p6 == 255) + + (p6 == 0 && p7 == 255) + (p7 == 0 && p8 == 255) + + (p8 == 0 && p9 == 255) + (p9 == 0 && p2 == 255); + int B = p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9; + int m1 = iter == 0 ? (p2 * p4 * p6) : (p2 * p4 * p8); + int m2 = iter == 0 ? (p4 * p6 * p8) : (p2 * p6 * p8); + // Decide if we should remove the pixel img[r][c]. + return (A == 1 && (B >= 2*255 && B <= 6*255) && m1 == 0 && m2 == 0); + } + + template <typename image_type> + inline void add_to_remove ( + std::vector<point>& to_remove, + array2d<unsigned char>& marker, + const image_type& img, + long r, + long c, + int iter + ) + { + if (marker[r][c]&&should_remove_pixel(img,r,c,iter)) + { + to_remove.push_back(point(c,r)); + marker[r][c] = 0; + } + } + + template <typename image_type> + inline bool is_bw_border_pixel( + const image_type& img, + long r, + long c + ) + { + unsigned int p2 = img[r-1][c]; + unsigned int p3 = img[r-1][c+1]; + unsigned int p4 = img[r][c+1]; + unsigned int p5 = img[r+1][c+1]; + unsigned int p6 = img[r+1][c]; + unsigned int p7 = img[r+1][c-1]; + unsigned int p8 = img[r][c-1]; + unsigned int p9 = img[r-1][c-1]; + + int B = p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9; + // If you are on but at least one of your neighbors isn't. 
+ return B<8*255 && img[r][c]; + + } + + inline void add_if( + std::vector<point>& to_check2, + const array2d<unsigned char>& marker, + long c, + long r + ) + { + if (marker[r][c]) + to_check2.push_back(point(c,r)); + } + + } // end namespace impl + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void skeleton( + image_type& img_ + ) + { + /* + The implementation of this function is based on the paper + "A fast parallel algorithm for thinning digital patterns" by T.Y. Zhang and C.Y. Suen. + and also the excellent discussion of it at: + http://opencv-code.com/quick-tips/implementation-of-thinning-algorithm-in-opencv/ + */ + + typedef typename image_traits<image_type>::pixel_type pixel_type; + + // This function only works on grayscale images + COMPILE_TIME_ASSERT(pixel_traits<pixel_type>::grayscale); + + using namespace impl; + // Note that it's important to zero the border for 2 reasons. First, it allows + // thinning to begin at the border of the image. But more importantly, it causes + // the marker image to have a border of 0 pixels as well which we use later to avoid + // indexing outside the image inside add_to_remove(). + zero_border_pixels(img_,1,1); + image_view<image_type> img(img_); + + // We use the marker to keep track of pixels we have committed to removing but + // haven't yet removed from img. + array2d<unsigned char> marker(img.nr(), img.nc()); + assign_image(marker, img); + + + // Begin by making a list of the pixels on the borders of binary blobs. + std::vector<point> to_remove, to_check, to_check2; + for (int r = 1; r < img.nr()-1; r++) + { + for (int c = 1; c < img.nc()-1; c++) + { + if (is_bw_border_pixel(img, r, c)) + { + to_check.push_back(point(c,r)); + } + } + } + + // Now start iteratively looking at the border pixels and removing them. 
+ while(to_check.size() != 0) + { + for (int iter = 0; iter <= 1; ++iter) + { + // Check which pixels we should remove + to_remove.clear(); + for (unsigned long i = 0; i < to_check.size(); ++i) + { + long r = to_check[i].y(); + long c = to_check[i].x(); + add_to_remove(to_remove, marker, img, r, c, iter); + } + for (unsigned long i = 0; i < to_check2.size(); ++i) + { + long r = to_check2[i].y(); + long c = to_check2[i].x(); + add_to_remove(to_remove, marker, img, r, c, iter); + } + // Now remove those pixels. Also add their neighbors into the "to check" + // pixel list for the next iteration. + for (unsigned long i = 0; i < to_remove.size(); ++i) + { + long r = to_remove[i].y(); + long c = to_remove[i].x(); + // remove the pixel + img[r][c] = 0; + add_if(to_check2, marker, c-1, r-1); + add_if(to_check2, marker, c, r-1); + add_if(to_check2, marker, c+1, r-1); + add_if(to_check2, marker, c-1, r); + add_if(to_check2, marker, c+1, r); + add_if(to_check2, marker, c-1, r+1); + add_if(to_check2, marker, c, r+1); + add_if(to_check2, marker, c+1, r+1); + } + } + to_check.clear(); + to_check.swap(to_check2); + } + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_MORPHOLOGICAL_OPERATIONs_ + diff --git a/ml/dlib/dlib/image_transforms/morphological_operations_abstract.h b/ml/dlib/dlib/image_transforms/morphological_operations_abstract.h new file mode 100644 index 000000000..c69bdd1ca --- /dev/null +++ b/ml/dlib/dlib/image_transforms/morphological_operations_abstract.h @@ -0,0 +1,316 @@ +// Copyright (C) 2006 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_MORPHOLOGICAL_OPERATIONs_ABSTRACT_ +#ifdef DLIB_MORPHOLOGICAL_OPERATIONs_ABSTRACT_ + +#include "../pixel.h" +#include "thresholding_abstract.h" +#include "../image_processing/generic_image.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + long M, + long N + > + void binary_dilation ( + const in_image_type& in_img, + out_image_type& out_img, + const unsigned char (&structuring_element)[M][N] + ); + /*! + requires + - in_image_type and out_image_type are image objects that implement the + interface defined in dlib/image_processing/generic_image.h + - in_img must contain a grayscale pixel type. + - both in_img and out_img must contain pixels with no alpha channel. + (i.e. pixel_traits::has_alpha==false for their pixels) + - is_same_object(in_img,out_img) == false + - M % 2 == 1 (i.e. M must be odd) + - N % 2 == 1 (i.e. N must be odd) + - all pixels in in_img are set to either on_pixel or off_pixel + (i.e. it must be a binary image) + - all pixels in structuring_element are set to either on_pixel or off_pixel + (i.e. it must be a binary image) + ensures + - Does a binary dilation of in_img using the given structuring element and + stores the result in out_img. + - #out_img.nc() == in_img.nc() + - #out_img.nr() == in_img.nr() + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + long M, + long N + > + void binary_erosion ( + const in_image_type& in_img, + out_image_type& out_img, + const unsigned char (&structuring_element)[M][N] + ); + /*! + requires + - in_image_type and out_image_type are image objects that implement the + interface defined in dlib/image_processing/generic_image.h + - in_img must contain a grayscale pixel type. + - both in_img and out_img must contain pixels with no alpha channel. 
+ (i.e. pixel_traits::has_alpha==false for their pixels) + - is_same_object(in_img,out_img) == false + - M % 2 == 1 (i.e. M must be odd) + - N % 2 == 1 (i.e. N must be odd) + - all pixels in in_img are set to either on_pixel or off_pixel + (i.e. it must be a binary image) + - all pixels in structuring_element are set to either on_pixel or off_pixel + (i.e. it must be a binary image) + ensures + - Does a binary erosion of in_img using the given structuring element and + stores the result in out_img. + - #out_img.nc() == in_img.nc() + - #out_img.nr() == in_img.nr() + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + long M, + long N + > + void binary_open ( + const in_image_type& in_img, + out_image_type& out_img, + const unsigned char (&structuring_element)[M][N], + const unsigned long iter = 1 + ); + /*! + requires + - in_image_type and out_image_type are image objects that implement the + interface defined in dlib/image_processing/generic_image.h + - in_img must contain a grayscale pixel type. + - both in_img and out_img must contain pixels with no alpha channel. + (i.e. pixel_traits::has_alpha==false for their pixels) + - is_same_object(in_img,out_img) == false + - M % 2 == 1 (i.e. M must be odd) + - N % 2 == 1 (i.e. N must be odd) + - all pixels in in_img are set to either on_pixel or off_pixel + (i.e. it must be a binary image) + - all pixels in structuring_element are set to either on_pixel or off_pixel + (i.e. it must be a binary image) + ensures + - Does a binary open of in_img using the given structuring element and + stores the result in out_img. Specifically, iter iterations of binary + erosion are applied and then iter iterations of binary dilation. 
+ - #out_img.nc() == in_img.nc() + - #out_img.nr() == in_img.nr() + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + long M, + long N + > + void binary_close ( + const in_image_type& in_img, + out_image_type& out_img, + const unsigned char (&structuring_element)[M][N], + const unsigned long iter = 1 + ); + /*! + requires + - in_image_type and out_image_type are image objects that implement the + interface defined in dlib/image_processing/generic_image.h + - in_img must contain a grayscale pixel type. + - both in_img and out_img must contain pixels with no alpha channel. + (i.e. pixel_traits::has_alpha==false for their pixels) + - is_same_object(in_img,out_img) == false + - M % 2 == 1 (i.e. M must be odd) + - N % 2 == 1 (i.e. N must be odd) + - all pixels in in_img are set to either on_pixel or off_pixel + (i.e. it must be a binary image) + - all pixels in structuring_element are set to either on_pixel or off_pixel + (i.e. it must be a binary image) + ensures + - Does a binary close of in_img using the given structuring element and + stores the result in out_img. Specifically, iter iterations of binary + dilation are applied and then iter iterations of binary erosion. + - #out_img.nc() == in_img.nc() + - #out_img.nr() == in_img.nr() + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type1, + typename in_image_type2, + typename out_image_type + > + void binary_intersection ( + const in_image_type1& in_img1, + const in_image_type2& in_img2, + out_image_type& out_img + ); + /*! + requires + - in_image_type1, in_image_type2, and out_image_type are image objects that + implement the interface defined in dlib/image_processing/generic_image.h + - in_img1 and in_img2 must contain grayscale pixel types. 
+ - in_img1, in_img2, and out_img must contain pixels with no alpha channel. + (i.e. pixel_traits::has_alpha==false for their pixels) + - all pixels in in_img1 and in_img2 are set to either on_pixel or off_pixel + (i.e. they must be binary images) + - in_img1.nc() == in_img2.nc() + - in_img1.nr() == in_img2.nr() + ensures + - #out_img == the binary intersection of in_img1 and in_img2. (i.e. All + the pixels that are set to on_pixel in both in_img1 and in_img2 will be set + to on_pixel in #out_img. All other pixels will be set to off_pixel) + - #out_img.nc() == in_img1.nc() + - #out_img.nr() == in_img1.nr() + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type1, + typename in_image_type2, + typename out_image_type + > + void binary_union ( + const in_image_type1& in_img1, + const in_image_type2& in_img2, + out_image_type& out_img + ); + /*! + requires + - in_image_type1, in_image_type2, and out_image_type are image objects that + implement the interface defined in dlib/image_processing/generic_image.h + - in_img1 and in_img2 must contain grayscale pixel types. + - in_img1, in_img2, and out_img must contain pixels with no alpha channel. + (i.e. pixel_traits::has_alpha==false for their pixels) + - all pixels in in_img1 and in_img2 are set to either on_pixel or off_pixel + (i.e. they must be binary images) + - in_img1.nc() == in_img2.nc() + - in_img1.nr() == in_img2.nr() + ensures + - #out_img == the binary union of in_img1 and in_img2. (i.e. All + the pixels that are set to on_pixel in in_img1 and/or in_img2 will be set + to on_pixel in #out_img. 
All other pixels will be set to off_pixel) + - #out_img.nc() == in_img1.nc() + - #out_img.nr() == in_img1.nr() + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type1, + typename in_image_type2, + typename out_image_type + > + void binary_difference ( + const in_image_type1& in_img1, + const in_image_type2& in_img2, + out_image_type& out_img + ); + /*! + requires + - in_image_type1, in_image_type2, and out_image_type are image objects that + implement the interface defined in dlib/image_processing/generic_image.h + - in_img1 and in_img2 must contain grayscale pixel types. + - in_img1, in_img2, and out_img must contain pixels with no alpha channel. + (i.e. pixel_traits::has_alpha==false for their pixels) + - all pixels in in_img1 and in_img2 are set to either on_pixel or off_pixel + (i.e. they must be binary images) + - in_img1.nc() == in_img2.nc() + - in_img1.nr() == in_img2.nr() + ensures + - #out_img == the binary difference of in_img1 and in_img2. (i.e. #out_img + will be a copy of in_img1 except that any pixels in in_img2 that are set to + on_pixel will be set to off_pixel) + - #out_img.nc() == in_img1.nc() + - #out_img.nr() == in_img1.nr() + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + void binary_complement ( + const in_image_type& in_img, + out_image_type& out_img + ); + /*! + requires + - in_image_type and out_image_type are image objects that implement the + interface defined in dlib/image_processing/generic_image.h + - in_img must contain a grayscale pixel type. + - both in_img and out_img must contain pixels with no alpha channel. + (i.e. pixel_traits::has_alpha==false for their pixels) + - all pixels in in_img are set to either on_pixel or off_pixel + (i.e. it must be a binary image) + ensures + - #out_img == the binary complement of in_img. (i.e. 
For each pixel in + in_img, if it is on_pixel then it will be set to off_pixel in #out_img and + if it was off_pixel in in_img then it will be on_pixel in #out_img) + - #out_img.nc() == in_img.nc() + - #out_img.nr() == in_img.nr() + !*/ + + template < + typename image_type + > + void binary_complement ( + image_type& img + ); + /*! + requires + - it must be valid to call binary_complement(img,img); + ensures + - calls binary_complement(img,img); + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void skeleton( + image_type& img + ); + /*! + requires + - image_type is an object that implements the interface defined in + dlib/image_processing/generic_image.h + - img must contain a grayscale pixel type. + - all pixels in img are set to either on_pixel or off_pixel. + (i.e. it must be a binary image) + ensures + - This function computes the skeletonization of img and stores the result in + #img. That is, given a binary image, we progressively thin the binary blobs + (composed of on_pixel values) until only a single pixel wide skeleton of the + original blobs remains. + - #img.nc() == img.nc() + - #img.nr() == img.nr() + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_MORPHOLOGICAL_OPERATIONs_ABSTRACT_ + + diff --git a/ml/dlib/dlib/image_transforms/random_color_transform.h b/ml/dlib/dlib/image_transforms/random_color_transform.h new file mode 100644 index 000000000..7433da1f7 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/random_color_transform.h @@ -0,0 +1,157 @@ +// Copyright (C) 2016 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_RANDOM_cOLOR_TRANSFORM_Hh_ +#define DLIB_RANDOM_cOLOR_TRANSFORM_Hh_ + +#include "random_color_transform_abstract.h" +#include "../image_processing/generic_image.h" +#include "../pixel.h" +#include "../rand.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class random_color_transform + { + public: + + random_color_transform ( + dlib::rand& rnd, + const double gamma_magnitude = 0.5, + const double color_magnitude = 0.2 + ) + { + // pick a random gamma correction factor. + double gamma = std::max(0.0, 1 + gamma_magnitude*(rnd.get_random_double()-0.5)); + + // pick a random color balancing scheme. + double red_scale = 1-rnd.get_random_double()*color_magnitude; + double green_scale = 1-rnd.get_random_double()*color_magnitude; + double blue_scale = 1-rnd.get_random_double()*color_magnitude; + const double m = 255*std::max(std::max(red_scale,green_scale),blue_scale); + red_scale /= m; + green_scale /= m; + blue_scale /= m; + + // Now compute a lookup table for all the color channels. The table tells us + // what the transform does. 
+ table.resize(256*3); + unsigned long i = 0; + for (int k = 0; k < 256; ++k) + { + double v = 255*std::pow(k*red_scale, gamma); + table[i++] = (unsigned char)(v + 0.5); + } + for (int k = 0; k < 256; ++k) + { + double v = 255*std::pow(k*green_scale, gamma); + table[i++] = (unsigned char)(v + 0.5); + } + for (int k = 0; k < 256; ++k) + { + double v = 255*std::pow(k*blue_scale, gamma); + table[i++] = (unsigned char)(v + 0.5); + } + } + + rgb_pixel operator()(rgb_pixel p) const + { + p.red = table[(unsigned int)p.red]; + p.green = table[(unsigned int)p.green+256]; + p.blue = table[(unsigned int)p.blue+512]; + return p; + } + + private: + std::vector<unsigned char> table; + }; + +// ---------------------------------------------------------------------------------------- + + template <typename image_type> + void disturb_colors ( + image_type& img_, + dlib::rand& rnd, + const double gamma_magnitude = 0.5, + const double color_magnitude = 0.2 + ) + { + image_view<image_type> img(img_); + random_color_transform tform(rnd, gamma_magnitude, color_magnitude); + for (long r = 0; r < img.nr(); ++r) + { + for (long c = 0; c < img.nc(); ++c) + { + rgb_pixel temp; + assign_pixel(temp, img[r][c]); + temp = tform(temp); + assign_pixel(img[r][c], temp); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template <typename image_type> + void apply_random_color_offset ( + image_type& img_, + dlib::rand& rnd + ) + { + // Make a random color offset. This tform matrix came from looking at the + // covariance matrix of RGB values in a bunch of images. In particular, if you + // multiply Gaussian random vectors by tform it will result in vectors with the + // same covariance matrix as the original RGB data. Also, this color transform is + // what is suggested by the paper: + // Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet + // classification with deep convolutional neural networks." 
Advances in neural + // information processing systems. 2012. + // Except that we used the square root of the eigenvalues (which I'm pretty sure is + // what the authors intended). + matrix<double,3,3> tform; + tform = -66.379, 25.094, 6.79698, + -68.0492, -0.302309, -13.9539, + -68.4907, -24.0199, 7.27653; + matrix<double,3,1> v; + v = rnd.get_random_gaussian(),rnd.get_random_gaussian(),rnd.get_random_gaussian(); + v = round(tform*0.1*v); + const int roffset = v(0); + const int goffset = v(1); + const int boffset = v(2); + + // Make up lookup tables that apply the color mapping so we don't have to put a + // bunch of complicated conditional branches in the loop below. + unsigned char rtable[256]; + unsigned char gtable[256]; + unsigned char btable[256]; + for (int i = 0; i < 256; ++i) + { + rtable[i] = put_in_range(0, 255, i+roffset); + gtable[i] = put_in_range(0, 255, i+goffset); + btable[i] = put_in_range(0, 255, i+boffset); + } + + // now transform the image. + image_view<image_type> img(img_); + for (long r = 0; r < img.nr(); ++r) + { + for (long c = 0; c < img.nc(); ++c) + { + rgb_pixel temp; + assign_pixel(temp, img[r][c]); + temp.red = rtable[temp.red]; + temp.green = gtable[temp.green]; + temp.blue = btable[temp.blue]; + assign_pixel(img[r][c], temp); + } + } + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RANDOM_cOLOR_TRANSFORM_Hh_ + diff --git a/ml/dlib/dlib/image_transforms/random_color_transform_abstract.h b/ml/dlib/dlib/image_transforms/random_color_transform_abstract.h new file mode 100644 index 000000000..5826e16a6 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/random_color_transform_abstract.h @@ -0,0 +1,94 @@ +// Copyright (C) 2016 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_RANDOM_cOLOR_TRANSFORM_ABSTRACT_Hh_ +#ifdef DLIB_RANDOM_cOLOR_TRANSFORM_ABSTRACT_Hh_ + +#include "../image_processing/generic_image.h" +#include "../pixel.h" +#include "../rand.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class random_color_transform + { + /*! + WHAT THIS OBJECT REPRESENTS + This object generates a random color balancing and gamma correction + transform. It then allows you to apply that specific transform to as many + rgb_pixel objects as you like. + !*/ + + public: + + random_color_transform ( + dlib::rand& rnd, + const double gamma_magnitude = 0.5, + const double color_magnitude = 0.2 + ); + /*! + requires + - 0 <= gamma_magnitude + - 0 <= color_magnitude <= 1 + ensures + - This constructor generates a random color transform which can be applied + by calling this object's operator() method. + - The color transform is a gamma correction and color rebalancing. If + gamma_magnitude == 0 and color_magnitude == 0 then the transform doesn't + change any colors at all. However, the larger these parameters the more + noticeable the resulting transform. + !*/ + + rgb_pixel operator()( + rgb_pixel p + ) const; + /*! + ensures + - returns the color transformed version of p. + !*/ + }; + +// ---------------------------------------------------------------------------------------- + + template <typename image_type> + void disturb_colors ( + image_type& img, + dlib::rand& rnd, + const double gamma_magnitude = 0.5, + const double color_magnitude = 0.2 + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + ensures + - Applies a random color transform to the given image. This is done by + creating a random_color_transform with the given parameters and then + transforming each pixel in the image with the resulting transform. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename image_type> + void apply_random_color_offset ( + image_type& img, + dlib::rand& rnd + ); + /*! + ensures + - Picks a random color offset vector and adds it to the given image. The offset + vector is selected using the method described in the paper: + Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet + classification with deep convolutional neural networks." Advances in neural + information processing systems. 2012. + In particular, we sample an RGB value from the typical distribution of RGB + values, assuming it has a Gaussian distribution, and then divide it by 10. + This sampled RGB vector is added to each pixel of img. + !*/ + +// ---------------------------------------------------------------------------------------- + +#endif // DLIB_RANDOM_cOLOR_TRANSFORM_ABSTRACT_Hh_ + diff --git a/ml/dlib/dlib/image_transforms/random_cropper.h b/ml/dlib/dlib/image_transforms/random_cropper.h new file mode 100644 index 000000000..2c754b608 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/random_cropper.h @@ -0,0 +1,361 @@ +// Copyright (C) 2016 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_RaNDOM_CROPPER_H_ +#define DLIB_RaNDOM_CROPPER_H_ + +#include "random_cropper_abstract.h" +#include "../threads.h" +#include <mutex> +#include <vector> +#include "interpolation.h" +#include "../image_processing/full_object_detection.h" +#include "../rand.h" + +namespace dlib +{ + class random_cropper + { + chip_dims dims = chip_dims(300,300); + bool randomly_flip = true; + double max_rotation_degrees = 30; + long min_object_length_long_dim = 75; // cropped object will be at least this many pixels along its longest edge. + long min_object_length_short_dim = 30; // cropped object will be at least this many pixels along its shortest edge. 
+ double max_object_size = 0.7; // cropped object will be at most this fraction of the size of the image. + double background_crops_fraction = 0.5; + double translate_amount = 0.10; + + std::mutex rnd_mutex; + dlib::rand rnd; + public: + + void set_seed ( + time_t seed + ) { rnd = dlib::rand(seed); } + + double get_translate_amount ( + ) const { return translate_amount; } + + void set_translate_amount ( + double value + ) + { + DLIB_CASSERT(0 <= value); + translate_amount = value; + } + + double get_background_crops_fraction ( + ) const { return background_crops_fraction; } + + void set_background_crops_fraction ( + double value + ) + { + DLIB_CASSERT(0 <= value && value <= 1); + background_crops_fraction = value; + } + + const chip_dims& get_chip_dims( + ) const { return dims; } + + void set_chip_dims ( + const chip_dims& dims_ + ) { dims = dims_; } + + void set_chip_dims ( + unsigned long rows, + unsigned long cols + ) { set_chip_dims(chip_dims(rows,cols)); } + + bool get_randomly_flip ( + ) const { return randomly_flip; } + + void set_randomly_flip ( + bool value + ) { randomly_flip = value; } + + double get_max_rotation_degrees ( + ) const { return max_rotation_degrees; } + void set_max_rotation_degrees ( + double value + ) { max_rotation_degrees = std::abs(value); } + + long get_min_object_length_long_dim ( + ) const { return min_object_length_long_dim; } + long get_min_object_length_short_dim ( + ) const { return min_object_length_short_dim; } + + void set_min_object_size ( + long long_dim, + long short_dim + ) + { + DLIB_CASSERT(0 < short_dim && short_dim <= long_dim); + min_object_length_long_dim = long_dim; + min_object_length_short_dim = short_dim; + } + + double get_max_object_size ( + ) const { return max_object_size; } + void set_max_object_size ( + double value + ) + { + DLIB_CASSERT(0 < value); + max_object_size = value; + } + + template < + typename array_type + > + void operator() ( + size_t num_crops, + const array_type& images, + const 
std::vector<std::vector<mmod_rect>>& rects, + array_type& crops, + std::vector<std::vector<mmod_rect>>& crop_rects + ) + { + DLIB_CASSERT(images.size() == rects.size()); + crops.clear(); + crop_rects.clear(); + append(num_crops, images, rects, crops, crop_rects); + } + + template < + typename array_type + > + void append ( + size_t num_crops, + const array_type& images, + const std::vector<std::vector<mmod_rect>>& rects, + array_type& crops, + std::vector<std::vector<mmod_rect>>& crop_rects + ) + { + DLIB_CASSERT(images.size() == rects.size()); + DLIB_CASSERT(crops.size() == crop_rects.size()); + auto original_size = crops.size(); + crops.resize(crops.size()+num_crops); + crop_rects.resize(crop_rects.size()+num_crops); + parallel_for(original_size, original_size+num_crops, [&](long i) { + (*this)(images, rects, crops[i], crop_rects[i]); + }); + } + + + template < + typename array_type, + typename image_type + > + void operator() ( + const array_type& images, + const std::vector<std::vector<mmod_rect>>& rects, + image_type& crop, + std::vector<mmod_rect>& crop_rects + ) + { + DLIB_CASSERT(images.size() == rects.size()); + size_t idx; + { std::lock_guard<std::mutex> lock(rnd_mutex); + idx = rnd.get_integer(images.size()); + } + (*this)(images[idx], rects[idx], crop, crop_rects); + } + + template < + typename image_type1 + > + image_type1 operator() ( + const image_type1& img + ) + { + image_type1 crop; + std::vector<mmod_rect> junk1, junk2; + (*this)(img, junk1, crop, junk2); + return crop; + } + + template < + typename image_type1, + typename image_type2 + > + void operator() ( + const image_type1& img, + const std::vector<mmod_rect>& rects, + image_type2& crop, + std::vector<mmod_rect>& crop_rects + ) + { + DLIB_CASSERT(num_rows(img)*num_columns(img) != 0); + chip_details crop_plan; + bool should_flip_crop; + make_crop_plan(img, rects, crop_plan, should_flip_crop); + + extract_image_chip(img, crop_plan, crop); + const rectangle_transform tform = 
get_mapping_to_chip(crop_plan); + + // copy rects into crop_rects and set ones that are outside the crop to ignore or + // drop entirely as appropriate. + crop_rects.clear(); + for (auto rect : rects) + { + // map to crop + rect.rect = tform(rect.rect); + + // if the rect is at least partly in the crop + if (get_rect(crop).intersect(rect.rect).area() != 0) + { + // set to ignore if not totally in the crop or if too small. + if (!get_rect(crop).contains(rect.rect) || + ((long)rect.rect.height() < min_object_length_long_dim && (long)rect.rect.width() < min_object_length_long_dim) || + ((long)rect.rect.height() < min_object_length_short_dim || (long)rect.rect.width() < min_object_length_short_dim)) + { + rect.ignore = true; + } + + crop_rects.push_back(rect); + } + } + + // Also randomly flip the image + if (should_flip_crop) + { + image_type2 temp; + flip_image_left_right(crop, temp); + swap(crop,temp); + for (auto&& rect : crop_rects) + rect.rect = impl::flip_rect_left_right(rect.rect, get_rect(crop)); + } + } + + private: + + template <typename image_type1> + void make_crop_plan ( + const image_type1& img, + const std::vector<mmod_rect>& rects, + chip_details& crop_plan, + bool& should_flip_crop + ) + { + std::lock_guard<std::mutex> lock(rnd_mutex); + rectangle crop_rect; + if (has_non_ignored_box(rects) && rnd.get_random_double() >= background_crops_fraction) + { + auto rect = rects[randomly_pick_rect(rects)].rect; + + // perturb the location of the crop by a small fraction of the object's size. + const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width()), + rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width())); + + // We are going to grow rect into the cropping rect. First, we grow it a + // little so that it has the desired minimum border around it. 
+ drectangle drect = centered_drect(center(rect)+rand_translate, rect.width()/max_object_size, rect.height()/max_object_size); + + // Now make rect have the same aspect ratio as dims so that there won't be + // any funny stretching when we crop it. We do this by growing it along + // whichever dimension is too short. + const double target_aspect = dims.cols/(double)dims.rows; + if (drect.width()/drect.height() < target_aspect) + drect = centered_drect(drect, target_aspect*drect.height(), drect.height()); + else + drect = centered_drect(drect, drect.width(), drect.width()/target_aspect); + + // Now perturb the scale of the crop. We do this by shrinking it, but not + // so much that it gets smaller than the min object sizes require. + double current_width = dims.cols*rect.width()/drect.width(); + double current_height = dims.rows*rect.height()/drect.height(); + + // never make any dimension smaller than the short dim. + double min_scale1 = std::max(min_object_length_short_dim/current_width, min_object_length_short_dim/current_height); + // at least one dimension needs to be longer than the long dim. 
+ double min_scale2 = std::min(min_object_length_long_dim/current_width, min_object_length_long_dim/current_height); + double min_scale = std::max(min_scale1, min_scale2); + + const double rand_scale_perturb = 1.0/rnd.get_double_in_range(min_scale, 1); + crop_rect = centered_drect(drect, drect.width()*rand_scale_perturb, drect.height()*rand_scale_perturb); + + } + else + { + crop_rect = make_random_cropping_rect(img); + } + should_flip_crop = randomly_flip && rnd.get_random_double() > 0.5; + const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180; + crop_plan = chip_details(crop_rect, dims, angle); + } + + bool has_non_ignored_box ( + const std::vector<mmod_rect>& rects + ) const + { + for (auto&& b : rects) + { + if (!b.ignore) + return true; + } + return false; + } + + size_t randomly_pick_rect ( + const std::vector<mmod_rect>& rects + ) + { + DLIB_CASSERT(has_non_ignored_box(rects)); + size_t idx = rnd.get_integer(rects.size()); + while(rects[idx].ignore) + idx = rnd.get_integer(rects.size()); + return idx; + } + + template <typename image_type> + rectangle make_random_cropping_rect( + const image_type& img_ + ) + { + const_image_view<image_type> img(img_); + // Figure out what rectangle we want to crop from the image. We are going to + // crop out an image of size this->dims, so we pick a random scale factor that + // lets this random box be either as big as it can be while still fitting in + // the image or as small as a 3x zoomed in box randomly somewhere in the image. 
+ double mins = 1.0/3.0, maxs = std::min(img.nr()/(double)dims.rows, img.nc()/(double)dims.cols); + mins = std::min(mins, maxs); + auto scale = rnd.get_double_in_range(mins, maxs); + rectangle rect(scale*dims.cols, scale*dims.rows); + // randomly shift the box around + point offset(rnd.get_integer(1+img.nc()-rect.width()), + rnd.get_integer(1+img.nr()-rect.height())); + return move_rect(rect, offset); + } + + + + }; + +// ---------------------------------------------------------------------------------------- + + inline std::ostream& operator<< ( + std::ostream& out, + const random_cropper& item + ) + { + using std::endl; + out << "random_cropper details: " << endl; + out << " chip_dims.rows: " << item.get_chip_dims().rows << endl; + out << " chip_dims.cols: " << item.get_chip_dims().cols << endl; + out << " randomly_flip: " << std::boolalpha << item.get_randomly_flip() << endl; + out << " max_rotation_degrees: " << item.get_max_rotation_degrees() << endl; + out << " min_object_length_long_dim: " << item.get_min_object_length_long_dim() << endl; + out << " min_object_length_short_dim: " << item.get_min_object_length_short_dim() << endl; + out << " max_object_size: " << item.get_max_object_size() << endl; + out << " background_crops_fraction: " << item.get_background_crops_fraction() << endl; + out << " translate_amount: " << item.get_translate_amount() << endl; + return out; + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RaNDOM_CROPPER_H_ + diff --git a/ml/dlib/dlib/image_transforms/random_cropper_abstract.h b/ml/dlib/dlib/image_transforms/random_cropper_abstract.h new file mode 100644 index 000000000..7603a1c47 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/random_cropper_abstract.h @@ -0,0 +1,346 @@ +// Copyright (C) 2016 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_RaNDOM_CROPPER_ABSTRACT_H_ +#ifdef DLIB_RaNDOM_CROPPER_ABSTRACT_H_ + +#include "../threads.h" +#include <mutex> +#include <vector> +#include "interpolation.h" +#include "../image_processing/full_object_detection.h" +#include "../rand.h" + +namespace dlib +{ + class random_cropper + { + /*! + WHAT THIS OBJECT REPRESENTS + This object is a tool for extracting random crops of objects from a set of + images. The crops are randomly jittered in scale, translation, and + rotation but more or less centered on objects specified by mmod_rect + objects. + + THREAD SAFETY + It is safe for multiple threads to make concurrent calls to this object's + operator() methods. + !*/ + + public: + + random_cropper ( + ); + /*! + ensures + - #get_chip_dims() == chip_dims(300,300) + - #get_randomly_flip() == true + - #get_max_rotation_degrees() == 30 + - #get_min_object_length_long_dim() == 75 + - #get_min_object_length_short_dim() == 30 + - #get_max_object_size() == 0.7 + - #get_background_crops_fraction() == 0.5 + - #get_translate_amount() == 0.1 + !*/ + + void set_seed ( + time_t seed + ); + /*! + ensures + - Seeds the internal random number generator with the given seed. + !*/ + + double get_translate_amount ( + ) const; + /*! + ensures + - When a box is cropped out, it will be randomly translated prior to + cropping by #get_translate_amount()*(the box's height) up or down and + #get_translate_amount()*(the box's width) left or right. + !*/ + + void set_translate_amount ( + double value + ); + /*! + requires + - value >= 0 + ensures + - #get_translate_amount() == value + !*/ + + double get_background_crops_fraction ( + ) const; + /*! + ensures + - When making random crops, get_background_crops_fraction() fraction of + them will be from random background rather than being centered on some + object in the dataset. + !*/ + + void set_background_crops_fraction ( + double value + ); + /*! 
+ requires + - 0 <= value <= 1 + ensures + - #get_background_crops_fraction() == value + !*/ + + const chip_dims& get_chip_dims( + ) const; + /*! + ensures + - returns the dimensions of image chips produced by this object. + !*/ + + void set_chip_dims ( + const chip_dims& dims + ); + /*! + ensures + - #get_chip_dims() == dims + !*/ + + void set_chip_dims ( + unsigned long rows, + unsigned long cols + ); + /*! + ensures + - #get_chip_dims() == chip_dims(rows,cols) + !*/ + + bool get_randomly_flip ( + ) const; + /*! + ensures + - returns true if this object will randomly mirror chips left to right. + !*/ + + void set_randomly_flip ( + bool value + ); + /*! + ensures + - #get_randomly_flip() == value + !*/ + + double get_max_rotation_degrees ( + ) const; + /*! + ensures + - When extracting an image chip, this object will pick a random rotation + in the range [-get_max_rotation_degrees(), get_max_rotation_degrees()] + and rotate the chip by that amount. + !*/ + + void set_max_rotation_degrees ( + double value + ); + /*! + ensures + - #get_max_rotation_degrees() == std::abs(value) + !*/ + + long get_min_object_length_long_dim ( + ) const; + /*! + ensures + - When a chip is extracted around an object, the chip will be sized so that + the longest edge of the object (i.e. either its height or width, + whichever is longer) is at least #get_min_object_length_long_dim() pixels + in length. When we say "object" here we are referring specifically to + the rectangle in the mmod_rect output by the cropper. + !*/ + + long get_min_object_length_short_dim ( + ) const; + /*! + ensures + - When a chip is extracted around an object, the chip will be sized so that + the shortest edge of the object (i.e. either its height or width, + whichever is shorter) is at least #get_min_object_length_short_dim() + pixels in length. When we say "object" here we are referring + specifically to the rectangle in the mmod_rect output by the cropper. 
+ !*/ + + void set_min_object_size ( + long long_dim, + long short_dim + ); + /*! + requires + - 0 < short_dim <= long_dim + ensures + - #get_min_object_length_short_dim() == short_dim + - #get_min_object_length_long_dim() == long_dim + !*/ + + double get_max_object_size ( + ) const; + /*! + ensures + - When a chip is extracted around an object, the chip will be sized so that + both the object's height and width are at most get_max_object_size() * + the chip's height and width, respectively. E.g. if the chip is 640x480 + pixels in size then the object will be at most 480*get_max_object_size() + pixels tall and 640*get_max_object_size() pixels wide. + !*/ + + void set_max_object_size ( + double value + ); + /*! + requires + - 0 < value + ensures + - #get_max_object_size() == value + !*/ + + template < + typename array_type + > + void append ( + size_t num_crops, + const array_type& images, + const std::vector<std::vector<mmod_rect>>& rects, + array_type& crops, + std::vector<std::vector<mmod_rect>>& crop_rects + ); + /*! + requires + - images.size() == rects.size() + - crops.size() == crop_rects.size() + - for all valid i: + - images[i].size() != 0 + - array_type is a type with an interface compatible with dlib::array or + std::vector and it must in turn contain image objects that implement the + interface defined in dlib/image_processing/generic_image.h + ensures + - Randomly extracts num_crops chips from images and appends them to the end + of crops. We also copy the object metadata for each extracted crop and + store it into #crop_rects. In particular, calling this function is the + same as making multiple calls to the version of operator() below that + outputs a single crop, except that append() will use multiple CPU cores + to do the processing and is therefore faster. 
+ - #crops.size() == crops.size()+num_crops + - #crop_rects.size() == crop_rects.size()+num_crops + !*/ + + template < + typename array_type + > + void operator() ( + size_t num_crops, + const array_type& images, + const std::vector<std::vector<mmod_rect>>& rects, + array_type& crops, + std::vector<std::vector<mmod_rect>>& crop_rects + ); + /*! + requires + - images.size() == rects.size() + - for all valid i: + - images[i].size() != 0 + - array_type is a type with an interface compatible with dlib::array or + std::vector and it must in turn contain image objects that implement the + interface defined in dlib/image_processing/generic_image.h + ensures + - Randomly extracts num_crops chips from images. We also copy the object + metadata for each extracted crop and store it into #crop_rects. In + particular, calling this function is the same as invoking the version of + operator() below multiple times, except that this version of operator() + will use multiple CPU cores to do the processing and is therefore faster. + - #crops.size() == num_crops + - #crop_rects.size() == num_crops + !*/ + + template < + typename array_type, + typename image_type + > + void operator() ( + const array_type& images, + const std::vector<std::vector<mmod_rect>>& rects, + image_type& crop, + std::vector<mmod_rect>& crop_rects + ); + /*! + requires + - images.size() == rects.size() + - for all valid i: + - images[i].size() != 0 + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - array_type is a type with an interface compatible with dlib::array or + std::vector and it must in turn contain image objects that implement the + interface defined in dlib/image_processing/generic_image.h + ensures + - Selects a random image and creates a random crop from it. 
Specifically, + we pick a random index IDX < images.size() and then execute + (*this)(images[IDX],rects[IDX],crop,crop_rects) + !*/ + + template < + typename image_type1, + typename image_type2 + > + void operator() ( + const image_type1& img, + const std::vector<mmod_rect>& rects, + image_type2& crop, + std::vector<mmod_rect>& crop_rects + ); + /*! + requires + - img.size() != 0 + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + ensures + - Extracts a random crop from img and copies over the mmod_rect objects in + rects to #crop_rects if they are contained inside the crop. Moreover, + rectangles are marked as ignore if they aren't completely contained + inside the crop. + - #crop_rects.size() <= rects.size() + !*/ + + template < + typename image_type1 + > + image_type1 operator() ( + const image_type1& img + ); + /*! + requires + - img.size() != 0 + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + ensures + - This function simply calls (*this)(img, junk1, crop, junk2) and returns + crop. Therefore it is simply a convenience function for extracting a + random background patch. + !*/ + }; + +// ---------------------------------------------------------------------------------------- + + std::ostream& operator<< ( + std::ostream& out, + const random_cropper& item + ); + /*! + ensures + - Prints the state of all the parameters of item to out. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RaNDOM_CROPPER_ABSTRACT_H_ + + diff --git a/ml/dlib/dlib/image_transforms/segment_image.h b/ml/dlib/dlib/image_transforms/segment_image.h new file mode 100644 index 000000000..3b57e4801 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/segment_image.h @@ -0,0 +1,730 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_SEGMENT_ImAGE_Hh_ +#define DLIB_SEGMENT_ImAGE_Hh_ + +#include "segment_image_abstract.h" +#include "../algs.h" +#include <vector> +#include "../geometry.h" +#include "../disjoint_subsets.h" +#include "../set.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template <typename T> + inline T edge_diff_uint( + const T& a, + const T& b + ) + { + if (a > b) + return a - b; + else + return b - a; + } + + // ---------------------------------------- + + template <typename T, typename enabled = void> + struct edge_diff_funct + { + typedef double diff_type; + + template <typename pixel_type> + double operator()( + const pixel_type& a, + const pixel_type& b + ) const + { + return length(pixel_to_vector<double>(a) - pixel_to_vector<double>(b)); + } + }; + + template <> + struct edge_diff_funct<uint8,void> + { + typedef uint8 diff_type; + uint8 operator()( const uint8& a, const uint8& b) const { return edge_diff_uint(a,b); } + }; + + template <> + struct edge_diff_funct<uint16,void> + { + typedef uint16 diff_type; + uint16 operator()( const uint16& a, const uint16& b) const { return edge_diff_uint(a,b); } + }; + + template <> + struct edge_diff_funct<uint32,void> + { + typedef uint32 diff_type; + uint32 operator()( const uint32& a, const uint32& b) const { return edge_diff_uint(a,b); } + }; + + template <> + struct edge_diff_funct<double,void> + { + typedef 
// Bookkeeping record kept for every component while pixels are being merged.
//   component_size - number of elements currently in the component; a freshly
//                    constructed record describes a single element.
//   internal_diff  - edge difference associated with the component; starts at 0.
template <typename T>
struct graph_image_segmentation_data_T
{
    unsigned long component_size;
    T internal_diff;

    graph_image_segmentation_data_T()
        : component_size(1),
          internal_diff(0)
    {}
};
+ template <typename in_image_type, typename T> + typename enable_if<uint8_or_uint16_pixels<in_image_type> >::type + get_pixel_edges ( + const in_image_type& in_img, + std::vector<segment_image_edge_data_T<T> >& sorted_edges + ) + { + typedef typename in_image_type::pixel_type ptype; + typedef T diff_type; + std::vector<unsigned long> counts(std::numeric_limits<ptype>::max()+1, 0); + + edge_diff_funct<ptype> edge_diff; + + border_enumerator be(get_rect(in_img), 1); + // we are going to do a radix sort on the edge weights. So the first step + // is to accumulate them into count. + const rectangle area = get_rect(in_img); + while (be.move_next()) + { + const long r = be.element().y(); + const long c = be.element().x(); + const ptype pix = in_img[r][c]; + if (area.contains(c-1,r)) counts[edge_diff(pix, in_img[r ][c-1])] += 1; + if (area.contains(c+1,r)) counts[edge_diff(pix, in_img[r ][c+1])] += 1; + if (area.contains(c ,r-1)) counts[edge_diff(pix, in_img[r-1][c ])] += 1; + if (area.contains(c ,r+1)) counts[edge_diff(pix, in_img[r+1][c ])] += 1; + } + for (long r = 1; r+1 < in_img.nr(); ++r) + { + for (long c = 1; c+1 < in_img.nc(); ++c) + { + const ptype pix = in_img[r][c]; + counts[edge_diff(pix, in_img[r-1][c+1])] += 1; + counts[edge_diff(pix, in_img[r ][c+1])] += 1; + counts[edge_diff(pix, in_img[r+1][c ])] += 1; + counts[edge_diff(pix, in_img[r+1][c+1])] += 1; + } + } + + const unsigned long num_edges = shrink_rect(area,1).area()*4 + in_img.nr()*2*3 - 4 + (in_img.nc()-2)*2*3; + typedef segment_image_edge_data_T<T> segment_image_edge_data; + sorted_edges.resize(num_edges); + + // integrate counts. The idea is to have sorted_edges[counts[i]] be the location that edges + // with an edge_diff of i go. So counts[0] == 0, counts[1] == number of 0 edge diff edges, etc. 
+ unsigned long prev = counts[0]; + for (unsigned long i = 1; i < counts.size(); ++i) + { + const unsigned long temp = counts[i]; + counts[i] += counts[i-1]; + counts[i-1] -= prev; + prev = temp; + } + counts[counts.size()-1] -= prev; + + + // now build a sorted list of all the edges + be.reset(); + while(be.move_next()) + { + const point p = be.element(); + const long r = p.y(); + const long c = p.x(); + const ptype pix = in_img[r][c]; + if (area.contains(c-1,r)) + { + const diff_type diff = edge_diff(pix, in_img[r ][c-1]); + sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c-1,r),diff); + } + + if (area.contains(c+1,r)) + { + const diff_type diff = edge_diff(pix, in_img[r ][c+1]); + sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r),diff); + } + + if (area.contains(c ,r-1)) + { + const diff_type diff = edge_diff(pix, in_img[r-1][c ]); + sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r-1),diff); + } + + if (area.contains(c ,r+1)) + { + const diff_type diff = edge_diff(pix, in_img[r+1][c ]); + sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r+1),diff); + } + } + // same thing as the above loop but now we do it on the interior of the image and therefore + // don't have to include the boundary checking if statements used above. 
+ for (long r = 1; r+1 < in_img.nr(); ++r) + { + for (long c = 1; c+1 < in_img.nc(); ++c) + { + const point p(c,r); + const ptype pix = in_img[r][c]; + diff_type diff; + + diff = edge_diff(pix, in_img[r ][c+1]); + sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r),diff); + diff = edge_diff(pix, in_img[r-1][c+1]); + sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r-1),diff); + diff = edge_diff(pix, in_img[r+1][c+1]); + sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r+1),diff); + diff = edge_diff(pix, in_img[r+1][c ]); + sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r+1),diff); + } + } + } + + // ---------------------------------------------------------------------------------------- + + // This is the general purpose version of get_pixel_edges(). It handles all pixel types. + template <typename in_image_type, typename T> + typename disable_if<uint8_or_uint16_pixels<in_image_type> >::type + get_pixel_edges ( + const in_image_type& in_img, + std::vector<segment_image_edge_data_T<T> >& sorted_edges + ) + { + const rectangle area = get_rect(in_img); + sorted_edges.reserve(area.area()*4); + + typedef typename in_image_type::pixel_type ptype; + edge_diff_funct<ptype> edge_diff; + typedef T diff_type; + typedef segment_image_edge_data_T<T> segment_image_edge_data; + + border_enumerator be(get_rect(in_img), 1); + + // now build a sorted list of all the edges + be.reset(); + while(be.move_next()) + { + const point p = be.element(); + const long r = p.y(); + const long c = p.x(); + const ptype& pix = in_img[r][c]; + if (area.contains(c-1,r)) + { + const diff_type diff = edge_diff(pix, in_img[r ][c-1]); + sorted_edges.push_back(segment_image_edge_data(area,p,point(c-1,r),diff)); + } + + if (area.contains(c+1,r)) + { + const diff_type diff = edge_diff(pix, in_img[r ][c+1]); + sorted_edges.push_back(segment_image_edge_data(area,p,point(c+1,r),diff)); + } + + if (area.contains(c 
,r-1)) + { + const diff_type diff = edge_diff(pix, in_img[r-1][c ]); + sorted_edges.push_back( segment_image_edge_data(area,p,point(c ,r-1),diff)); + } + if (area.contains(c ,r+1)) + { + const diff_type diff = edge_diff(pix, in_img[r+1][c ]); + sorted_edges.push_back( segment_image_edge_data(area,p,point(c ,r+1),diff)); + } + } + // same thing as the above loop but now we do it on the interior of the image and therefore + // don't have to include the boundary checking if statements used above. + for (long r = 1; r+1 < in_img.nr(); ++r) + { + for (long c = 1; c+1 < in_img.nc(); ++c) + { + const point p(c,r); + const ptype& pix = in_img[r][c]; + diff_type diff; + + diff = edge_diff(pix, in_img[r ][c+1]); + sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r),diff)); + diff = edge_diff(pix, in_img[r+1][c+1]); + sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r+1),diff)); + diff = edge_diff(pix, in_img[r+1][c ]); + sorted_edges.push_back( segment_image_edge_data(area,p,point(c ,r+1),diff)); + diff = edge_diff(pix, in_img[r-1][c+1]); + sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r-1),diff)); + } + } + + std::sort(sorted_edges.begin(), sorted_edges.end()); + + } + + // ------------------------------------------------------------------------------------ + + } // end of namespace impl + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + void segment_image ( + const in_image_type& in_img_, + out_image_type& out_img_, + const double k = 200, + const unsigned long min_size = 10 + ) + { + using namespace dlib::impl; + typedef typename image_traits<in_image_type>::pixel_type ptype; + typedef typename edge_diff_funct<ptype>::diff_type diff_type; + + // make sure requires clause is not broken + DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, + "\t void segment_image()" + << "\n\t The input images can't be 
the same object." + ); + + COMPILE_TIME_ASSERT(is_unsigned_type<typename image_traits<out_image_type>::pixel_type>::value); + + const_image_view<in_image_type> in_img(in_img_); + image_view<out_image_type> out_img(out_img_); + + out_img.set_size(in_img.nr(), in_img.nc()); + // don't bother doing anything if the image is too small + if (in_img.nr() < 2 || in_img.nc() < 2) + { + assign_all_pixels(out_img,0); + return; + } + + disjoint_subsets sets; + sets.set_size(in_img.size()); + + std::vector<segment_image_edge_data_T<diff_type> > sorted_edges; + get_pixel_edges(in_img, sorted_edges); + + std::vector<graph_image_segmentation_data_T<diff_type> > data(in_img.size()); + + // now start connecting blobs together to make a minimum spanning tree. + for (unsigned long i = 0; i < sorted_edges.size(); ++i) + { + const unsigned long idx1 = sorted_edges[i].idx1; + const unsigned long idx2 = sorted_edges[i].idx2; + + unsigned long set1 = sets.find_set(idx1); + unsigned long set2 = sets.find_set(idx2); + if (set1 != set2) + { + const diff_type diff = sorted_edges[i].diff; + const diff_type tau1 = static_cast<diff_type>(k/data[set1].component_size); + const diff_type tau2 = static_cast<diff_type>(k/data[set2].component_size); + + const diff_type mint = std::min(data[set1].internal_diff + tau1, + data[set2].internal_diff + tau2); + if (diff <= mint) + { + const unsigned long new_set = sets.merge_sets(set1, set2); + data[new_set].component_size = data[set1].component_size + data[set2].component_size; + data[new_set].internal_diff = diff; + } + } + } + + // now merge any really small blobs + if (min_size != 0) + { + for (unsigned long i = 0; i < sorted_edges.size(); ++i) + { + const unsigned long idx1 = sorted_edges[i].idx1; + const unsigned long idx2 = sorted_edges[i].idx2; + + unsigned long set1 = sets.find_set(idx1); + unsigned long set2 = sets.find_set(idx2); + if (set1 != set2 && (data[set1].component_size < min_size || data[set2].component_size < min_size)) + { + const 
unsigned long new_set = sets.merge_sets(set1, set2); + data[new_set].component_size = data[set1].component_size + data[set2].component_size; + //data[new_set].internal_diff = sorted_edges[i].diff; + } + } + } + + unsigned long idx = 0; + for (long r = 0; r < out_img.nr(); ++r) + { + for (long c = 0; c < out_img.nc(); ++c) + { + out_img[r][c] = sets.find_set(idx++); + } + } + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// Candidate object location generation code. +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + namespace impl + { + struct edge_data + { + double edge_diff; + unsigned long set1; + unsigned long set2; + bool operator<(const edge_data& item) const + { + return edge_diff < item.edge_diff; + } + }; + + template < + typename in_image_type, + typename diff_type + > + void find_basic_candidate_object_locations ( + const in_image_type& in_img, + const std::vector<dlib::impl::segment_image_edge_data_T<diff_type> >& sorted_edges, + std::vector<rectangle>& out_rects, + std::vector<edge_data>& edges, + const double k, + const unsigned long min_size + ) + { + using namespace dlib::impl; + + std::vector<dlib::impl::segment_image_edge_data_T<diff_type> > rejected_edges; + rejected_edges.reserve(sorted_edges.size()); + + out_rects.clear(); + edges.clear(); + + // don't bother doing anything if the image is too small + if (in_img.nr() < 2 || in_img.nc() < 2) + { + return; + } + + disjoint_subsets sets; + sets.set_size(in_img.size()); + + + std::vector<graph_image_segmentation_data_T<diff_type> > data(in_img.size()); + + + + std::pair<unsigned long,unsigned long> last_blob_edge(std::numeric_limits<unsigned long>::max(), + std::numeric_limits<unsigned long>::max());; + // now start 
connecting blobs together to make a minimum spanning tree. + for (unsigned long i = 0; i < sorted_edges.size(); ++i) + { + const unsigned long idx1 = sorted_edges[i].idx1; + const unsigned long idx2 = sorted_edges[i].idx2; + + unsigned long set1 = sets.find_set(idx1); + unsigned long set2 = sets.find_set(idx2); + if (set1 != set2) + { + const diff_type diff = sorted_edges[i].diff; + const diff_type tau1 = static_cast<diff_type>(k/data[set1].component_size); + const diff_type tau2 = static_cast<diff_type>(k/data[set2].component_size); + + const diff_type mint = std::min(data[set1].internal_diff + tau1, + data[set2].internal_diff + tau2); + if (diff <= mint) + { + const unsigned long new_set = sets.merge_sets(set1, set2); + data[new_set].component_size = data[set1].component_size + data[set2].component_size; + data[new_set].internal_diff = diff; + } + else + { + // Don't bother keeping multiple edges from the same pair of blobs, we + // only need one for what we will do later. + if (std::make_pair(set1,set2) != last_blob_edge) + { + segment_image_edge_data_T<diff_type> temp = sorted_edges[i]; + temp.idx1 = set1; + temp.idx2 = set2; + rejected_edges.push_back(temp); + last_blob_edge = std::make_pair(set1,set2); + } + } + } + } + + + // merge small blobs + for (unsigned long i = 0; i < rejected_edges.size(); ++i) + { + const unsigned long idx1 = rejected_edges[i].idx1; + const unsigned long idx2 = rejected_edges[i].idx2; + + unsigned long set1 = sets.find_set(idx1); + unsigned long set2 = sets.find_set(idx2); + rejected_edges[i].idx1 = set1; + rejected_edges[i].idx2 = set2; + if (set1 != set2 && (data[set1].component_size < min_size || data[set2].component_size < min_size)) + { + const unsigned long new_set = sets.merge_sets(set1, set2); + data[new_set].component_size = data[set1].component_size + data[set2].component_size; + data[new_set].internal_diff = rejected_edges[i].diff; + } + } + + // find bounding boxes of each blob + std::map<unsigned long, rectangle> boxes; 
+ std::map<unsigned long, unsigned long> box_id_map; + unsigned long idx = 0; + for (long r = 0; r < in_img.nr(); ++r) + { + for (long c = 0; c < in_img.nc(); ++c) + { + const unsigned long id = sets.find_set(idx++); + // Accumulate the current point into its box and if it is the first point + // in the box then also record the id number for this box. + if ((boxes[id] += point(c,r)).area() == 1) + box_id_map[id] = boxes.size()-1; + } + } + + // copy boxes into out_rects + out_rects.resize(boxes.size()); + for (std::map<unsigned long,rectangle>::iterator i = boxes.begin(); i != boxes.end(); ++i) + { + out_rects[box_id_map[i->first]] = i->second; + } + + // Now find the edges between the boxes + typedef dlib::memory_manager<char>::kernel_2c mm_type; + dlib::set<std::pair<unsigned long, unsigned long>, mm_type>::kernel_1a neighbors_final; + for (unsigned long i = 0; i < rejected_edges.size(); ++i) + { + const unsigned long idx1 = rejected_edges[i].idx1; + const unsigned long idx2 = rejected_edges[i].idx2; + + unsigned long set1 = sets.find_set(idx1); + unsigned long set2 = sets.find_set(idx2); + if (set1 != set2) + { + std::pair<unsigned long, unsigned long> p = std::make_pair(set1,set2); + if (!neighbors_final.is_member(p)) + { + neighbors_final.add(p); + + edge_data temp; + const diff_type mint = std::min(data[set1].internal_diff , + data[set2].internal_diff ); + temp.edge_diff = rejected_edges[i].diff - mint; + temp.set1 = box_id_map[set1]; + temp.set2 = box_id_map[set2]; + edges.push_back(temp); + } + } + } + + std::sort(edges.begin(), edges.end()); + } + } // end namespace impl + +// ---------------------------------------------------------------------------------------- + + template <typename alloc> + void remove_duplicates ( + std::vector<rectangle,alloc>& rects + ) + { + std::sort(rects.begin(), rects.end(), std::less<rectangle>()); + unsigned long num_unique = 1; + for (unsigned long i = 1; i < rects.size(); ++i) + { + if (rects[i] != rects[i-1]) + { + 
rects[num_unique++] = rects[i]; + } + } + if (rects.size() != 0) + rects.resize(num_unique); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename EXP + > + void find_candidate_object_locations ( + const in_image_type& in_img_, + std::vector<rectangle>& rects, + const matrix_exp<EXP>& kvals, + const unsigned long min_size = 20, + const unsigned long max_merging_iterations = 50 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(kvals) && kvals.size() > 0, + "\t void find_candidate_object_locations()" + << "\n\t Invalid inputs were given to this function." + << "\n\t is_vector(kvals): " << is_vector(kvals) + << "\n\t kvals.size(): " << kvals.size() + ); + + typedef dlib::memory_manager<char>::kernel_2c mm_type; + typedef dlib::set<rectangle, mm_type>::kernel_1a set_of_rects; + + using namespace dlib::impl; + typedef typename image_traits<in_image_type>::pixel_type ptype; + typedef typename edge_diff_funct<ptype>::diff_type diff_type; + + const_image_view<in_image_type> in_img(in_img_); + + // don't bother doing anything if the image is too small + if (in_img.nr() < 2 || in_img.nc() < 2) + { + return; + } + + std::vector<edge_data> edges; + std::vector<rectangle> working_rects; + std::vector<segment_image_edge_data_T<diff_type> > sorted_edges; + get_pixel_edges(in_img, sorted_edges); + + disjoint_subsets sets; + + for (long j = 0; j < kvals.size(); ++j) + { + const double k = kvals(j); + + find_basic_candidate_object_locations(in_img, sorted_edges, working_rects, edges, k, min_size); + rects.insert(rects.end(), working_rects.begin(), working_rects.end()); + + + // Now iteratively merge all the rectangles we have and record the results. + // Note that, unlike what is described in the paper + // Segmentation as Selective Search for Object Recognition" by Koen E. A. van de Sande, et al. 
+ // we don't use any kind of histogram/SIFT like thing to order the edges + // between the blobs. Here we simply order by the pixel difference value. + // Additionally, note that we keep progressively merging boxes in the outer + // loop rather than performing just a single iteration as indicated in the + // paper. + set_of_rects detected_rects; + bool did_merge = true; + for (unsigned long iter = 0; did_merge && iter < max_merging_iterations; ++iter) + { + did_merge = false; + sets.clear(); + sets.set_size(working_rects.size()); + + // recursively merge neighboring blobs until we have merged everything + for (unsigned long i = 0; i < edges.size(); ++i) + { + edge_data temp = edges[i]; + + temp.set1 = sets.find_set(temp.set1); + temp.set2 = sets.find_set(temp.set2); + if (temp.set1 != temp.set2) + { + rectangle merged_rect = working_rects[temp.set1] + working_rects[temp.set2]; + // Skip merging this pair of blobs if it was merged in a previous + // iteration. Doing this lets us consider other possible blob + // merges. 
+ if (!detected_rects.is_member(merged_rect)) + { + const unsigned long new_set = sets.merge_sets(temp.set1, temp.set2); + rects.push_back(merged_rect); + working_rects[new_set] = merged_rect; + did_merge = true; + detected_rects.add(merged_rect); + } + } + } + } + } + + remove_duplicates(rects); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type + > + void find_candidate_object_locations ( + const in_image_type& in_img, + std::vector<rectangle>& rects + ) + { + find_candidate_object_locations(in_img, rects, linspace(50, 200, 3)); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SEGMENT_ImAGE_Hh_ + diff --git a/ml/dlib/dlib/image_transforms/segment_image_abstract.h b/ml/dlib/dlib/image_transforms/segment_image_abstract.h new file mode 100644 index 000000000..af1af46a1 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/segment_image_abstract.h @@ -0,0 +1,126 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SEGMENT_ImAGE_ABSTRACT_Hh_ +#ifdef DLIB_SEGMENT_ImAGE_ABSTRACT_Hh_ + +#include <vector> +#include "../matrix.h" +#include "../image_processing/generic_image.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + void segment_image ( + const in_image_type& in_img, + out_image_type& out_img, + const double k = 200, + const unsigned long min_size = 10 + ); + /*! 
+ requires + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - out_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - in_image_type can contain any pixel type with a pixel_traits specialization + or a dlib matrix object representing a row or column vector. + - out_image_type must contain an unsigned integer pixel type. + - is_same_object(in_img, out_img) == false + ensures + - Attempts to segment in_img into regions which have some visual consistency to + them. In particular, this function implements the algorithm described in the + paper: Efficient Graph-Based Image Segmentation by Felzenszwalb and Huttenlocher. + - #out_img.nr() == in_img.nr() + - #out_img.nc() == in_img.nc() + - for all valid r and c: + - #out_img[r][c] == an integer value indicating the identity of the segment + containing the pixel in_img[r][c]. + - The k parameter is a measure used to influence how large the segment regions + will be. Larger k generally results in larger segments being produced. For + a deeper discussion of the k parameter you should consult the above + referenced paper. + - min_size is a lower bound on the size of the output segments. That is, it is + guaranteed that all output segments will have at least min_size pixels in + them (unless the whole image contains fewer than min_size pixels, in this + case the entire image will be put into a single segment). 
+ !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename EXP + > + void find_candidate_object_locations ( + const in_image_type& in_img, + std::vector<rectangle>& rects, + const matrix_exp<EXP>& kvals = linspace(50, 200, 3), + const unsigned long min_size = 20, + const unsigned long max_merging_iterations = 50 + ); + /*! + requires + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - is_vector(kvals) == true + - kvals.size() > 0 + ensures + - This function takes an input image and generates a set of candidate + rectangles which are expected to bound any objects in the image. It does + this by running a version of the segment_image() routine on the image and + then reports rectangles containing each of the segments as well as rectangles + containing unions of adjacent segments. The basic idea is described in the + paper: + Segmentation as Selective Search for Object Recognition by Koen E. A. van de Sande, et al. + Note that this function deviates from what is described in the paper slightly. + See the code for details. + - The basic segmentation is performed kvals.size() times, each time with the k + parameter (see segment_image() and the Felzenszwalb paper for details on k) + set to a different value from kvals. + - When doing the basic segmentations prior to any box merging, we discard all + rectangles that have an area < min_size. Therefore, all outputs and + subsequent merged rectangles are built out of rectangles that contain at + least min_size pixels. 
Note that setting min_size to a smaller value than + you might otherwise be interested in using can be useful since it allows a + larger number of possible merged boxes to be created. + - There are max_merging_iterations rounds of neighboring blob merging. + Therefore, this parameter has some effect on the number of output rectangles + you get, with larger values of the parameter giving more output rectangles. + - This function appends the output rectangles into #rects. This means that any + rectangles in rects before this function was called will still be in there + after it terminates. Note further that #rects will not contain any duplicate + rectangles. That is, for all valid i and j where i != j it will be true + that: + - #rects[i] != rects[j] + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename alloc + > + void remove_duplicates ( + std::vector<rectangle,alloc>& rects + ); + /*! + ensures + - This function finds any duplicate rectangles in rects and removes the extra + instances. This way, the result is that rects contains only unique rectangle + instances. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SEGMENT_ImAGE_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/image_transforms/spatial_filtering.h b/ml/dlib/dlib/image_transforms/spatial_filtering.h new file mode 100644 index 000000000..91dcae321 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/spatial_filtering.h @@ -0,0 +1,1580 @@ +// Copyright (C) 2006 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_SPATIAL_FILTERINg_H_ +#define DLIB_SPATIAL_FILTERINg_H_ + +#include "../pixel.h" +#include "spatial_filtering_abstract.h" +#include "../algs.h" +#include "../assert.h" +#include "../array2d.h" +#include "../matrix.h" +#include "../geometry/border_enumerator.h" +#include "../simd.h" +#include <limits> +#include "assign_image.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template < + typename in_image_type, + typename out_image_type, + typename EXP, + typename T + > + rectangle grayscale_spatially_filter_image ( + const in_image_type& in_img_, + out_image_type& out_img_, + const matrix_exp<EXP>& filter_, + T scale, + bool use_abs, + bool add_to + ) + { + const_temp_matrix<EXP> filter(filter_); + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); + + DLIB_ASSERT(scale != 0 && filter.size() != 0, + "\trectangle spatially_filter_image()" + << "\n\t You can't give a scale of zero or an empty filter." 
+ << "\n\t scale: "<< scale + << "\n\t filter.nr(): "<< filter.nr() + << "\n\t filter.nc(): "<< filter.nc() + ); + DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, + "\trectangle spatially_filter_image()" + << "\n\tYou must give two different image objects" + ); + + + const_image_view<in_image_type> in_img(in_img_); + image_view<out_image_type> out_img(out_img_); + + // if there isn't any input image then don't do anything + if (in_img.size() == 0) + { + out_img.clear(); + return rectangle(); + } + + out_img.set_size(in_img.nr(),in_img.nc()); + + + // figure out the range that we should apply the filter to + const long first_row = filter.nr()/2; + const long first_col = filter.nc()/2; + const long last_row = in_img.nr() - ((filter.nr()-1)/2); + const long last_col = in_img.nc() - ((filter.nc()-1)/2); + + const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1); + if (!add_to) + zero_border_pixels(out_img_, non_border); + + // apply the filter to the image + for (long r = first_row; r < last_row; ++r) + { + for (long c = first_col; c < last_col; ++c) + { + typedef typename EXP::type ptype; + ptype p; + ptype temp = 0; + for (long m = 0; m < filter.nr(); ++m) + { + for (long n = 0; n < filter.nc(); ++n) + { + // pull out the current pixel and put it into p + p = get_pixel_intensity(in_img[r-first_row+m][c-first_col+n]); + temp += p*filter(m,n); + } + } + + temp /= scale; + + if (use_abs && temp < 0) + { + temp = -temp; + } + + // save this pixel to the output image + if (add_to == false) + { + assign_pixel(out_img[r][c], temp); + } + else + { + assign_pixel(out_img[r][c], temp + out_img[r][c]); + } + } + } + + return non_border; + } + + // ------------------------------------------------------------------------------------ + + template < + typename in_image_type, + typename out_image_type, + typename EXP + > + rectangle float_spatially_filter_image ( + const in_image_type& in_img_, + out_image_type& out_img_, + const 
matrix_exp<EXP>& filter_, + bool add_to + ) + { + + const_temp_matrix<EXP> filter(filter_); + DLIB_ASSERT(filter.size() != 0, + "\trectangle spatially_filter_image()" + << "\n\t You can't give an empty filter." + << "\n\t filter.nr(): "<< filter.nr() + << "\n\t filter.nc(): "<< filter.nc() + ); + DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, + "\trectangle spatially_filter_image()" + << "\n\tYou must give two different image objects" + ); + + + const_image_view<in_image_type> in_img(in_img_); + image_view<out_image_type> out_img(out_img_); + + // if there isn't any input image then don't do anything + if (in_img.size() == 0) + { + out_img.clear(); + return rectangle(); + } + + out_img.set_size(in_img.nr(),in_img.nc()); + + + // figure out the range that we should apply the filter to + const long first_row = filter.nr()/2; + const long first_col = filter.nc()/2; + const long last_row = in_img.nr() - ((filter.nr()-1)/2); + const long last_col = in_img.nc() - ((filter.nc()-1)/2); + + const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1); + if (!add_to) + zero_border_pixels(out_img_, non_border); + + // apply the filter to the image + for (long r = first_row; r < last_row; ++r) + { + long c = first_col; + for (; c < last_col-7; c+=8) + { + simd8f p,p2,p3; + simd8f temp = 0, temp2=0, temp3=0; + for (long m = 0; m < filter.nr(); ++m) + { + long n = 0; + for (; n < filter.nc()-2; n+=3) + { + // pull out the current pixel and put it into p + p.load(&in_img[r-first_row+m][c-first_col+n]); + p2.load(&in_img[r-first_row+m][c-first_col+n+1]); + p3.load(&in_img[r-first_row+m][c-first_col+n+2]); + temp += p*filter(m,n); + temp2 += p2*filter(m,n+1); + temp3 += p3*filter(m,n+2); + } + for (; n < filter.nc(); ++n) + { + // pull out the current pixel and put it into p + p.load(&in_img[r-first_row+m][c-first_col+n]); + temp += p*filter(m,n); + } + } + temp += temp2+temp3; + + // save this pixel to the output image + if (add_to == false) + { + 
temp.store(&out_img[r][c]); + } + else + { + p.load(&out_img[r][c]); + temp += p; + temp.store(&out_img[r][c]); + } + } + for (; c < last_col; ++c) + { + float p; + float temp = 0; + for (long m = 0; m < filter.nr(); ++m) + { + for (long n = 0; n < filter.nc(); ++n) + { + // pull out the current pixel and put it into p + p = in_img[r-first_row+m][c-first_col+n]; + temp += p*filter(m,n); + } + } + + // save this pixel to the output image + if (add_to == false) + { + out_img[r][c] = temp; + } + else + { + out_img[r][c] += temp; + } + } + } + + return non_border; + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + typename EXP + > + struct is_float_filtering2 + { + const static bool value = is_same_type<typename image_traits<in_image_type>::pixel_type,float>::value && + is_same_type<typename image_traits<out_image_type>::pixel_type,float>::value && + is_same_type<typename EXP::type,float>::value; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + typename EXP, + typename T + > + typename enable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale && + is_float_filtering2<in_image_type,out_image_type,EXP>::value,rectangle>::type + spatially_filter_image ( + const in_image_type& in_img, + out_image_type& out_img, + const matrix_exp<EXP>& filter, + T scale, + bool use_abs = false, + bool add_to = false + ) + { + if (use_abs == false) + { + if (scale == 1) + return impl::float_spatially_filter_image(in_img, out_img, filter, add_to); + else + return impl::float_spatially_filter_image(in_img, out_img, filter/scale, add_to); + } + else + { + return impl::grayscale_spatially_filter_image(in_img, out_img, filter, scale, true, add_to); + } + } + +// 
---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + typename EXP, + typename T + > + typename enable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale && + !is_float_filtering2<in_image_type,out_image_type,EXP>::value,rectangle>::type + spatially_filter_image ( + const in_image_type& in_img, + out_image_type& out_img, + const matrix_exp<EXP>& filter, + T scale, + bool use_abs = false, + bool add_to = false + ) + { + return impl::grayscale_spatially_filter_image(in_img,out_img,filter,scale,use_abs,add_to); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + typename EXP, + typename T + > + typename disable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale,rectangle>::type + spatially_filter_image ( + const in_image_type& in_img_, + out_image_type& out_img_, + const matrix_exp<EXP>& filter_, + T scale + ) + { + const_temp_matrix<EXP> filter(filter_); + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); + + DLIB_ASSERT(scale != 0 && filter.size() != 0, + "\trectangle spatially_filter_image()" + << "\n\t You can't give a scale of zero or an empty filter." 
+ << "\n\t scale: "<< scale + << "\n\t filter.nr(): "<< filter.nr() + << "\n\t filter.nc(): "<< filter.nc() + ); + DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, + "\trectangle spatially_filter_image()" + << "\n\tYou must give two different image objects" + ); + + + const_image_view<in_image_type> in_img(in_img_); + image_view<out_image_type> out_img(out_img_); + + // if there isn't any input image then don't do anything + if (in_img.size() == 0) + { + out_img.clear(); + return rectangle(); + } + + out_img.set_size(in_img.nr(),in_img.nc()); + + + // figure out the range that we should apply the filter to + const long first_row = filter.nr()/2; + const long first_col = filter.nc()/2; + const long last_row = in_img.nr() - ((filter.nr()-1)/2); + const long last_col = in_img.nc() - ((filter.nc()-1)/2); + + const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1); + zero_border_pixels(out_img, non_border); + + // apply the filter to the image + for (long r = first_row; r < last_row; ++r) + { + for (long c = first_col; c < last_col; ++c) + { + typedef typename image_traits<in_image_type>::pixel_type pixel_type; + typedef matrix<typename EXP::type,pixel_traits<pixel_type>::num,1> ptype; + ptype p; + ptype temp; + temp = 0; + for (long m = 0; m < filter.nr(); ++m) + { + for (long n = 0; n < filter.nc(); ++n) + { + // pull out the current pixel and put it into p + p = pixel_to_vector<typename EXP::type>(in_img[r-first_row+m][c-first_col+n]); + temp += p*filter(m,n); + } + } + + temp /= scale; + + pixel_type pp; + vector_to_pixel(pp, temp); + assign_pixel(out_img[r][c], pp); + } + } + + return non_border; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + typename EXP + > + rectangle spatially_filter_image ( + const in_image_type& in_img, + out_image_type& out_img, + const matrix_exp<EXP>& filter + ) + { + return 
spatially_filter_image(in_img,out_img,filter,1); + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template < + typename in_image_type, + typename out_image_type, + typename EXP1, + typename EXP2, + typename T + > + rectangle grayscale_spatially_filter_image_separable ( + const in_image_type& in_img_, + out_image_type& out_img_, + const matrix_exp<EXP1>& _row_filter, + const matrix_exp<EXP2>& _col_filter, + T scale, + bool use_abs, + bool add_to + ) + { + const_temp_matrix<EXP1> row_filter(_row_filter); + const_temp_matrix<EXP2> col_filter(_col_filter); + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); + + DLIB_ASSERT(scale != 0 && row_filter.size() != 0 && col_filter.size() != 0 && + is_vector(row_filter) && + is_vector(col_filter), + "\trectangle spatially_filter_image_separable()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t scale: "<< scale + << "\n\t row_filter.size(): "<< row_filter.size() + << "\n\t col_filter.size(): "<< col_filter.size() + << "\n\t is_vector(row_filter): "<< is_vector(row_filter) + << "\n\t is_vector(col_filter): "<< is_vector(col_filter) + ); + DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, + "\trectangle spatially_filter_image_separable()" + << "\n\tYou must give two different image objects" + ); + + + const_image_view<in_image_type> in_img(in_img_); + image_view<out_image_type> out_img(out_img_); + + // if there isn't any input image then don't do anything + if (in_img.size() == 0) + { + out_img.clear(); + return rectangle(); + } + + out_img.set_size(in_img.nr(),in_img.nc()); + + + // figure out the range that we should apply the filter to + const long first_row = col_filter.size()/2; + const long first_col = row_filter.size()/2; + const long last_row = in_img.nr() - ((col_filter.size()-1)/2); + const long last_col = in_img.nc() - ((row_filter.size()-1)/2); + + const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1); + if (!add_to) + zero_border_pixels(out_img, non_border); + + typedef typename EXP1::type ptype; + + array2d<ptype> temp_img; + temp_img.set_size(in_img.nr(), in_img.nc()); + + // apply the row filter + for (long r = 0; r < in_img.nr(); ++r) + { + for (long c = first_col; c < last_col; ++c) + { + ptype p; + ptype temp = 0; + for (long n = 0; n < row_filter.size(); ++n) + { + // pull out the current pixel and put it into p + p = get_pixel_intensity(in_img[r][c-first_col+n]); + temp += p*row_filter(n); + } + temp_img[r][c] = temp; + } + } + + // apply the column filter + for (long r = first_row; r < last_row; ++r) + { + for (long c = first_col; c < last_col; ++c) + { + ptype temp = 0; + for (long m = 0; m < col_filter.size(); ++m) + { + temp += temp_img[r-first_row+m][c]*col_filter(m); + } + + temp /= scale; + + if (use_abs && temp < 0) + { + temp = -temp; + } + + // save this pixel to the output 
image + if (add_to == false) + { + assign_pixel(out_img[r][c], temp); + } + else + { + assign_pixel(out_img[r][c], temp + out_img[r][c]); + } + } + } + return non_border; + } + + } // namespace impl + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + typename EXP1, + typename EXP2 + > + struct is_float_filtering + { + const static bool value = is_same_type<typename image_traits<in_image_type>::pixel_type,float>::value && + is_same_type<typename image_traits<out_image_type>::pixel_type,float>::value && + is_same_type<typename EXP1::type,float>::value && + is_same_type<typename EXP2::type,float>::value; + }; + +// ---------------------------------------------------------------------------------------- + + // This overload is optimized to use SIMD instructions when filtering float images with + // float filters. + template < + typename in_image_type, + typename out_image_type, + typename EXP1, + typename EXP2 + > + rectangle float_spatially_filter_image_separable ( + const in_image_type& in_img_, + out_image_type& out_img_, + const matrix_exp<EXP1>& _row_filter, + const matrix_exp<EXP2>& _col_filter, + out_image_type& scratch_, + bool add_to = false + ) + { + // You can only use this function with images and filters containing float + // variables. + COMPILE_TIME_ASSERT((is_float_filtering<in_image_type,out_image_type,EXP1,EXP2>::value == true)); + + + const_temp_matrix<EXP1> row_filter(_row_filter); + const_temp_matrix<EXP2> col_filter(_col_filter); + DLIB_ASSERT(row_filter.size() != 0 && col_filter.size() != 0 && + is_vector(row_filter) && + is_vector(col_filter), + "\trectangle float_spatially_filter_image_separable()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t row_filter.size(): "<< row_filter.size() + << "\n\t col_filter.size(): "<< col_filter.size() + << "\n\t is_vector(row_filter): "<< is_vector(row_filter) + << "\n\t is_vector(col_filter): "<< is_vector(col_filter) + ); + DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, + "\trectangle float_spatially_filter_image_separable()" + << "\n\tYou must give two different image objects" + ); + + + const_image_view<in_image_type> in_img(in_img_); + image_view<out_image_type> out_img(out_img_); + + // if there isn't any input image then don't do anything + if (in_img.size() == 0) + { + out_img.clear(); + return rectangle(); + } + + out_img.set_size(in_img.nr(),in_img.nc()); + + // figure out the range that we should apply the filter to + const long first_row = col_filter.size()/2; + const long first_col = row_filter.size()/2; + const long last_row = in_img.nr() - ((col_filter.size()-1)/2); + const long last_col = in_img.nc() - ((row_filter.size()-1)/2); + + const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1); + if (!add_to) + zero_border_pixels(out_img, non_border); + + image_view<out_image_type> scratch(scratch_); + scratch.set_size(in_img.nr(), in_img.nc()); + + // apply the row filter + for (long r = 0; r < in_img.nr(); ++r) + { + long c = first_col; + for (; c < last_col-7; c+=8) + { + simd8f p,p2,p3, temp = 0, temp2=0, temp3=0; + long n = 0; + for (; n < row_filter.size()-2; n+=3) + { + // pull out the current pixel and put it into p + p.load(&in_img[r][c-first_col+n]); + p2.load(&in_img[r][c-first_col+n+1]); + p3.load(&in_img[r][c-first_col+n+2]); + temp += p*row_filter(n); + temp2 += p2*row_filter(n+1); + temp3 += p3*row_filter(n+2); + } + for (; n < row_filter.size(); ++n) + { + // pull out the current pixel and put it into p + p.load(&in_img[r][c-first_col+n]); + temp += p*row_filter(n); + } + temp += temp2 + temp3; + temp.store(&scratch[r][c]); + } + for (; c < last_col; ++c) + { + float p; + float temp = 0; + for 
(long n = 0; n < row_filter.size(); ++n) + { + // pull out the current pixel and put it into p + p = in_img[r][c-first_col+n]; + temp += p*row_filter(n); + } + scratch[r][c] = temp; + } + } + + // apply the column filter + for (long r = first_row; r < last_row; ++r) + { + long c = first_col; + for (; c < last_col-7; c+=8) + { + simd8f p, p2, p3, temp = 0, temp2 = 0, temp3 = 0; + long m = 0; + for (; m < col_filter.size()-2; m+=3) + { + p.load(&scratch[r-first_row+m][c]); + p2.load(&scratch[r-first_row+m+1][c]); + p3.load(&scratch[r-first_row+m+2][c]); + temp += p*col_filter(m); + temp2 += p2*col_filter(m+1); + temp3 += p3*col_filter(m+2); + } + for (; m < col_filter.size(); ++m) + { + p.load(&scratch[r-first_row+m][c]); + temp += p*col_filter(m); + } + temp += temp2+temp3; + + // save this pixel to the output image + if (add_to == false) + { + temp.store(&out_img[r][c]); + } + else + { + p.load(&out_img[r][c]); + temp += p; + temp.store(&out_img[r][c]); + } + } + for (; c < last_col; ++c) + { + float temp = 0; + for (long m = 0; m < col_filter.size(); ++m) + { + temp += scratch[r-first_row+m][c]*col_filter(m); + } + + // save this pixel to the output image + if (add_to == false) + { + out_img[r][c] = temp; + } + else + { + out_img[r][c] += temp; + } + } + } + return non_border; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + typename EXP1, + typename EXP2, + typename T + > + typename enable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale && + is_float_filtering<in_image_type,out_image_type,EXP1,EXP2>::value,rectangle>::type + spatially_filter_image_separable ( + const in_image_type& in_img, + out_image_type& out_img, + const matrix_exp<EXP1>& row_filter, + const matrix_exp<EXP2>& col_filter, + T scale, + bool use_abs = false, + bool add_to = false + ) + { + if (use_abs == false) + { + out_image_type scratch; + if 
(scale == 1) + return float_spatially_filter_image_separable(in_img, out_img, row_filter, col_filter, scratch, add_to); + else + return float_spatially_filter_image_separable(in_img, out_img, row_filter/scale, col_filter, scratch, add_to); + } + else + { + return impl::grayscale_spatially_filter_image_separable(in_img, out_img, row_filter, col_filter, scale, true, add_to); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + typename EXP1, + typename EXP2, + typename T + > + typename enable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale && + !is_float_filtering<in_image_type,out_image_type,EXP1,EXP2>::value,rectangle>::type + spatially_filter_image_separable ( + const in_image_type& in_img, + out_image_type& out_img, + const matrix_exp<EXP1>& row_filter, + const matrix_exp<EXP2>& col_filter, + T scale, + bool use_abs = false, + bool add_to = false + ) + { + return impl::grayscale_spatially_filter_image_separable(in_img,out_img, row_filter, col_filter, scale, use_abs, add_to); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + typename EXP1, + typename EXP2, + typename T + > + typename disable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale,rectangle>::type + spatially_filter_image_separable ( + const in_image_type& in_img_, + out_image_type& out_img_, + const matrix_exp<EXP1>& _row_filter, + const matrix_exp<EXP2>& _col_filter, + T scale + ) + { + const_temp_matrix<EXP1> row_filter(_row_filter); + const_temp_matrix<EXP2> col_filter(_col_filter); + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); + + 
DLIB_ASSERT(scale != 0 && row_filter.size() != 0 && col_filter.size() != 0 && + is_vector(row_filter) && + is_vector(col_filter), + "\trectangle spatially_filter_image_separable()" + << "\n\t Invalid inputs were given to this function." + << "\n\t scale: "<< scale + << "\n\t row_filter.size(): "<< row_filter.size() + << "\n\t col_filter.size(): "<< col_filter.size() + << "\n\t is_vector(row_filter): "<< is_vector(row_filter) + << "\n\t is_vector(col_filter): "<< is_vector(col_filter) + ); + DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, + "\trectangle spatially_filter_image_separable()" + << "\n\tYou must give two different image objects" + ); + + + const_image_view<in_image_type> in_img(in_img_); + image_view<out_image_type> out_img(out_img_); + + // if there isn't any input image then don't do anything + if (in_img.size() == 0) + { + out_img.clear(); + return rectangle(); + } + + out_img.set_size(in_img.nr(),in_img.nc()); + + + // figure out the range that we should apply the filter to + const long first_row = col_filter.size()/2; + const long first_col = row_filter.size()/2; + const long last_row = in_img.nr() - ((col_filter.size()-1)/2); + const long last_col = in_img.nc() - ((row_filter.size()-1)/2); + + const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1); + zero_border_pixels(out_img, non_border); + + typedef typename image_traits<in_image_type>::pixel_type pixel_type; + typedef matrix<typename EXP1::type,pixel_traits<pixel_type>::num,1> ptype; + + array2d<ptype> temp_img; + temp_img.set_size(in_img.nr(), in_img.nc()); + + // apply the row filter + for (long r = 0; r < in_img.nr(); ++r) + { + for (long c = first_col; c < last_col; ++c) + { + ptype p; + ptype temp; + temp = 0; + for (long n = 0; n < row_filter.size(); ++n) + { + // pull out the current pixel and put it into p + p = pixel_to_vector<typename EXP1::type>(in_img[r][c-first_col+n]); + temp += p*row_filter(n); + } + temp_img[r][c] = temp; + } + } + + // 
apply the column filter + for (long r = first_row; r < last_row; ++r) + { + for (long c = first_col; c < last_col; ++c) + { + ptype temp; + temp = 0; + for (long m = 0; m < col_filter.size(); ++m) + { + temp += temp_img[r-first_row+m][c]*col_filter(m); + } + + temp /= scale; + + + // save this pixel to the output image + pixel_type p; + vector_to_pixel(p, temp); + assign_pixel(out_img[r][c], p); + } + } + return non_border; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + typename EXP1, + typename EXP2 + > + rectangle spatially_filter_image_separable ( + const in_image_type& in_img, + out_image_type& out_img, + const matrix_exp<EXP1>& row_filter, + const matrix_exp<EXP2>& col_filter + ) + { + return spatially_filter_image_separable(in_img,out_img,row_filter,col_filter,1); + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + typename EXP1, + typename EXP2, + typename T + > + rectangle spatially_filter_image_separable_down ( + const unsigned long downsample, + const in_image_type& in_img_, + out_image_type& out_img_, + const matrix_exp<EXP1>& row_filter, + const matrix_exp<EXP2>& col_filter, + T scale, + bool use_abs = false, + bool add_to = false + ) + { + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true ); + + DLIB_ASSERT(downsample > 0 && + scale != 0 && + row_filter.size()%2 == 1 
&& + col_filter.size()%2 == 1 && + is_vector(row_filter) && + is_vector(col_filter), + "\trectangle spatially_filter_image_separable_down()" + << "\n\t Invalid inputs were given to this function." + << "\n\t downsample: "<< downsample + << "\n\t scale: "<< scale + << "\n\t row_filter.size(): "<< row_filter.size() + << "\n\t col_filter.size(): "<< col_filter.size() + << "\n\t is_vector(row_filter): "<< is_vector(row_filter) + << "\n\t is_vector(col_filter): "<< is_vector(col_filter) + ); + DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, + "\trectangle spatially_filter_image_separable_down()" + << "\n\tYou must give two different image objects" + ); + + + const_image_view<in_image_type> in_img(in_img_); + image_view<out_image_type> out_img(out_img_); + + // if there isn't any input image then don't do anything + if (in_img.size() == 0) + { + out_img.clear(); + return rectangle(); + } + + out_img.set_size((long)(std::ceil((double)in_img.nr()/downsample)), + (long)(std::ceil((double)in_img.nc()/downsample))); + + const double col_border = std::floor(col_filter.size()/2.0); + const double row_border = std::floor(row_filter.size()/2.0); + + // figure out the range that we should apply the filter to + const long first_row = (long)std::ceil(col_border/downsample); + const long first_col = (long)std::ceil(row_border/downsample); + const long last_row = (long)std::ceil((in_img.nr() - col_border)/downsample) - 1; + const long last_col = (long)std::ceil((in_img.nc() - row_border)/downsample) - 1; + + // zero border pixels + const rectangle non_border = rectangle(first_col, first_row, last_col, last_row); + zero_border_pixels(out_img,non_border); + + typedef typename EXP1::type ptype; + + array2d<ptype> temp_img; + temp_img.set_size(in_img.nr(), out_img.nc()); + + // apply the row filter + for (long r = 0; r < temp_img.nr(); ++r) + { + for (long c = non_border.left(); c <= non_border.right(); ++c) + { + ptype p; + ptype temp = 0; + for (long n = 0; n < 
row_filter.size(); ++n) + { + // pull out the current pixel and put it into p + p = get_pixel_intensity(in_img[r][c*downsample-row_filter.size()/2+n]); + temp += p*row_filter(n); + } + temp_img[r][c] = temp; + } + } + + // apply the column filter + for (long r = non_border.top(); r <= non_border.bottom(); ++r) + { + for (long c = non_border.left(); c <= non_border.right(); ++c) + { + ptype temp = 0; + for (long m = 0; m < col_filter.size(); ++m) + { + temp += temp_img[r*downsample-col_filter.size()/2+m][c]*col_filter(m); + } + + temp /= scale; + + if (use_abs && temp < 0) + { + temp = -temp; + } + + // save this pixel to the output image + if (add_to == false) + { + assign_pixel(out_img[r][c], temp); + } + else + { + assign_pixel(out_img[r][c], temp + out_img[r][c]); + } + } + } + + return non_border; + } + + template < + typename in_image_type, + typename out_image_type, + typename EXP1, + typename EXP2 + > + rectangle spatially_filter_image_separable_down ( + const unsigned long downsample, + const in_image_type& in_img, + out_image_type& out_img, + const matrix_exp<EXP1>& row_filter, + const matrix_exp<EXP2>& col_filter + ) + { + return spatially_filter_image_separable_down(downsample,in_img,out_img,row_filter,col_filter,1); + } + +// ---------------------------------------------------------------------------------------- + + template < + long NR, + long NC, + typename T, + typename U, + typename in_image_type + > + inline void separable_3x3_filter_block_grayscale ( + T (&block)[NR][NC], + const in_image_type& img_, + const long& r, + const long& c, + const U& fe1, // separable filter end + const U& fm, // separable filter middle + const U& fe2 // separable filter end 2 + ) + { + const_image_view<in_image_type> img(img_); + // make sure requires clause is not broken + DLIB_ASSERT(shrink_rect(get_rect(img),1).contains(c,r) && + shrink_rect(get_rect(img),1).contains(c+NC-1,r+NR-1), + "\t void separable_3x3_filter_block_grayscale()" + << "\n\t The sub-window 
doesn't fit inside the given image." + << "\n\t get_rect(img): " << get_rect(img) + << "\n\t (c,r): " << point(c,r) + << "\n\t (c+NC-1,r+NR-1): " << point(c+NC-1,r+NR-1) + ); + + + T row_filt[NR+2][NC]; + for (long rr = 0; rr < NR+2; ++rr) + { + for (long cc = 0; cc < NC; ++cc) + { + row_filt[rr][cc] = get_pixel_intensity(img[r+rr-1][c+cc-1])*fe1 + + get_pixel_intensity(img[r+rr-1][c+cc])*fm + + get_pixel_intensity(img[r+rr-1][c+cc+1])*fe2; + } + } + + for (long rr = 0; rr < NR; ++rr) + { + for (long cc = 0; cc < NC; ++cc) + { + block[rr][cc] = (row_filt[rr][cc]*fe1 + + row_filt[rr+1][cc]*fm + + row_filt[rr+2][cc]*fe2); + } + } + + } + +// ---------------------------------------------------------------------------------------- + + template < + long NR, + long NC, + typename T, + typename U, + typename in_image_type + > + inline void separable_3x3_filter_block_rgb ( + T (&block)[NR][NC], + const in_image_type& img_, + const long& r, + const long& c, + const U& fe1, // separable filter end + const U& fm, // separable filter middle + const U& fe2 // separable filter end 2 + ) + { + const_image_view<in_image_type> img(img_); + // make sure requires clause is not broken + DLIB_ASSERT(shrink_rect(get_rect(img),1).contains(c,r) && + shrink_rect(get_rect(img),1).contains(c+NC-1,r+NR-1), + "\t void separable_3x3_filter_block_rgb()" + << "\n\t The sub-window doesn't fit inside the given image." 
+ << "\n\t get_rect(img): " << get_rect(img) + << "\n\t (c,r): " << point(c,r) + << "\n\t (c+NC-1,r+NR-1): " << point(c+NC-1,r+NR-1) + ); + + T row_filt[NR+2][NC]; + for (long rr = 0; rr < NR+2; ++rr) + { + for (long cc = 0; cc < NC; ++cc) + { + row_filt[rr][cc].red = img[r+rr-1][c+cc-1].red*fe1 + img[r+rr-1][c+cc].red*fm + img[r+rr-1][c+cc+1].red*fe2; + row_filt[rr][cc].green = img[r+rr-1][c+cc-1].green*fe1 + img[r+rr-1][c+cc].green*fm + img[r+rr-1][c+cc+1].green*fe2; + row_filt[rr][cc].blue = img[r+rr-1][c+cc-1].blue*fe1 + img[r+rr-1][c+cc].blue*fm + img[r+rr-1][c+cc+1].blue*fe2; + } + } + + for (long rr = 0; rr < NR; ++rr) + { + for (long cc = 0; cc < NC; ++cc) + { + block[rr][cc].red = row_filt[rr][cc].red*fe1 + row_filt[rr+1][cc].red*fm + row_filt[rr+2][cc].red*fe2; + block[rr][cc].green = row_filt[rr][cc].green*fe1 + row_filt[rr+1][cc].green*fm + row_filt[rr+2][cc].green*fe2; + block[rr][cc].blue = row_filt[rr][cc].blue*fe1 + row_filt[rr+1][cc].blue*fm + row_filt[rr+2][cc].blue*fe2; + } + } + + } + +// ---------------------------------------------------------------------------------------- + + inline double gaussian ( + double x, + double sigma + ) + { + DLIB_ASSERT(sigma > 0, + "\tdouble gaussian(x)" + << "\n\t sigma must be bigger than 0" + << "\n\t sigma: " << sigma + ); + const double sqrt_2_pi = 2.5066282746310002416123552393401041626930; + return 1.0/(sigma*sqrt_2_pi) * std::exp( -(x*x)/(2*sigma*sigma)); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + matrix<T,0,1> create_gaussian_filter ( + double sigma, + int max_size + ) + { + DLIB_ASSERT(sigma > 0 && max_size > 0 && (max_size%2)==1, + "\t matrix<T,0,1> create_gaussian_filter()" + << "\n\t Invalid inputs were given to this function." + << "\n\t sigma: " << sigma + << "\n\t max_size: " << max_size + ); + + // Adjust the size so that the ratio of the gaussian values isn't huge. 
+ // This only matters when T is an integer type. However, we do it for + // all types so that the behavior of this function is always relatively + // the same. + while (gaussian(0,sigma)/gaussian(max_size/2,sigma) > 50) + --max_size; + + + matrix<double,0,1> f(max_size); + for (long i = 0; i < f.size(); ++i) + { + f(i) = gaussian(i-max_size/2, sigma); + } + + if (is_float_type<T>::value == false) + { + f /= f(0); + return matrix_cast<T>(round(f)); + } + else + { + return matrix_cast<T>(f); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + rectangle gaussian_blur ( + const in_image_type& in_img, + out_image_type& out_img, + double sigma = 1, + int max_size = 1001 + ) + { + DLIB_ASSERT(sigma > 0 && max_size > 0 && (max_size%2)==1 && + is_same_object(in_img, out_img) == false, + "\t void gaussian_blur()" + << "\n\t Invalid inputs were given to this function." + << "\n\t sigma: " << sigma + << "\n\t max_size: " << max_size + << "\n\t is_same_object(in_img,out_img): " << is_same_object(in_img,out_img) + ); + + if (sigma < 18) + { + typedef typename pixel_traits<typename image_traits<out_image_type>::pixel_type>::basic_pixel_type type; + typedef typename promote<type>::type ptype; + const matrix<ptype,0,1>& filt = create_gaussian_filter<ptype>(sigma, max_size); + ptype scale = sum(filt); + scale = scale*scale; + return spatially_filter_image_separable(in_img, out_img, filt, filt, scale); + } + else + { + // For large sigma we need to use a type with a lot of precision to avoid + // numerical problems. So we use double here. 
+ typedef double ptype; + const matrix<ptype,0,1>& filt = create_gaussian_filter<ptype>(sigma, max_size); + ptype scale = sum(filt); + scale = scale*scale; + return spatially_filter_image_separable(in_img, out_img, filt, filt, scale); + } + + } + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template < + bool add_to, + typename image_type1, + typename image_type2 + > + void sum_filter ( + const image_type1& img_, + image_type2& out_, + const rectangle& rect + ) + { + const_image_view<image_type1> img(img_); + image_view<image_type2> out(out_); + DLIB_ASSERT(img.nr() == out.nr() && + img.nc() == out.nc() && + is_same_object(img_,out_) == false, + "\t void sum_filter()" + << "\n\t Invalid arguments given to this function." + << "\n\t img.nr(): " << img.nr() + << "\n\t img.nc(): " << img.nc() + << "\n\t out.nr(): " << out.nr() + << "\n\t out.nc(): " << out.nc() + << "\n\t is_same_object(img_,out_): " << is_same_object(img_,out_) + ); + + typedef typename image_traits<image_type1>::pixel_type pixel_type; + typedef typename promote<pixel_type>::type ptype; + + std::vector<ptype> column_sum; + column_sum.resize(img.nc() + rect.width(),0); + + const long top = -1 + rect.top(); + const long bottom = -1 + rect.bottom(); + long left = rect.left()-1; + + // initialize column_sum at row -1 + for (unsigned long j = 0; j < column_sum.size(); ++j) + { + rectangle strip(left,top,left,bottom); + strip = strip.intersect(get_rect(img)); + if (!strip.is_empty()) + { + column_sum[j] = sum(matrix_cast<ptype>(subm(mat(img),strip))); + } + + ++left; + } + + + const rectangle area = get_rect(img); + + // Save width to avoid computing it over and over. + const long width = rect.width(); + + + // Now do the bulk of the filtering work. + for (long r = 0; r < img.nr(); ++r) + { + // set to sum at point(-1,r). i.e. 
should be equal to sum(mat(img), translate_rect(rect, point(-1,r))) + // We compute it's value in the next loop. + ptype cur_sum = 0; + + // Update the first part of column_sum since we only work on the c+width part of column_sum + // in the main loop. + const long top = r + rect.top() - 1; + const long bottom = r + rect.bottom(); + for (long k = 0; k < width; ++k) + { + const long right = k-width + rect.right(); + + const ptype br_corner = area.contains(right,bottom) ? img[bottom][right] : 0; + const ptype tr_corner = area.contains(right,top) ? img[top][right] : 0; + // update the sum in this column now that we are on the next row + column_sum[k] = column_sum[k] + br_corner - tr_corner; + cur_sum += column_sum[k]; + } + + for (long c = 0; c < img.nc(); ++c) + { + const long top = r + rect.top() - 1; + const long bottom = r + rect.bottom(); + const long right = c + rect.right(); + + const ptype br_corner = area.contains(right,bottom) ? img[bottom][right] : 0; + const ptype tr_corner = area.contains(right,top) ? img[top][right] : 0; + + // update the sum in this column now that we are on the next row + column_sum[c+width] = column_sum[c+width] + br_corner - tr_corner; + + // add in the new right side of the rect and subtract the old right side. 
+ cur_sum = cur_sum + column_sum[c+width] - column_sum[c]; + + if (add_to) + out[r][c] += static_cast<typename image_traits<image_type2>::pixel_type>(cur_sum); + else + out[r][c] = static_cast<typename image_traits<image_type2>::pixel_type>(cur_sum); + } + } + } + } + + template < + typename image_type1, + typename image_type2 + > + void sum_filter ( + const image_type1& img, + image_type2& out, + const rectangle& rect + ) + { + impl::sum_filter<true>(img,out,rect); + } + + template < + typename image_type1, + typename image_type2 + > + void sum_filter_assign ( + const image_type1& img, + image_type2& out, + const rectangle& rect + ) + { + set_image_size(out, num_rows(img), num_columns(img)); + impl::sum_filter<false>(img,out,rect); + } + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template <typename T> + class fast_deque + { + /* + This is a fast and minimal implementation of std::deque for + use with the max_filter. + + This object assumes that no more than max_size elements + will ever be pushed into it at a time. 
+ */ + public: + + explicit fast_deque(unsigned long max_size) + { + // find a power of two that upper bounds max_size + mask = 2; + while (mask < max_size) + mask *= 2; + + clear(); + + data.resize(mask); + --mask; // make into bit mask + } + + void clear() + { + first = 1; + last = 0; + size = 0; + } + + bool empty() const + { + return size == 0; + } + + void pop_back() + { + last = (last-1)&mask; + --size; + } + + void push_back(const T& item) + { + last = (last+1)&mask; + ++size; + data[last] = item; + } + + void pop_front() + { + first = (first+1)&mask; + --size; + } + + const T& front() const + { + return data[first]; + } + + const T& back() const + { + return data[last]; + } + + private: + + std::vector<T> data; + unsigned long mask; + unsigned long first; + unsigned long last; + unsigned long size; + }; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2 + > + void max_filter ( + image_type1& img_, + image_type2& out_, + const long width, + const long height, + const typename image_traits<image_type1>::pixel_type& thresh + ) + { + image_view<image_type1> img(img_); + image_view<image_type2> out(out_); + DLIB_ASSERT( width > 0 && + height > 0 && + out.nr() == img.nr() && + out.nc() == img.nc() && + is_same_object(img_,out_) == false, + "\t void max_filter()" + << "\n\t Invalid arguments given to this function." 
+ << "\n\t img.nr(): " << img.nr() + << "\n\t img.nc(): " << img.nc() + << "\n\t out.nr(): " << out.nr() + << "\n\t out.nc(): " << out.nc() + << "\n\t width: " << width + << "\n\t height: " << height + << "\n\t is_same_object(img_,out_): " << is_same_object(img_,out_) + ); + + typedef typename image_traits<image_type1>::pixel_type pixel_type; + + + dlib::impl::fast_deque<std::pair<long,pixel_type> > Q(std::max(width,height)); + + const long last_col = std::max(img.nc(), ((width-1)/2)); + const long last_row = std::max(img.nr(), ((height-1)/2)); + + // run max filter along rows of img + for (long r = 0; r < img.nr(); ++r) + { + Q.clear(); + for (long c = 0; c < (width-1)/2 && c < img.nc(); ++c) + { + while (!Q.empty() && img[r][c] >= Q.back().second) + Q.pop_back(); + Q.push_back(std::make_pair(c,img[r][c])); + } + + for (long c = (width-1)/2; c < img.nc(); ++c) + { + while (!Q.empty() && img[r][c] >= Q.back().second) + Q.pop_back(); + while (!Q.empty() && Q.front().first <= c-width) + Q.pop_front(); + Q.push_back(std::make_pair(c,img[r][c])); + + img[r][c-((width-1)/2)] = Q.front().second; + } + + for (long c = last_col; c < img.nc() + ((width-1)/2); ++c) + { + while (!Q.empty() && Q.front().first <= c-width) + Q.pop_front(); + + img[r][c-((width-1)/2)] = Q.front().second; + } + } + + // run max filter along columns of img. Store result in out. 
+ for (long cc = 0; cc < img.nc(); ++cc) + { + Q.clear(); + for (long rr = 0; rr < (height-1)/2 && rr < img.nr(); ++rr) + { + while (!Q.empty() && img[rr][cc] >= Q.back().second) + Q.pop_back(); + Q.push_back(std::make_pair(rr,img[rr][cc])); + } + + for (long rr = (height-1)/2; rr < img.nr(); ++rr) + { + while (!Q.empty() && img[rr][cc] >= Q.back().second) + Q.pop_back(); + while (!Q.empty() && Q.front().first <= rr-height) + Q.pop_front(); + Q.push_back(std::make_pair(rr,img[rr][cc])); + + out[rr-((height-1)/2)][cc] += std::max(Q.front().second, thresh); + } + + for (long rr = last_row; rr < img.nr() + ((height-1)/2); ++rr) + { + while (!Q.empty() && Q.front().first <= rr-height) + Q.pop_front(); + + out[rr-((height-1)/2)][cc] += std::max(Q.front().second, thresh); + } + } + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SPATIAL_FILTERINg_H_ + + diff --git a/ml/dlib/dlib/image_transforms/spatial_filtering_abstract.h b/ml/dlib/dlib/image_transforms/spatial_filtering_abstract.h new file mode 100644 index 000000000..5e200aa9a --- /dev/null +++ b/ml/dlib/dlib/image_transforms/spatial_filtering_abstract.h @@ -0,0 +1,487 @@ +// Copyright (C) 2006 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SPATIAL_FILTERINg_ABSTRACT_ +#ifdef DLIB_SPATIAL_FILTERINg_ABSTRACT_ + +#include "../pixel.h" +#include "../matrix.h" +#include "../image_processing/generic_image.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + typename EXP, + typename T + > + rectangle spatially_filter_image ( + const in_image_type& in_img, + out_image_type& out_img, + const matrix_exp<EXP>& filter, + T scale = 1, + bool use_abs = false, + bool add_to = false + ); + /*! 
+ requires + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - out_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - in_img and out_img do not contain pixels with an alpha channel. That is, + pixel_traits::has_alpha is false for the pixels in these objects. + - is_same_object(in_img, out_img) == false + - T must be some scalar type + - filter.size() != 0 + - scale != 0 + - if (in_img doesn't contain grayscale pixels) then + - use_abs == false && add_to == false + (i.e. You can only use the use_abs and add_to options with grayscale images) + ensures + - Applies the given spatial filter to in_img and stores the result in out_img (i.e. + cross-correlates in_img with filter). Also divides each resulting pixel by scale. + - The intermediate filter computations will be carried out using variables of type EXP::type. + This is whatever scalar type is used inside the filter matrix. + - Pixel values are stored into out_img using the assign_pixel() function and therefore + any applicable color space conversion or value saturation is performed. Note that if + add_to is true then the filtered output value will be added to out_img rather than + overwriting the original value. + - if (in_img doesn't contain grayscale pixels) then + - The filter is applied to each color channel independently. + - if (use_abs == true) then + - pixel values after filtering that are < 0 are converted to their absolute values. + - The filter is applied such that it's centered over the pixel it writes its + output into. For centering purposes, we consider the center element of the + filter to be filter(filter.nr()/2,filter.nc()/2). This means that the filter + that writes its output to a pixel at location point(c,r) and is W by H (width + by height) pixels in size operates on exactly the pixels in the rectangle + centered_rect(point(c,r),W,H) within in_img. 
+ - Pixels close enough to the edge of in_img to not have the filter still fit + inside the image are always set to zero. + - #out_img.nc() == in_img.nc() + - #out_img.nr() == in_img.nr() + - returns a rectangle which indicates what pixels in #out_img are considered + non-border pixels and therefore contain output from the filter. + - if (use_abs == false && all images and filters contain float types) then + - This function will use SIMD instructions and is particularly fast. So if + you can use this form of the function it can give a decent speed boost. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + typename EXP1, + typename EXP2, + typename T + > + rectangle spatially_filter_image_separable ( + const in_image_type& in_img, + out_image_type& out_img, + const matrix_exp<EXP1>& row_filter, + const matrix_exp<EXP2>& col_filter, + T scale = 1, + bool use_abs = false, + bool add_to = false + ); + /*! + requires + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - out_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - in_img and out_img do not contain pixels with an alpha channel. That is, + pixel_traits::has_alpha is false for the pixels in these objects. + - is_same_object(in_img, out_img) == false + - T must be some scalar type + - scale != 0 + - row_filter.size() != 0 + - col_filter.size() != 0 + - is_vector(row_filter) == true + - is_vector(col_filter) == true + - if (in_img doesn't contain grayscale pixels) then + - use_abs == false && add_to == false + (i.e. You can only use the use_abs and add_to options with grayscale images) + ensures + - Applies the given separable spatial filter to in_img and stores the result in out_img. + Also divides each resulting pixel by scale.
Calling this function has the same + effect as calling the regular spatially_filter_image() routine with a filter, + FILT, defined as follows: + - FILT(r,c) == col_filter(r)*row_filter(c) + - The intermediate filter computations will be carried out using variables of type EXP1::type. + This is whatever scalar type is used inside the row_filter matrix. + - Pixel values are stored into out_img using the assign_pixel() function and therefore + any applicable color space conversion or value saturation is performed. Note that if + add_to is true then the filtered output value will be added to out_img rather than + overwriting the original value. + - if (in_img doesn't contain grayscale pixels) then + - The filter is applied to each color channel independently. + - if (use_abs == true) then + - pixel values after filtering that are < 0 are converted to their absolute values. + - The filter is applied such that it's centered over the pixel it writes its + output into. For centering purposes, we consider the center element of the + filter to be FILT(col_filter.size()/2,row_filter.size()/2). This means that + the filter that writes its output to a pixel at location point(c,r) and is W + by H (width by height) pixels in size operates on exactly the pixels in the + rectangle centered_rect(point(c,r),W,H) within in_img. + - Pixels close enough to the edge of in_img to not have the filter still fit + inside the image are always set to zero. + - #out_img.nc() == in_img.nc() + - #out_img.nr() == in_img.nr() + - returns a rectangle which indicates what pixels in #out_img are considered + non-border pixels and therefore contain output from the filter. + - if (use_abs == false && all images and filters contain float types) then + - This function will use SIMD instructions and is particularly fast. So if + you can use this form of the function it can give a decent speed boost.
+ !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + typename EXP1, + typename EXP2 + > + rectangle float_spatially_filter_image_separable ( + const in_image_type& in_img, + out_image_type& out_img, + const matrix_exp<EXP1>& row_filter, + const matrix_exp<EXP2>& col_filter, + out_image_type& scratch, + bool add_to = false + ); + /*! + requires + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - out_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - in_img, out_img, row_filter, and col_filter must all contain float type elements. + - is_same_object(in_img, out_img) == false + - row_filter.size() != 0 + - col_filter.size() != 0 + - is_vector(row_filter) == true + - is_vector(col_filter) == true + ensures + - This function is identical to the above spatially_filter_image_separable() + function except that it can only be invoked on float images with float + filters. In fact, spatially_filter_image_separable() invokes + float_spatially_filter_image_separable() in those cases. So why is + float_spatially_filter_image_separable() in the public API? The reason is + because the separable filtering routines internally allocate an image each + time they are called. If you want to avoid this memory allocation then you + can call float_spatially_filter_image_separable() and provide the scratch + image as input. This allows you to reuse the same scratch image for many + calls to float_spatially_filter_image_separable() and thereby avoid having it + allocated and freed for each call. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type, + typename EXP1, + typename EXP2, + typename T + > + rectangle spatially_filter_image_separable_down ( + const unsigned long downsample, + const in_image_type& in_img, + out_image_type& out_img, + const matrix_exp<EXP1>& row_filter, + const matrix_exp<EXP2>& col_filter, + T scale = 1, + bool use_abs = false, + bool add_to = false + ); + /*! + requires + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - out_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - in_img and out_img do not contain pixels with an alpha channel. That is, + pixel_traits::has_alpha is false for the pixels in these objects. + - out_img contains grayscale pixels. + - is_same_object(in_img, out_img) == false + - T must be some scalar type + - scale != 0 + - is_vector(row_filter) == true + - is_vector(col_filter) == true + - row_filter.size() % 2 == 1 (i.e. must be odd) + - col_filter.size() % 2 == 1 (i.e. must be odd) + - downsample > 0 + ensures + - This function is equivalent to calling + spatially_filter_image_separable(in_img,out_img,row_filter,col_filter,scale,use_abs,add_to) + and then downsampling the output image by a factor of downsample. Therefore, + we will have that: + - #out_img.nr() == ceil((double)in_img.nr()/downsample) + - #out_img.nc() == ceil((double)in_img.nc()/downsample) + - #out_img[r][c] == filtered pixel corresponding to in_img[r*downsample][c*downsample] + - returns a rectangle which indicates what pixels in #out_img are considered + non-border pixels and therefore contain output from the filter. 
+ - Note that the first row and column of non-zero padded data are the following + - first_row == ceil(floor(col_filter.size()/2.0)/downsample) + - first_col == ceil(floor(row_filter.size()/2.0)/downsample) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + long NR, + long NC, + typename T, + typename U, + typename in_image_type + > + inline void separable_3x3_filter_block_grayscale ( + T (&block)[NR][NC], + const in_image_type& img, + const long& r, + const long& c, + const U& fe1, + const U& fm, + const U& fe2 + ); + /*! + requires + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - T and U should be scalar types + - shrink_rect(get_rect(img),1).contains(c,r) + - shrink_rect(get_rect(img),1).contains(c+NC-1,r+NR-1) + ensures + - Filters the image in the sub-window of img defined by a rectangle + with its upper left corner at (c,r) and lower right at (c+NC-1,r+NR-1). + - The output of the filter is stored in #block. Note that img will be + interpreted as a grayscale image. + - The filter used is defined by the separable filter [fe1 fm fe2]. So the + spatial filter is thus: + fe1*fe1 fe1*fm fe2*fe1 + fe1*fm fm*fm fe2*fm + fe1*fe2 fe2*fm fe2*fe2 + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + long NR, + long NC, + typename T, + typename U, + typename in_image_type + > + inline void separable_3x3_filter_block_rgb ( + T (&block)[NR][NC], + const in_image_type& img, + const long& r, + const long& c, + const U& fe1, + const U& fm, + const U& fe2 + ); + /*! + requires + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - img must contain RGB pixels, that is pixel_traits::rgb == true for the pixels + in img. + - T should be a struct with .red .green and .blue members. 
+ - U should be a scalar type + - shrink_rect(get_rect(img),1).contains(c,r) + - shrink_rect(get_rect(img),1).contains(c+NC-1,r+NR-1) + ensures + - Filters the image in the sub-window of img defined by a rectangle + with its upper left corner at (c,r) and lower right at (c+NC-1,r+NR-1). + - The output of the filter is stored in #block. Note that the filter is applied + to each color component independently. + - The filter used is defined by the separable filter [fe1 fm fe2]. So the + spatial filter is thus: + fe1*fe1 fe1*fm fe2*fe1 + fe1*fm fm*fm fe2*fm + fe1*fe2 fe2*fm fe2*fe2 + !*/ + +// ---------------------------------------------------------------------------------------- + + inline double gaussian ( + double x, + double sigma + ); + /*! + requires + - sigma > 0 + ensures + - computes and returns the value of a 1D Gaussian function with mean 0 + and standard deviation sigma at the given x value. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + matrix<T,0,1> create_gaussian_filter ( + double sigma, + int size + ); + /*! + requires + - sigma > 0 + - size > 0 + - size is an odd number + ensures + - returns a separable Gaussian filter F such that: + - is_vector(F) == true + - F.size() == size + - F is suitable for use with the spatially_filter_image_separable() routine + and its use with this function corresponds to running a Gaussian filter + of sigma width over an image. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + rectangle gaussian_blur ( + const in_image_type& in_img, + out_image_type& out_img, + double sigma = 1, + int max_size = 1001 + ); + /*! 
+ requires + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - out_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - in_img and out_img do not contain pixels with an alpha channel. That is, + pixel_traits::has_alpha is false for the pixels in these objects. + - is_same_object(in_img, out_img) == false + - sigma > 0 + - max_size > 0 + - max_size is an odd number + ensures + - Filters in_img with a Gaussian filter of sigma width. The actual spatial filter will + be applied to pixel blocks that are at most max_size wide and max_size tall (note that + this function will automatically select a smaller block size as appropriate). The + results are stored into #out_img. + - Pixel values are stored into out_img using the assign_pixel() function and therefore + any applicable color space conversion or value saturation is performed. + - if (in_img doesn't contain grayscale pixels) then + - The filter is applied to each color channel independently. + - Pixels close enough to the edge of in_img to not have the filter still fit + inside the image are set to zero. + - #out_img.nc() == in_img.nc() + - #out_img.nr() == in_img.nr() + - returns a rectangle which indicates what pixels in #out_img are considered + non-border pixels and therefore contain output from the filter. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2 + > + void sum_filter ( + const image_type1& img, + image_type2& out, + const rectangle& rect + ); + /*! + requires + - out.nr() == img.nr() + - out.nc() == img.nc() + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h and it must contain grayscale pixels. 
+ - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h and it must contain grayscale pixels. + - is_same_object(img,out) == false + ensures + - for all valid r and c: + - let SUM(r,c) == sum of pixels from img which are inside the rectangle + translate_rect(rect, point(c,r)). + - #out[r][c] == out[r][c] + SUM(r,c) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2 + > + void sum_filter_assign ( + const image_type1& img, + image_type2& out, + const rectangle& rect + ); + /*! + requires + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h and it must contain grayscale pixels. + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h and it must contain grayscale pixels. + - is_same_object(img,out) == false + ensures + - #out.nr() == img.nr() + - #out.nc() == img.nc() + - for all valid r and c: + - let SUM(r,c) == sum of pixels from img which are inside the rectangle + translate_rect(rect, point(c,r)). + - #out[r][c] == SUM(r,c) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type1, + typename image_type2 + > + void max_filter ( + image_type1& img, + image_type2& out, + const long width, + const long height, + const typename image_traits<image_type1>::pixel_type& thresh + ); + /*! + requires + - out.nr() == img.nr() + - out.nc() == img.nc() + - image_type1 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h and it must contain grayscale pixels. + - image_type2 == an image object that implements the interface defined in + dlib/image_processing/generic_image.h and it must contain grayscale pixels. 
+ - is_same_object(img,out) == false + - width > 0 && height > 0 + ensures + - for all valid r and c: + - let MAX(r,c) == maximum of pixels from img which are inside the rectangle + centered_rect(point(c,r), width, height) + - if (MAX(r,c) >= thresh) + - #out[r][c] == out[r][c] + MAX(r,c) + - else + - #out[r][c] == out[r][c] + thresh + - Does not change the size of img. + - Uses img as scratch space. Therefore, the pixel values in img will have + been modified by this function. That is, max_filter() destroys the contents + of img. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SPATIAL_FILTERINg_ABSTRACT_ + diff --git a/ml/dlib/dlib/image_transforms/thresholding.h b/ml/dlib/dlib/image_transforms/thresholding.h new file mode 100644 index 000000000..e4fb02c4a --- /dev/null +++ b/ml/dlib/dlib/image_transforms/thresholding.h @@ -0,0 +1,340 @@ +// Copyright (C) 2006 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_THRESHOLDINg_ +#define DLIB_THRESHOLDINg_ + +#include "../pixel.h" +#include "thresholding_abstract.h" +#include "equalize_histogram.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + const unsigned char on_pixel = 255; + const unsigned char off_pixel = 0; + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + void threshold_image ( + const in_image_type& in_img_, + out_image_type& out_img_, + typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type thresh + ) + { + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); + + COMPILE_TIME_ASSERT(pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale); + + const_image_view<in_image_type> in_img(in_img_); + image_view<out_image_type> out_img(out_img_); + + // if there isn't any input image then don't do anything + if (in_img.size() == 0) + { + out_img.clear(); + return; + } + + out_img.set_size(in_img.nr(),in_img.nc()); + + for (long r = 0; r < in_img.nr(); ++r) + { + for (long c = 0; c < in_img.nc(); ++c) + { + if (get_pixel_intensity(in_img[r][c]) >= thresh) + assign_pixel(out_img[r][c], on_pixel); + else + assign_pixel(out_img[r][c], off_pixel); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void threshold_image ( + image_type& img, + typename pixel_traits<typename image_traits<image_type>::pixel_type>::basic_pixel_type thresh + ) + { + threshold_image(img,img,thresh); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + 
typename out_image_type + > + void auto_threshold_image ( + const in_image_type& in_img_, + out_image_type& out_img_ + ) + { + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::is_unsigned == true ); + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::is_unsigned == true ); + + COMPILE_TIME_ASSERT(pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale); + + image_view<out_image_type> out_img(out_img_); + + // if there isn't any input image then don't do anything + if (image_size(in_img_) == 0) + { + out_img.clear(); + return; + } + + unsigned long thresh; + // find the threshold we should use + matrix<unsigned long,1> hist; + get_histogram(in_img_,hist); + + const_image_view<in_image_type> in_img(in_img_); + + // Start our two means (a and b) out at the ends of the histogram + long a = 0; + long b = hist.size()-1; + bool moved_a = true; + bool moved_b = true; + while (moved_a || moved_b) + { + moved_a = false; + moved_b = false; + + // catch the degenerate case where the histogram is empty + if (a >= b) + break; + + if (hist(a) == 0) + { + ++a; + moved_a = true; + } + + if (hist(b) == 0) + { + --b; + moved_b = true; + } + } + + // now do k-means clustering with k = 2 on the histogram. 
+ moved_a = true; + moved_b = true; + while (moved_a || moved_b) + { + moved_a = false; + moved_b = false; + + int64 a_hits = 0; + int64 b_hits = 0; + int64 a_mass = 0; + int64 b_mass = 0; + + for (long i = 0; i < hist.size(); ++i) + { + // if i is closer to a + if (std::abs(i-a) < std::abs(i-b)) + { + a_mass += hist(i)*i; + a_hits += hist(i); + } + else // if i is closer to b + { + b_mass += hist(i)*i; + b_hits += hist(i); + } + } + + long new_a = (a_mass + a_hits/2)/a_hits; + long new_b = (b_mass + b_hits/2)/b_hits; + + if (new_a != a) + { + moved_a = true; + a = new_a; + } + + if (new_b != b) + { + moved_b = true; + b = new_b; + } + } + + // put the threshold between the two means we found + thresh = (a + b)/2; + + // now actually apply the threshold + threshold_image(in_img_,out_img_,thresh); + } + + template < + typename image_type + > + void auto_threshold_image ( + image_type& img + ) + { + auto_threshold_image(img,img); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + void hysteresis_threshold ( + const in_image_type& in_img_, + out_image_type& out_img_, + typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type lower_thresh, + typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type upper_thresh + ) + { + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); + + COMPILE_TIME_ASSERT(pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale); + + DLIB_ASSERT( lower_thresh <= upper_thresh && is_same_object(in_img_, out_img_) == false, + "\tvoid hysteresis_threshold(in_img_, out_img_, lower_thresh, upper_thresh)" + << "\n\tYou can't use an upper_thresh that is less than your lower_thresh" + << 
"\n\tlower_thresh: " << lower_thresh + << "\n\tupper_thresh: " << upper_thresh + << "\n\tis_same_object(in_img_,out_img_): " << is_same_object(in_img_,out_img_) + ); + + const_image_view<in_image_type> in_img(in_img_); + image_view<out_image_type> out_img(out_img_); + + // if there isn't any input image then don't do anything + if (in_img.size() == 0) + { + out_img.clear(); + return; + } + + out_img.set_size(in_img.nr(),in_img.nc()); + assign_all_pixels(out_img, off_pixel); + + const long size = 1000; + long rstack[size]; + long cstack[size]; + + // now do the thresholding + for (long r = 0; r < in_img.nr(); ++r) + { + for (long c = 0; c < in_img.nc(); ++c) + { + typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type p; + assign_pixel(p,in_img[r][c]); + if (p >= upper_thresh) + { + // now do line following for pixels >= lower_thresh. + // set the stack position to 0. + long pos = 1; + rstack[0] = r; + cstack[0] = c; + + while (pos > 0) + { + --pos; + const long r = rstack[pos]; + const long c = cstack[pos]; + + // This is the base case of our recursion. We want to stop if we hit a + // pixel we have already visited. 
+ if (out_img[r][c] == on_pixel) + continue; + + out_img[r][c] = on_pixel; + + // put the neighbors of this pixel on the stack if they are bright enough + if (r-1 >= 0) + { + if (pos < size && get_pixel_intensity(in_img[r-1][c]) >= lower_thresh) + { + rstack[pos] = r-1; + cstack[pos] = c; + ++pos; + } + if (pos < size && c-1 >= 0 && get_pixel_intensity(in_img[r-1][c-1]) >= lower_thresh) + { + rstack[pos] = r-1; + cstack[pos] = c-1; + ++pos; + } + if (pos < size && c+1 < in_img.nc() && get_pixel_intensity(in_img[r-1][c+1]) >= lower_thresh) + { + rstack[pos] = r-1; + cstack[pos] = c+1; + ++pos; + } + } + + if (pos < size && c-1 >= 0 && get_pixel_intensity(in_img[r][c-1]) >= lower_thresh) + { + rstack[pos] = r; + cstack[pos] = c-1; + ++pos; + } + if (pos < size && c+1 < in_img.nc() && get_pixel_intensity(in_img[r][c+1]) >= lower_thresh) + { + rstack[pos] = r; + cstack[pos] = c+1; + ++pos; + } + + if (r+1 < in_img.nr()) + { + if (pos < size && get_pixel_intensity(in_img[r+1][c]) >= lower_thresh) + { + rstack[pos] = r+1; + cstack[pos] = c; + ++pos; + } + if (pos < size && c-1 >= 0 && get_pixel_intensity(in_img[r+1][c-1]) >= lower_thresh) + { + rstack[pos] = r+1; + cstack[pos] = c-1; + ++pos; + } + if (pos < size && c+1 < in_img.nc() && get_pixel_intensity(in_img[r+1][c+1]) >= lower_thresh) + { + rstack[pos] = r+1; + cstack[pos] = c+1; + ++pos; + } + } + + } // end while (pos >= 0) + + } + else + { + out_img[r][c] = off_pixel; + } + + } + } + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_THRESHOLDINg_ + diff --git a/ml/dlib/dlib/image_transforms/thresholding_abstract.h b/ml/dlib/dlib/image_transforms/thresholding_abstract.h new file mode 100644 index 000000000..e7c1e8826 --- /dev/null +++ b/ml/dlib/dlib/image_transforms/thresholding_abstract.h @@ -0,0 +1,139 @@ +// Copyright (C) 2006 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_THRESHOLDINg_ABSTRACT_ +#ifdef DLIB_THRESHOLDINg_ABSTRACT_ + +#include "../pixel.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + const unsigned char on_pixel = 255; + const unsigned char off_pixel = 0; + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + void threshold_image ( + const in_image_type& in_img, + out_image_type& out_img, + typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type thresh + ); + /*! + requires + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - out_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true + - pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false + - pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false + ensures + - #out_img == the thresholded version of in_img (in_img is converted to a grayscale + intensity image if it is color). Pixels in in_img with grayscale values >= thresh + have an output value of on_pixel and all others have a value of off_pixel. + - #out_img.nc() == in_img.nc() + - #out_img.nr() == in_img.nr() + !*/ + + template < + typename image_type + > + void threshold_image ( + image_type& img, + typename pixel_traits<typename image_traits<image_type>::pixel_type>::basic_pixel_type thresh + ); + /*! + requires + - it is valid to call threshold_image(img,img,thresh); + ensures + - calls threshold_image(img,img,thresh); + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + void auto_threshold_image ( + const in_image_type& in_img, + out_image_type& out_img + ); + /*!
+ requires + - in_image_type == is an implementation of array2d/array2d_kernel_abstract.h + - out_image_type == is an implementation of array2d/array2d_kernel_abstract.h + - pixel_traits<typename image_traits<in_image_type>::pixel_type>::max() <= 65535 + - pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false + - pixel_traits<typename image_traits<in_image_type>::pixel_type>::is_unsigned == true + - pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true + - pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false + - pixel_traits<typename image_traits<out_image_type>::pixel_type>::is_unsigned == true + ensures + - #out_img == the thresholded version of in_img (in_img is converted to a grayscale + intensity image if it is color). Pixels in in_img with grayscale values >= thresh + have an output value of on_pixel and all others have a value of off_pixel. + - The thresh value used is determined by performing a k-means clustering + on the input image histogram with a k of 2. The point between the two + means found is used as the thresh value. + - #out_img.nc() == in_img.nc() + - #out_img.nr() == in_img.nr() + !*/ + + template < + typename image_type + > + void auto_threshold_image ( + image_type& img + ); + /*! + requires + - it is valid to call auto_threshold_image(img,img); + ensures + - calls auto_threshold_image(img,img); + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename in_image_type, + typename out_image_type + > + void hysteresis_threshold ( + const in_image_type& in_img, + out_image_type& out_img, + typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type lower_thresh, + typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type upper_thresh + ); + /*! 
+ requires + - in_image_type == is an implementation of array2d/array2d_kernel_abstract.h + - out_image_type == is an implementation of array2d/array2d_kernel_abstract.h + - pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true + - pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false + - pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false + - lower_thresh <= upper_thresh + - is_same_object(in_img, out_img) == false + ensures + - #out_img == the hysteresis thresholded version of in_img (in_img is converted to a + grayscale intensity image if it is color). Pixels in in_img with grayscale + values >= upper_thresh have an output value of on_pixel and all others have a + value of off_pixel unless they are >= lower_thresh and are connected to a pixel + with a value >= upper_thresh, in which case they have a value of on_pixel. Here + pixels are connected if there is a path between them composed of pixels that + would receive an output of on_pixel. + - #out_img.nc() == in_img.nc() + - #out_img.nr() == in_img.nr() + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_THRESHOLDINg_ABSTRACT_ + + |