diff options
Diffstat (limited to 'ml/dlib/dlib/image_transforms')
36 files changed, 0 insertions, 16458 deletions
diff --git a/ml/dlib/dlib/image_transforms/assign_image.h b/ml/dlib/dlib/image_transforms/assign_image.h deleted file mode 100644 index c69878efa..000000000 --- a/ml/dlib/dlib/image_transforms/assign_image.h +++ /dev/null @@ -1,385 +0,0 @@ -// Copyright (C) 2007 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_ASSIGN_IMAGe_ -#define DLIB_ASSIGN_IMAGe_ - -#include "../pixel.h" -#include "assign_image_abstract.h" -#include "../statistics.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename dest_image_type, - typename src_image_type - > - void impl_assign_image ( - image_view<dest_image_type>& dest, - const src_image_type& src - ) - { - dest.set_size(src.nr(),src.nc()); - for (long r = 0; r < src.nr(); ++r) - { - for (long c = 0; c < src.nc(); ++c) - { - assign_pixel(dest[r][c], src(r,c)); - } - } - } - - template < - typename dest_image_type, - typename src_image_type - > - void impl_assign_image ( - dest_image_type& dest_, - const src_image_type& src - ) - { - image_view<dest_image_type> dest(dest_); - impl_assign_image(dest, src); - } - - template < - typename dest_image_type, - typename src_image_type - > - void assign_image ( - dest_image_type& dest, - const src_image_type& src - ) - { - // check for the case where dest is the same object as src - if (is_same_object(dest,src)) - return; - - impl_assign_image(dest, mat(src)); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename dest_image_type, - typename src_image_type - > - void impl_assign_image_scaled ( - image_view<dest_image_type>& dest, - const src_image_type& src, - const double thresh - ) - { - DLIB_ASSERT( thresh > 0, - "\tvoid assign_image_scaled()" - << "\n\t You have given an threshold value" - << "\n\t thresh: " << thresh - ); - - - typedef typename 
image_traits<dest_image_type>::pixel_type dest_pixel; - - // If the destination has a dynamic range big enough to contain the source image data then just do a - // regular assign_image() - if (pixel_traits<dest_pixel>::max() >= pixel_traits<typename src_image_type::type>::max() && - pixel_traits<dest_pixel>::min() <= pixel_traits<typename src_image_type::type>::min() ) - { - impl_assign_image(dest, src); - return; - } - - dest.set_size(src.nr(),src.nc()); - - if (src.size() == 0) - return; - - if (src.size() == 1) - { - impl_assign_image(dest, src); - return; - } - - // gather image statistics - running_stats<double> rs; - for (long r = 0; r < src.nr(); ++r) - { - for (long c = 0; c < src.nc(); ++c) - { - rs.add(get_pixel_intensity(src(r,c))); - } - } - typedef typename pixel_traits<typename src_image_type::type>::basic_pixel_type spix_type; - - if (std::numeric_limits<spix_type>::is_integer) - { - // If the destination has a dynamic range big enough to contain the source image data then just do a - // regular assign_image() - if (pixel_traits<dest_pixel>::max() >= rs.max() && - pixel_traits<dest_pixel>::min() <= rs.min() ) - { - impl_assign_image(dest, src); - return; - } - } - - // Figure out the range of pixel values based on image statistics. There might be some huge - // outliers so don't just pick the min and max values. - const double upper = std::min(rs.mean() + thresh*rs.stddev(), rs.max()); - const double lower = std::max(rs.mean() - thresh*rs.stddev(), rs.min()); - - - const double dest_min = pixel_traits<dest_pixel>::min(); - const double dest_max = pixel_traits<dest_pixel>::max(); - - const double scale = (upper!=lower)? 
((dest_max - dest_min) / (upper - lower)) : 0; - - for (long r = 0; r < src.nr(); ++r) - { - for (long c = 0; c < src.nc(); ++c) - { - const double val = get_pixel_intensity(src(r,c)) - lower; - - assign_pixel(dest[r][c], scale*val + dest_min); - } - } - } - - template < - typename dest_image_type, - typename src_image_type - > - void impl_assign_image_scaled ( - dest_image_type& dest_, - const src_image_type& src, - const double thresh - ) - { - image_view<dest_image_type> dest(dest_); - impl_assign_image_scaled(dest, src, thresh); - } - - template < - typename dest_image_type, - typename src_image_type - > - void assign_image_scaled ( - dest_image_type& dest, - const src_image_type& src, - const double thresh = 4 - ) - { - // check for the case where dest is the same object as src - if (is_same_object(dest,src)) - return; - - impl_assign_image_scaled(dest, mat(src),thresh); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename dest_image_type, - typename src_pixel_type - > - void assign_all_pixels ( - image_view<dest_image_type>& dest_img, - const src_pixel_type& src_pixel - ) - { - for (long r = 0; r < dest_img.nr(); ++r) - { - for (long c = 0; c < dest_img.nc(); ++c) - { - assign_pixel(dest_img[r][c], src_pixel); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename dest_image_type, - typename src_pixel_type - > - void assign_all_pixels ( - dest_image_type& dest_img_, - const src_pixel_type& src_pixel - ) - { - image_view<dest_image_type> dest_img(dest_img_); - assign_all_pixels(dest_img, src_pixel); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - void assign_border_pixels ( - image_view<image_type>& img, - long x_border_size, - long y_border_size, - const typename image_traits<image_type>::pixel_type& p - ) - { - 
DLIB_ASSERT( x_border_size >= 0 && y_border_size >= 0, - "\tvoid assign_border_pixels(img, p, border_size)" - << "\n\tYou have given an invalid border_size" - << "\n\tx_border_size: " << x_border_size - << "\n\ty_border_size: " << y_border_size - ); - - y_border_size = std::min(y_border_size, img.nr()/2+1); - x_border_size = std::min(x_border_size, img.nc()/2+1); - - // assign the top border - for (long r = 0; r < y_border_size; ++r) - { - for (long c = 0; c < img.nc(); ++c) - { - img[r][c] = p; - } - } - - // assign the bottom border - for (long r = img.nr()-y_border_size; r < img.nr(); ++r) - { - for (long c = 0; c < img.nc(); ++c) - { - img[r][c] = p; - } - } - - // now assign the two sides - for (long r = y_border_size; r < img.nr()-y_border_size; ++r) - { - // left border - for (long c = 0; c < x_border_size; ++c) - img[r][c] = p; - - // right border - for (long c = img.nc()-x_border_size; c < img.nc(); ++c) - img[r][c] = p; - } - } - - template < - typename image_type - > - void assign_border_pixels ( - image_type& img_, - long x_border_size, - long y_border_size, - const typename image_traits<image_type>::pixel_type& p - ) - { - image_view<image_type> img(img_); - assign_border_pixels(img, x_border_size, y_border_size, p); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - void zero_border_pixels ( - image_type& img, - long x_border_size, - long y_border_size - ) - { - DLIB_ASSERT( x_border_size >= 0 && y_border_size >= 0, - "\tvoid zero_border_pixels(img, p, border_size)" - << "\n\tYou have given an invalid border_size" - << "\n\tx_border_size: " << x_border_size - << "\n\ty_border_size: " << y_border_size - ); - - typename image_traits<image_type>::pixel_type zero_pixel; - assign_pixel_intensity(zero_pixel, 0); - assign_border_pixels(img, x_border_size, y_border_size, zero_pixel); - } - -// 
---------------------------------------------------------------------------------------- - - template < - typename image_type - > - void zero_border_pixels ( - image_view<image_type>& img, - long x_border_size, - long y_border_size - ) - { - DLIB_ASSERT( x_border_size >= 0 && y_border_size >= 0, - "\tvoid zero_border_pixels(img, p, border_size)" - << "\n\tYou have given an invalid border_size" - << "\n\tx_border_size: " << x_border_size - << "\n\ty_border_size: " << y_border_size - ); - - typename image_traits<image_type>::pixel_type zero_pixel; - assign_pixel_intensity(zero_pixel, 0); - assign_border_pixels(img, x_border_size, y_border_size, zero_pixel); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - void zero_border_pixels ( - image_view<image_type>& img, - rectangle inside - ) - { - inside = inside.intersect(get_rect(img)); - if (inside.is_empty()) - { - assign_all_pixels(img, 0); - return; - } - - for (long r = 0; r < inside.top(); ++r) - { - for (long c = 0; c < img.nc(); ++c) - assign_pixel(img[r][c], 0); - } - for (long r = inside.top(); r <= inside.bottom(); ++r) - { - for (long c = 0; c < inside.left(); ++c) - assign_pixel(img[r][c], 0); - for (long c = inside.right()+1; c < img.nc(); ++c) - assign_pixel(img[r][c], 0); - } - for (long r = inside.bottom()+1; r < img.nr(); ++r) - { - for (long c = 0; c < img.nc(); ++c) - assign_pixel(img[r][c], 0); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - void zero_border_pixels ( - image_type& img_, - const rectangle& inside - ) - { - image_view<image_type> img(img_); - zero_border_pixels(img, inside); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_ASSIGN_IMAGe_ - - - diff --git a/ml/dlib/dlib/image_transforms/assign_image_abstract.h 
b/ml/dlib/dlib/image_transforms/assign_image_abstract.h deleted file mode 100644 index 5ba262ba5..000000000 --- a/ml/dlib/dlib/image_transforms/assign_image_abstract.h +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright (C) 2007 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_ASSIGN_IMAGe_ABSTRACT -#ifdef DLIB_ASSIGN_IMAGe_ABSTRACT - -#include "../pixel.h" -#include "../image_processing/generic_image.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename dest_image_type, - typename src_image_type - > - void assign_image ( - dest_image_type& dest_img, - const src_image_type& src_img - ); - /*! - requires - - src_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h or any object convertible to a matrix - via mat(). - - dest_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h or an image_view. - ensures - - #dest_img.nc() == src_img.nc() - - #dest_img.nr() == src_img.nr() - - for all valid r and c: - - performs assign_pixel(#dest_img[r][c],src_img[r][c]) - (i.e. copies the src image to dest image) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename dest_image_type, - typename src_image_type - > - void assign_image_scaled ( - dest_image_type& dest_img, - const src_image_type& src_img, - const double thresh = 4 - ); - /*! - requires - - src_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h or any object convertible to a matrix - via mat(). - - dest_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h or an image_view. 
- - thresh > 0 - ensures - - #dest_img.nc() == src_img.nc() - - #dest_img.nr() == src_img.nr() - - if (dest_img's pixels have a wide enough dynamic range to contain all the - pixels in src_img. (Note that dynamic range is determined by the min() and - max() pixel_traits properties)) then - - performs: assign_image(dest_img, src_img) - (i.e. in this case, no scaling is performed. Just a normal color space - conversion and copy ) - - else - - #dest_img will be converted to a grayscale image - - scales the contents of src_img into the dynamic range of dest_img and then - assigns the result into dest_img. The thresh parameter is used to filter - source pixel values which are outliers. These outliers will saturate - at the edge of the destination image's dynamic range. - - Specifically, for all valid r and c: - - scales get_pixel_intensity(src_img[r][c]) into the dynamic range - of the dest_img. This is done by computing the mean and standard - deviation of src_img. Call the mean M and the standard deviation - D. Then the scaling from src_img to dest_img is performed using - the following mapping: - let SRC_UPPER = min(M + thresh*D, max(mat(src_img))) - let SRC_LOWER = max(M - thresh*D, min(mat(src_img))) - let DEST_UPPER = pixel_traits<image_traits<dest_image_type>::pixel_type>::max() - let DEST_LOWER = pixel_traits<image_traits<dest_image_type>::pixel_type>::min() - - MAPPING: [SRC_LOWER, SRC_UPPER] -> [DEST_LOWER, DEST_UPPER] - - Where this mapping is a linear mapping of values from the left range - into the right range of values. Source pixel values outside the left - range are modified to be at the appropriate end of the range. - - The scaled pixel is then stored in dest_img[r][c]. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename dest_image_type, - typename src_pixel_type - > - void assign_all_pixels ( - dest_image_type& dest_img, - const src_pixel_type& src_pixel - ); - /*! 
- requires - - dest_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h or an image_view. - - pixel_traits<src_pixel_type> is defined - ensures - - #dest_img.nc() == dest_img.nc() - - #dest_img.nr() == dest_img.nr() - (i.e. the size of dest_img isn't changed by this function) - - for all valid r and c: - - performs assign_pixel(#dest_img[r][c],src_pixel) - (i.e. assigns the src pixel to every pixel in the dest image) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - void assign_border_pixels ( - image_type& img, - long x_border_size, - long y_border_size, - const typename image_traits<image_type>::pixel_type& p - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h or an image_view - - x_border_size >= 0 - - y_border_size >= 0 - ensures - - #img.nc() == img.nc() - - #img.nr() == img.nr() - (i.e. the size of img isn't changed by this function) - - for all valid r such that r+y_border_size or r-y_border_size gives an invalid row - - for all valid c such that c+x_border_size or c-x_border_size gives an invalid column - - performs assign_pixel(#img[r][c],p) - (i.e. assigns the given pixel to every pixel in the border of img) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - void zero_border_pixels ( - image_type& img, - long x_border_size, - long y_border_size - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h or an image_view - - x_border_size >= 0 - - y_border_size >= 0 - ensures - - #img.nc() == img.nc() - - #img.nr() == img.nr() - (i.e. 
the size of img isn't changed by this function) - - for all valid r such that r+y_border_size or r-y_border_size gives an invalid row - - for all valid c such that c+x_border_size or c-x_border_size gives an invalid column - - performs assign_pixel(#img[r][c], 0 ) - (i.e. assigns 0 to every pixel in the border of img) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - void zero_border_pixels ( - image_type& img, - rectangle inside - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h or an image_view - ensures - - #img.nc() == img.nc() - - #img.nr() == img.nr() - (i.e. the size of img isn't changed by this function) - - All the pixels in img that are not contained inside the inside rectangle - given to this function are set to 0. That is, anything not "inside" is on - the border and set to 0. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_ASSIGN_IMAGe_ABSTRACT - - diff --git a/ml/dlib/dlib/image_transforms/colormaps.h b/ml/dlib/dlib/image_transforms/colormaps.h deleted file mode 100644 index 813d1ff75..000000000 --- a/ml/dlib/dlib/image_transforms/colormaps.h +++ /dev/null @@ -1,269 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#ifndef DLIB_RANDOMLY_COlOR_IMAGE_Hh_ -#define DLIB_RANDOMLY_COlOR_IMAGE_Hh_ - -#include "colormaps_abstract.h" -#include "../hash.h" -#include "../pixel.h" -#include "../matrix.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template <typename T> - struct op_randomly_color_image : does_not_alias - { - op_randomly_color_image( const T& img_) : img(img_){} - - const T& img; - - const static long cost = 7; - const static long NR = 0; - const static long NC = 0; - typedef rgb_pixel type; - typedef const rgb_pixel const_ret_type; - typedef default_memory_manager mem_manager_type; - typedef row_major_layout layout_type; - - const_ret_type apply (long r, long c ) const - { - const unsigned long gray = get_pixel_intensity(mat(img)(r,c)); - if (gray != 0) - { - const uint32 h = murmur_hash3_2(gray,0); - rgb_pixel pix; - pix.red = static_cast<unsigned char>(h)%200 + 55; - pix.green = static_cast<unsigned char>(h>>8)%200 + 55; - pix.blue = static_cast<unsigned char>(h>>16)%200 + 55; - return pix; - } - else - { - // keep black pixels black - return rgb_pixel(0,0,0); - } - } - - long nr () const { return num_rows(img); } - long nc () const { return num_columns(img); } - }; - - template < - typename image_type - > - const matrix_op<op_randomly_color_image<image_type> > - randomly_color_image ( - const image_type& img - ) - { - typedef op_randomly_color_image<image_type> op; - return matrix_op<op>(op(img)); - } - -// ---------------------------------------------------------------------------------------- - - inline rgb_pixel colormap_heat ( - double value, - double min_val, - double max_val - ) - { - // scale the gray value into the range [0, 1] - const double gray = put_in_range(0, 1, (value - min_val)/(max_val-min_val)); - rgb_pixel pix(0,0,0); - - pix.red = static_cast<unsigned char>(std::min(gray/0.4,1.0)*255 + 0.5); - - if (gray > 0.4) - { - pix.green = static_cast<unsigned 
char>(std::min((gray-0.4)/0.4,1.0)*255 + 0.5); - } - if (gray > 0.8) - { - pix.blue = static_cast<unsigned char>(std::min((gray-0.8)/0.2,1.0)*255 + 0.5); - } - - return pix; - } - -// ---------------------------------------------------------------------------------------- - - template <typename T> - struct op_heatmap : does_not_alias - { - op_heatmap( - const T& img_, - const double max_val_, - const double min_val_ - ) : img(img_), max_val(max_val_), min_val(min_val_){} - - const T& img; - - const double max_val; - const double min_val; - - const static long cost = 7; - const static long NR = 0; - const static long NC = 0; - typedef rgb_pixel type; - typedef const rgb_pixel const_ret_type; - typedef default_memory_manager mem_manager_type; - typedef row_major_layout layout_type; - - const_ret_type apply (long r, long c ) const - { - return colormap_heat(get_pixel_intensity(mat(img)(r,c)), min_val, max_val); - } - - long nr () const { return num_rows(img); } - long nc () const { return num_columns(img); } - }; - - template < - typename image_type - > - const matrix_op<op_heatmap<image_type> > - heatmap ( - const image_type& img, - double max_val, - double min_val = 0 - ) - { - typedef op_heatmap<image_type> op; - return matrix_op<op>(op(img,max_val,min_val)); - } - - template < - typename image_type - > - const matrix_op<op_heatmap<image_type> > - heatmap ( - const image_type& img - ) - { - typedef op_heatmap<image_type> op; - if (num_columns(img) * num_rows(img) != 0) - return matrix_op<op>(op(img,max(mat(img)),min(mat(img)))); - else - return matrix_op<op>(op(img,0,0)); - } - -// ---------------------------------------------------------------------------------------- - - inline rgb_pixel colormap_jet ( - double value, - double min_val, - double max_val - ) - { - // scale the gray value into the range [0, 8] - const double gray = 8*put_in_range(0, 1, (value - min_val)/(max_val-min_val)); - rgb_pixel pix; - // s is the slope of color change - const double s = 
1.0/2.0; - - if (gray <= 1) - { - pix.red = 0; - pix.green = 0; - pix.blue = static_cast<unsigned char>((gray+1)*s*255 + 0.5); - } - else if (gray <= 3) - { - pix.red = 0; - pix.green = static_cast<unsigned char>((gray-1)*s*255 + 0.5); - pix.blue = 255; - } - else if (gray <= 5) - { - pix.red = static_cast<unsigned char>((gray-3)*s*255 + 0.5); - pix.green = 255; - pix.blue = static_cast<unsigned char>((5-gray)*s*255 + 0.5); - } - else if (gray <= 7) - { - pix.red = 255; - pix.green = static_cast<unsigned char>((7-gray)*s*255 + 0.5); - pix.blue = 0; - } - else - { - pix.red = static_cast<unsigned char>((9-gray)*s*255 + 0.5); - pix.green = 0; - pix.blue = 0; - } - - return pix; - } - -// ---------------------------------------------------------------------------------------- - - template <typename T> - struct op_jet : does_not_alias - { - op_jet( - const T& img_, - const double max_val_, - const double min_val_ - ) : img(img_), max_val(max_val_), min_val(min_val_){} - - const T& img; - - const double max_val; - const double min_val; - - const static long cost = 7; - const static long NR = 0; - const static long NC = 0; - typedef rgb_pixel type; - typedef const rgb_pixel const_ret_type; - typedef default_memory_manager mem_manager_type; - typedef row_major_layout layout_type; - - const_ret_type apply (long r, long c ) const - { - return colormap_jet(get_pixel_intensity(mat(img)(r,c)), min_val, max_val); - } - - long nr () const { return num_rows(img); } - long nc () const { return num_columns(img); } - }; - - template < - typename image_type - > - const matrix_op<op_jet<image_type> > - jet ( - const image_type& img, - double max_val, - double min_val = 0 - ) - { - typedef op_jet<image_type> op; - return matrix_op<op>(op(img,max_val,min_val)); - } - - template < - typename image_type - > - const matrix_op<op_jet<image_type> > - jet ( - const image_type& img - ) - { - typedef op_jet<image_type> op; - if (num_columns(img) * num_rows(img) != 0) - return 
matrix_op<op>(op(img,max(mat(img)),min(mat(img)))); - else - return matrix_op<op>(op(img,0,0)); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RANDOMLY_COlOR_IMAGE_Hh_ - diff --git a/ml/dlib/dlib/image_transforms/colormaps_abstract.h b/ml/dlib/dlib/image_transforms/colormaps_abstract.h deleted file mode 100644 index 41a7784ba..000000000 --- a/ml/dlib/dlib/image_transforms/colormaps_abstract.h +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_RANDOMLY_COlOR_IMAGE_ABSTRACT_Hh_ -#ifdef DLIB_RANDOMLY_COlOR_IMAGE_ABSTRACT_Hh_ - -#include "../hash.h" -#include "../pixel.h" -#include "../matrix.h" -#include "../image_processing/generic_image.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - const matrix_exp randomly_color_image ( - const image_type& img - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h, or something convertible to a matrix - via mat(). - ensures - - randomly generates a mapping from gray level pixel values - to the RGB pixel space and then uses this mapping to create - a colored version of img. Returns a matrix which represents - this colored version of img. - - black pixels in img will remain black in the output image. - - The returned matrix will have the same dimensions as img. - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - rgb_pixel colormap_heat ( - double value, - double min_val, - double max_val - ); - /*! 
- requires - - min_val <= max_val - ensures - - Maps value to a color. In particular, we use a heatmap color scheme where - values <= min_val are black and larger values become more red, then yellow, - and then white as they approach max_val. - !*/ - - template < - typename image_type - > - const matrix_exp heatmap ( - const image_type& img, - double max_val, - double min_val = 0 - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h, or something convertible to a matrix - via mat(). - ensures - - Interprets img as a grayscale image and returns a new matrix which represents - a colored version of img. In particular, the colormap is defined by - out_color = colormap_heat(grayscale_pixel_value, min_val, max_val). - - The returned matrix will have the same dimensions as img. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - const matrix_exp heatmap ( - const image_type& img - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h, or something convertible to a matrix - via mat(). - ensures - - returns heatmap(img, max(mat(img)), min(mat(img))) - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - rgb_pixel colormap_jet ( - double value, - double min_val, - double max_val - ); - /*! - requires - - min_val <= max_val - ensures - - Maps value to a color. In particular, we use a jet color scheme where - values <= min_val are dark blue and larger values become light blue, then - yellow, and then finally red as they approach max_val. - !*/ - - template < - typename image_type - > - const matrix_exp jet ( - const image_type& img, - double max_val, - double min_val = 0 - ); - /*! 
- requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h, or something convertible to a matrix - via mat(). - ensures - - Interprets img as a grayscale image and returns a new matrix which represents - a colored version of img. In particular, the colormap is defined by - out_color = colormap_jet(grayscale_pixel_value, min_val, max_val). - - The returned matrix will have the same dimensions as img. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - const matrix_exp jet ( - const image_type& img - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h, or something convertible to a matrix - via mat(). - ensures - - returns jet(img, max(mat(img)), min(mat(img))) - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RANDOMLY_COlOR_IMAGE_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/image_transforms/draw.h b/ml/dlib/dlib/image_transforms/draw.h deleted file mode 100644 index 66737b215..000000000 --- a/ml/dlib/dlib/image_transforms/draw.h +++ /dev/null @@ -1,396 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#ifndef DLIB_DRAW_IMAGe_ -#define DLIB_DRAW_IMAGe_ - -#include "draw_abstract.h" -#include "../algs.h" -#include "../pixel.h" -#include "../matrix.h" -#include <cmath> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename pixel_type - > - void draw_line ( - long x1, - long y1, - long x2, - long y2, - image_type& c_, - const pixel_type& val - ) - { - image_view<image_type> c(c_); - if (x1 == x2) - { - // make sure y1 comes before y2 - if (y1 > y2) - swap(y1,y2); - - if (x1 < 0 || x1 >= c.nc()) - return; - - - // this is a vertical line - for (long y = y1; y <= y2; ++y) - { - if (y < 0 || y >= c.nr()) - continue; - - assign_pixel(c[y][x1], val); - } - } - else if (y1 == y2) - { - - // make sure x1 comes before x2 - if (x1 > x2) - swap(x1,x2); - - if (y1 < 0 || y1 >= c.nr()) - return; - - // this is a horizontal line - for (long x = x1; x <= x2; ++x) - { - if (x < 0 || x >= c.nc()) - continue; - - assign_pixel(c[y1][x] , val); - } - } - else - { - // This part is a little more complicated because we are going to perform alpha - // blending so the diagonal lines look nice. 
- const rectangle valid_area = get_rect(c); - rgb_alpha_pixel alpha_pixel; - assign_pixel(alpha_pixel, val); - const unsigned char max_alpha = alpha_pixel.alpha; - - const long rise = (((long)y2) - ((long)y1)); - const long run = (((long)x2) - ((long)x1)); - if (std::abs(rise) < std::abs(run)) - { - const double slope = ((double)rise)/run; - - - double first, last; - - - if (x1 > x2) - { - first = std::max(x2,valid_area.left()); - last = std::min(x1,valid_area.right()); - } - else - { - first = std::max(x1,valid_area.left()); - last = std::min(x2,valid_area.right()); - } - - long y; - long x; - const double x1f = x1; - const double y1f = y1; - for (double i = first; i <= last; ++i) - { - const double dy = slope*(i-x1f) + y1f; - const double dx = i; - - y = static_cast<long>(dy); - x = static_cast<long>(dx); - - - if (y >= valid_area.top() && y <= valid_area.bottom()) - { - alpha_pixel.alpha = static_cast<unsigned char>((1.0-(dy-y))*max_alpha); - assign_pixel(c[y][x], alpha_pixel); - } - if (y+1 >= valid_area.top() && y+1 <= valid_area.bottom()) - { - alpha_pixel.alpha = static_cast<unsigned char>((dy-y)*max_alpha); - assign_pixel(c[y+1][x], alpha_pixel); - } - } - } - else - { - const double slope = ((double)run)/rise; - - - double first, last; - - - if (y1 > y2) - { - first = std::max(y2,valid_area.top()); - last = std::min(y1,valid_area.bottom()); - } - else - { - first = std::max(y1,valid_area.top()); - last = std::min(y2,valid_area.bottom()); - } - - long x; - long y; - const double x1f = x1; - const double y1f = y1; - for (double i = first; i <= last; ++i) - { - const double dx = slope*(i-y1f) + x1f; - const double dy = i; - - y = static_cast<long>(dy); - x = static_cast<long>(dx); - - if (x >= valid_area.left() && x <= valid_area.right()) - { - alpha_pixel.alpha = static_cast<unsigned char>((1.0-(dx-x))*max_alpha); - assign_pixel(c[y][x], alpha_pixel); - } - if (x+1 >= valid_area.left() && x+1 <= valid_area.right()) - { - alpha_pixel.alpha = 
static_cast<unsigned char>((dx-x)*max_alpha); - assign_pixel(c[y][x+1], alpha_pixel); - } - } - } - } - - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename pixel_type - > - void draw_line ( - image_type& c, - const point& p1, - const point& p2, - const pixel_type& val - ) - { - draw_line(p1.x(),p1.y(),p2.x(),p2.y(),c,val); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename pixel_type - > - void draw_rectangle ( - image_type& c, - const rectangle& rect, - const pixel_type& val - ) - { - draw_line(c, rect.tl_corner(), rect.tr_corner(), val); - draw_line(c, rect.bl_corner(), rect.br_corner(), val); - draw_line(c, rect.tl_corner(), rect.bl_corner(), val); - draw_line(c, rect.tr_corner(), rect.br_corner(), val); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename pixel_type - > - void draw_rectangle ( - image_type& c, - const rectangle& rect, - const pixel_type& val, - unsigned int thickness - ) - { - for (unsigned int i = 0; i < thickness; ++i) - { - if ((i%2)==0) - draw_rectangle(c,shrink_rect(rect,(i+1)/2),val); - else - draw_rectangle(c,grow_rect(rect,(i+1)/2),val); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename pixel_type - > - void fill_rect ( - image_type& img_, - const rectangle& rect, - const pixel_type& pixel - ) - { - image_view<image_type> img(img_); - rectangle area = rect.intersect(get_rect(img)); - - for (long r = area.top(); r <= area.bottom(); ++r) - { - for (long c = area.left(); c <= area.right(); ++c) - { - assign_pixel(img[r][c], pixel); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - 
typename image_array_type - > - matrix<typename image_traits<typename image_array_type::value_type>::pixel_type> tile_images ( - const image_array_type& images - ) - { - typedef typename image_traits<typename image_array_type::value_type>::pixel_type T; - - if (images.size() == 0) - return matrix<T>(); - - const unsigned long size_nc = square_root(images.size()); - const unsigned long size_nr = (size_nc*(size_nc-1)>=images.size())? size_nc-1 : size_nc; - // Figure out the size we have to use for each chip in the big main image. We will - // use the largest dimensions seen across all the chips. - long nr = 0; - long nc = 0; - for (unsigned long i = 0; i < images.size(); ++i) - { - nr = std::max(num_rows(images[i]), nr); - nc = std::max(num_columns(images[i]), nc); - } - - matrix<T> temp(size_nr*nr, size_nc*nc); - T background_color; - assign_pixel(background_color, 0); - temp = background_color; - unsigned long idx = 0; - for (unsigned long r = 0; r < size_nr; ++r) - { - for (unsigned long c = 0; c < size_nc; ++c) - { - if (idx < images.size()) - { - set_subm(temp, r*nr, c*nc, nr, nc) = mat(images[idx]); - } - ++idx; - } - } - return temp; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename pixel_type - > - void draw_solid_circle ( - image_type& img_, - const dpoint& center_point, - double radius, - const pixel_type& pixel - ) - { - image_view<image_type> img(img_); - using std::sqrt; - const rectangle valid_area(get_rect(img)); - const double x = center_point.x(); - const double y = center_point.y(); - const point cp(center_point); - if (radius > 1) - { - long first_x = static_cast<long>(x - radius + 0.5); - long last_x = static_cast<long>(x + radius + 0.5); - const double rs = radius*radius; - - // ensure that we only loop over the part of the x dimension that this - // image contains. 
- if (first_x < valid_area.left()) - first_x = valid_area.left(); - if (last_x > valid_area.right()) - last_x = valid_area.right(); - - long top, bottom; - - top = static_cast<long>(sqrt(std::max(rs - (first_x-x-0.5)*(first_x-x-0.5),0.0))+0.5); - top += y; - long last = top; - - // draw the left half of the circle - long middle = std::min(cp.x()-1,last_x); - for (long i = first_x; i <= middle; ++i) - { - double a = i - x + 0.5; - // find the top of the arc - top = static_cast<long>(sqrt(std::max(rs - a*a,0.0))+0.5); - top += y; - long temp = top; - - while(top >= last) - { - bottom = y - top + y; - draw_line(img_, point(i,top),point(i,bottom),pixel); - --top; - } - - last = temp; - } - - middle = std::max(cp.x(),first_x); - top = static_cast<long>(sqrt(std::max(rs - (last_x-x+0.5)*(last_x-x+0.5),0.0))+0.5); - top += y; - last = top; - // draw the right half of the circle - for (long i = last_x; i >= middle; --i) - { - double a = i - x - 0.5; - // find the top of the arc - top = static_cast<long>(sqrt(std::max(rs - a*a,0.0))+0.5); - top += y; - long temp = top; - - while(top >= last) - { - bottom = y - top + y; - draw_line(img_, point(i,top),point(i,bottom),pixel); - --top; - } - - last = temp; - } - } - else if (valid_area.contains(cp)) - { - // For circles smaller than a pixel we will just alpha blend them in proportion - // to how small they are. - rgb_alpha_pixel temp; - assign_pixel(temp, pixel); - temp.alpha = static_cast<unsigned char>(255*radius + 0.5); - assign_pixel(img[cp.y()][cp.x()], temp); - } - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_DRAW_IMAGe_ - - - - diff --git a/ml/dlib/dlib/image_transforms/draw_abstract.h b/ml/dlib/dlib/image_transforms/draw_abstract.h deleted file mode 100644 index 6631f8d8f..000000000 --- a/ml/dlib/dlib/image_transforms/draw_abstract.h +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright (C) 2008 Davis E. 
King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_DRAW_IMAGe_ABSTRACT -#ifdef DLIB_DRAW_IMAGe_ABSTRACT - -#include "../matrix.h" -#include "../image_processing/generic_image.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename pixel_type - > - void draw_line ( - image_type& img, - const point& p1, - const point& p2, - const pixel_type& val - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - ensures - - #img.nr() == img.nr() && #img.nc() == img.nc() - (i.e. the dimensions of the input image are not changed) - - for all valid r and c that are on the line between point p1 and p2: - - performs assign_pixel(img[r][c], val) - (i.e. it draws the line from p1 to p2 onto the image) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename pixel_type - > - void draw_line ( - long x1, - long y1, - long x2, - long y2, - image_type& img, - const pixel_type& val - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - ensures - - performs draw_line(img, point(x1,y1), point(x2,y2), val) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename pixel_type - > - void draw_rectangle ( - image_type& img, - const rectangle& rect, - const pixel_type& val, - unsigned int thickness = 1 - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - pixel_traits<pixel_type> is defined - ensures - - Draws the given rectangle onto the image img. 
It does this by calling - draw_line() four times to draw the four sides of the rectangle. - - The rectangle is drawn with the color given by val. - - The drawn rectangle will have edges that are thickness pixels wide. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename pixel_type - > - void draw_solid_circle ( - image_type& img, - const dpoint& center_point, - double radius, - const pixel_type& pixel - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - pixel_traits<pixel_type> is defined - ensures - - Draws a fully filled in circle onto image that is centered at center_point - and has the given radius. The circle will be filled by assigning the given - pixel value to each element of the circle. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename pixel_type - > - void fill_rect ( - image_type& img, - const rectangle& rect, - const pixel_type& pixel - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - pixel_traits<pixel_type> is defined - ensures - - fills the area defined by rect in the given image with the given pixel value. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_array_type - > - matrix<typename image_traits<typename image_array_type::value_type>::pixel_type> tile_images ( - const image_array_type& images - ); - /*! 
- requires - - image_array_type is a dlib::array of image objects where each image object - implements the interface defined in dlib/image_processing/generic_image.h - ensures - - This function takes the given images and tiles them into a single large - square image and returns this new big tiled image. Therefore, it is a useful - method to visualize many small images at once. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_DRAW_IMAGe_ABSTRACT - - - diff --git a/ml/dlib/dlib/image_transforms/edge_detector.h b/ml/dlib/dlib/image_transforms/edge_detector.h deleted file mode 100644 index 2fa898fed..000000000 --- a/ml/dlib/dlib/image_transforms/edge_detector.h +++ /dev/null @@ -1,302 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_EDGE_DETECTOr_ -#define DLIB_EDGE_DETECTOr_ - -#include "edge_detector_abstract.h" -#include "../pixel.h" -#include "../array2d.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - inline char edge_orientation ( - const T& x_, - const T& y_ - ) - { - - // if this is a perfectly horizontal gradient then return right away - if (x_ == 0) - { - return '|'; - } - else if (y_ == 0) // if this is a perfectly vertical gradient then return right away - { - return '-'; - } - - // Promote x so that when we multiply by 128 later we know overflow won't happen. 
- typedef typename promote<T>::type type; - type x = x_; - type y = y_; - - if (x < 0) - { - x = -x; - if (y < 0) - { - y = -y; - x *= 128; - const type temp = x/y; - if (temp > 309) - return '-'; - else if (temp > 53) - return '/'; - else - return '|'; - } - else - { - x *= 128; - const type temp = x/y; - if (temp > 309) - return '-'; - else if (temp > 53) - return '\\'; - else - return '|'; - } - } - else - { - if (y < 0) - { - y = -y; - x *= 128; - - const type temp = x/y; - if (temp > 309) - return '-'; - else if (temp > 53) - return '\\'; - else - return '|'; - } - else - { - x *= 128; - - const type temp = x/y; - if (temp > 309) - return '-'; - else if (temp > 53) - return '/'; - else - return '|'; - } - } - - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type - > - void sobel_edge_detector ( - const in_image_type& in_img_, - out_image_type& horz_, - out_image_type& vert_ - ) - { - typedef typename image_traits<out_image_type>::pixel_type pixel_type; - COMPILE_TIME_ASSERT(pixel_traits<pixel_type>::is_unsigned == false); - DLIB_ASSERT( !is_same_object(in_img_,horz_) && !is_same_object(in_img_,vert_) && - !is_same_object(horz_,vert_), - "\tvoid sobel_edge_detector(in_img_, horz_, vert_)" - << "\n\t You can't give the same image as more than one argument" - << "\n\t is_same_object(in_img_,horz_): " << is_same_object(in_img_,horz_) - << "\n\t is_same_object(in_img_,vert_): " << is_same_object(in_img_,vert_) - << "\n\t is_same_object(horz_,vert_): " << is_same_object(horz_,vert_) - ); - - - const int vert_filter[3][3] = {{-1,-2,-1}, - {0,0,0}, - {1,2,1}}; - const int horz_filter[3][3] = { {-1,0,1}, - {-2,0,2}, - {-1,0,1}}; - - const long M = 3; - const long N = 3; - - - const_image_view<in_image_type> in_img(in_img_); - image_view<out_image_type> horz(horz_); - image_view<out_image_type> vert(vert_); - - horz.set_size(in_img.nr(),in_img.nc()); - 
vert.set_size(in_img.nr(),in_img.nc()); - - assign_border_pixels(horz,1,1,0); - assign_border_pixels(vert,1,1,0); - - // figure out the range that we should apply the filter to - const long first_row = M/2; - const long first_col = N/2; - const long last_row = in_img.nr() - M/2; - const long last_col = in_img.nc() - N/2; - - - // apply the filter to the image - for (long r = first_row; r < last_row; ++r) - { - for (long c = first_col; c < last_col; ++c) - { - typedef typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type bp_type; - - typename promote<bp_type>::type p, horz_temp, vert_temp; - horz_temp = 0; - vert_temp = 0; - for (long m = 0; m < M; ++m) - { - for (long n = 0; n < N; ++n) - { - // pull out the current pixel and put it into p - p = get_pixel_intensity(in_img[r-M/2+m][c-N/2+n]); - - horz_temp += p*horz_filter[m][n]; - vert_temp += p*vert_filter[m][n]; - } - } - - assign_pixel(horz[r][c] , horz_temp); - assign_pixel(vert[r][c] , vert_temp); - - } - } - } - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template <typename T> - typename promote<T>::type square (const T& a) - { - return static_cast<T>(a)*static_cast<T>(a); - } - } - - template < - typename in_image_type, - typename out_image_type - > - void suppress_non_maximum_edges ( - const in_image_type& horz_, - const in_image_type& vert_, - out_image_type& out_img_ - ) - { - const_image_view<in_image_type> horz(horz_); - const_image_view<in_image_type> vert(vert_); - image_view<out_image_type> out_img(out_img_); - - COMPILE_TIME_ASSERT(is_signed_type<typename image_traits<in_image_type>::pixel_type>::value); - DLIB_ASSERT( horz.nr() == vert.nr() && horz.nc() == vert.nc(), - "\tvoid suppress_non_maximum_edges(horz, vert, out_img)" - << "\n\tYou have to give horz and vert gradient images that are the same size" - << "\n\thorz.nr(): " << horz.nr() - << "\n\thorz.nc(): " << horz.nc() - << 
"\n\tvert.nr(): " << vert.nr() - << "\n\tvert.nc(): " << vert.nc() - ); - DLIB_ASSERT( !is_same_object(out_img_,horz_) && !is_same_object(out_img_,vert_), - "\tvoid suppress_non_maximum_edges(horz_, vert_, out_img_)" - << "\n\t out_img can't be the same as one of the input images." - << "\n\t is_same_object(out_img_,horz_): " << is_same_object(out_img_,horz_) - << "\n\t is_same_object(out_img_,vert_): " << is_same_object(out_img_,vert_) - ); - - using std::min; - using std::abs; - - - // if there isn't any input image then don't do anything - if (horz.size() == 0) - { - out_img.clear(); - return; - } - - out_img.set_size(horz.nr(),horz.nc()); - - zero_border_pixels(out_img,1,1); - - // now do non maximum suppression while we copy the - const long M = 3; - const long N = 3; - - // figure out the range that we should apply the filter to - const long first_row = M/2; - const long first_col = N/2; - const long last_row = horz.nr() - M/2; - const long last_col = horz.nc() - N/2; - - - // apply the filter to the image - for (long r = first_row; r < last_row; ++r) - { - for (long c = first_col; c < last_col; ++c) - { - typedef typename promote<typename image_traits<in_image_type>::pixel_type>::type T; - const T y = horz[r][c]; - const T x = vert[r][c]; - - using impl::square; - - const T val = square(horz[r][c]) + square(vert[r][c]); - - const char ori = edge_orientation(x,y); - const unsigned char zero = 0; - switch (ori) - { - case '-': - if (square(horz[r-1][c])+square(vert[r-1][c]) > val || square(horz[r+1][c]) + square(vert[r+1][c]) > val) - assign_pixel(out_img[r][c] , zero); - else - assign_pixel(out_img[r][c] , std::sqrt((double)val)); - break; - - case '|': - if (square(horz[r][c-1]) + square(vert[r][c-1]) > val || square(horz[r][c+1]) + square(vert[r][c+1]) > val) - assign_pixel(out_img[r][c] , zero); - else - assign_pixel(out_img[r][c] , std::sqrt((double)val)); - break; - - case '/': - if (square(horz[r-1][c-1]) + square(vert[r-1][c-1]) > val || 
square(horz[r+1][c+1]) + square(vert[r+1][c+1]) > val) - assign_pixel(out_img[r][c] , zero); - else - assign_pixel(out_img[r][c] , std::sqrt((double)val)); - break; - - case '\\': - if (square(horz[r+1][c-1]) + square(vert[r+1][c-1]) > val || square(horz[r-1][c+1]) + square(vert[r-1][c+1]) > val) - assign_pixel(out_img[r][c] , zero); - else - assign_pixel(out_img[r][c] , std::sqrt((double)val)); - break; - - } - } - } - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_EDGE_DETECTOr_ - - - diff --git a/ml/dlib/dlib/image_transforms/edge_detector_abstract.h b/ml/dlib/dlib/image_transforms/edge_detector_abstract.h deleted file mode 100644 index 42c991665..000000000 --- a/ml/dlib/dlib/image_transforms/edge_detector_abstract.h +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_EDGE_DETECTOr_ABSTRACT_ -#ifdef DLIB_EDGE_DETECTOr_ABSTRACT_ - -#include "../pixel.h" -#include "../image_processing/generic_image.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - inline char edge_orientation ( - const T& x, - const T& y - ); - /*! - ensures - - returns the orientation of the line drawn from the origin to the point (x,y). - The orientation is represented pictorially using the four ascii - characters /,|,\, and -. 
- - if (the line is horizontal) then - returns '-' - - if (the line is vertical) then - returns '|' - - if (the line is diagonal with a positive slope) then - returns '/' - - if (the line is diagonal with a negative slope) then - returns '\\' - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type - > - void sobel_edge_detector ( - const in_image_type& in_img, - out_image_type& horz, - out_image_type& vert - ); - /*! - requires - - in_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - out_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - out_image_type must use signed grayscale pixels - - is_same_object(in_img,horz) == false - - is_same_object(in_img,vert) == false - - is_same_object(horz,vert) == false - ensures - - Applies the sobel edge detector to the given input image and stores the resulting - edge detections in the horz and vert images - - #horz.nr() == in_img.nr() - - #horz.nc() == in_img.nc() - - #vert.nr() == in_img.nr() - - #vert.nc() == in_img.nc() - - for all valid r and c: - - #horz[r][c] == the magnitude of the horizontal gradient at the point in_img[r][c] - - #vert[r][c] == the magnitude of the vertical gradient at the point in_img[r][c] - - edge_orientation(#vert[r][c], #horz[r][c]) == the edge direction at this point in - the image - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type - > - void suppress_non_maximum_edges ( - const in_image_type& horz, - const in_image_type& vert, - out_image_type& out_img - ); - /*! 
- requires - - in_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - out_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - horz.nr() == vert.nr() - - horz.nc() == vert.nc() - - is_same_object(out_img, horz) == false - - is_same_object(out_img, vert) == false - - image_traits<in_image_type>::pixel_type == A signed scalar type (e.g. int, double, etc.) - ensures - - #out_img.nr() = horz.nr() - - #out_img.nc() = horz.nc() - - let edge_strength(r,c) == sqrt(pow(horz[r][c],2) + pow(vert[r][c],2)) - (i.e. The Euclidean norm of the gradient) - - for all valid r and c: - - if (edge_strength(r,c) is at a maximum with respect to its 2 neighboring - pixels along the line given by edge_orientation(vert[r][c],horz[r][c])) then - - performs assign_pixel(#out_img[r][c], edge_strength(r,c)) - - else - - performs assign_pixel(#out_img[r][c], 0) - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_EDGE_DETECTOr_ABSTRACT_ - - diff --git a/ml/dlib/dlib/image_transforms/equalize_histogram.h b/ml/dlib/dlib/image_transforms/equalize_histogram.h deleted file mode 100644 index dd048759a..000000000 --- a/ml/dlib/dlib/image_transforms/equalize_histogram.h +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright (C) 2006 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#ifndef DLIB_EQUALIZE_HISTOGRAm_ -#define DLIB_EQUALIZE_HISTOGRAm_ - -#include "../pixel.h" -#include "equalize_histogram_abstract.h" -#include <vector> -#include "../enable_if.h" -#include "../matrix.h" - -namespace dlib -{ - -// --------------------------------------------------------------------------------------- - - template < - typename in_image_type, - long R, - long C, - typename MM - > - void get_histogram ( - const in_image_type& in_img_, - matrix<unsigned long,R,C,MM>& hist - ) - { - typedef typename image_traits<in_image_type>::pixel_type pixel_type; - COMPILE_TIME_ASSERT( pixel_traits<pixel_type>::is_unsigned == true ); - - typedef typename pixel_traits<pixel_type>::basic_pixel_type in_image_basic_pixel_type; - COMPILE_TIME_ASSERT( sizeof(in_image_basic_pixel_type) <= 2); - - // make sure hist is the right size - if (R == 1) - hist.set_size(1,pixel_traits<pixel_type>::max()+1); - else - hist.set_size(pixel_traits<pixel_type>::max()+1,1); - - - set_all_elements(hist,0); - - const_image_view<in_image_type> in_img(in_img_); - // compute the histogram - for (long r = 0; r < in_img.nr(); ++r) - { - for (long c = 0; c < in_img.nc(); ++c) - { - unsigned long p = get_pixel_intensity(in_img[r][c]); - ++hist(p); - } - } - } - -// --------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type - > - void equalize_histogram ( - const in_image_type& in_img_, - out_image_type& out_img_ - ) - { - const_image_view<in_image_type> in_img(in_img_); - image_view<out_image_type> out_img(out_img_); - - typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); - - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::is_unsigned == true ); - 
COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::is_unsigned == true ); - - typedef typename pixel_traits<in_pixel_type>::basic_pixel_type in_image_basic_pixel_type; - COMPILE_TIME_ASSERT( sizeof(in_image_basic_pixel_type) <= 2); - - - // if there isn't any input image then don't do anything - if (in_img.size() == 0) - { - out_img.clear(); - return; - } - - out_img.set_size(in_img.nr(),in_img.nc()); - - unsigned long p; - - matrix<unsigned long,1,0> histogram; - get_histogram(in_img_, histogram); - in_img = in_img_; - - double scale = pixel_traits<out_pixel_type>::max(); - if (in_img.size() > histogram(0)) - scale /= in_img.size()-histogram(0); - else - scale = 0; - - // make the black pixels remain black in the output image - histogram(0) = 0; - - // compute the transform function - for (long i = 1; i < histogram.size(); ++i) - histogram(i) += histogram(i-1); - // scale so that it is in the range [0,pixel_traits<out_pixel_type>::max()] - for (long i = 0; i < histogram.size(); ++i) - histogram(i) = static_cast<unsigned long>(histogram(i)*scale); - - // now do the transform - for (long row = 0; row < in_img.nr(); ++row) - { - for (long col = 0; col < in_img.nc(); ++col) - { - p = histogram(get_pixel_intensity(in_img[row][col])); - assign_pixel(out_img[row][col], in_img[row][col]); - assign_pixel_intensity(out_img[row][col],p); - } - } - - } - - template < - typename image_type - > - void equalize_histogram ( - image_type& img - ) - { - equalize_histogram(img,img); - } - -// --------------------------------------------------------------------------------------- - -} - -#endif // DLIB_EQUALIZE_HISTOGRAm_ - - - diff --git a/ml/dlib/dlib/image_transforms/equalize_histogram_abstract.h b/ml/dlib/dlib/image_transforms/equalize_histogram_abstract.h deleted file mode 100644 index 2592aef1a..000000000 --- a/ml/dlib/dlib/image_transforms/equalize_histogram_abstract.h +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (C) 2006 Davis E. 
King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_EQUALIZE_HISTOGRAm_ABSTRACT_ -#ifdef DLIB_EQUALIZE_HISTOGRAm_ABSTRACT_ - -#include "../pixel.h" -#include "../matrix.h" -#include "../image_processing/generic_image.h" - -namespace dlib -{ - -// --------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type - > - void equalize_histogram ( - const in_image_type& in_img, - out_image_type& out_img - ); - /*! - requires - - in_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - out_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - Let pixel_type be the type of pixel in either input or output images, then we - must have: - - pixel_traits<pixel_type>::has_alpha == false - - pixel_traits<pixel_type>::is_unsigned == true - - For the input image pixel type, we have the additional requirement that: - - pixel_traits<pixel_type>::max() <= 65535 - ensures - - #out_img == the histogram equalized version of in_img - - #out_img.nc() == in_img.nc() - - #out_img.nr() == in_img.nr() - !*/ - - template < - typename image_type - > - void equalize_histogram ( - image_type& img - ); - /*! - requires - - it is valid to call equalize_histogram(img,img) - ensures - - calls equalize_histogram(img,img); - !*/ - -// --------------------------------------------------------------------------------------- - - template < - typename in_image_type, - long R, - long C, - typename MM - > - void get_histogram ( - const in_image_type& in_img, - matrix<unsigned long,R,C,MM>& hist - ); - /*! 
- requires - - in_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - Let pixel_type denote the type of pixel in in_img, then we must have: - - pixel_traits<pixel_type>::is_unsigned == true - - pixel_traits<pixel_type>::max() <= 65535 - - hist must be capable of representing a column vector of length - pixel_traits<typename in_image_type>::max(). I.e. if R and C are nonzero - then they must be values that don't conflict with the previous sentence. - ensures - - #hist.size() == pixel_traits<typename in_image_type>::max() - - #hist.nc() == 1 || #hist.nr() == 1 (i.e. hist is either a row or column vector) - - #hist == the histogram for in_img. I.e. it is the case that for all - valid i: - - hist(i) == the number of times a pixel with intensity i appears - in in_img - !*/ - -// --------------------------------------------------------------------------------------- - -} - -#endif // DLIB_EQUALIZE_HISTOGRAm_ABSTRACT_ - - diff --git a/ml/dlib/dlib/image_transforms/fhog.h b/ml/dlib/dlib/image_transforms/fhog.h deleted file mode 100644 index d99973adf..000000000 --- a/ml/dlib/dlib/image_transforms/fhog.h +++ /dev/null @@ -1,1404 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#ifndef DLIB_fHOG_Hh_ -#define DLIB_fHOG_Hh_ - -#include "fhog_abstract.h" -#include "../matrix.h" -#include "../array2d.h" -#include "../array.h" -#include "../geometry.h" -#include "assign_image.h" -#include "draw.h" -#include "interpolation.h" -#include "../simd.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - namespace impl_fhog - { - template <typename image_type, typename T> - inline typename dlib::enable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient ( - const int r, - const int c, - const image_type& img, - matrix<T,2,1>& grad, - T& len - ) - { - matrix<T, 2, 1> grad2, grad3; - // get the red gradient - grad(0) = (int)img[r][c+1].red-(int)img[r][c-1].red; - grad(1) = (int)img[r+1][c].red-(int)img[r-1][c].red; - len = length_squared(grad); - - // get the green gradient - grad2(0) = (int)img[r][c+1].green-(int)img[r][c-1].green; - grad2(1) = (int)img[r+1][c].green-(int)img[r-1][c].green; - T v2 = length_squared(grad2); - - // get the blue gradient - grad3(0) = (int)img[r][c+1].blue-(int)img[r][c-1].blue; - grad3(1) = (int)img[r+1][c].blue-(int)img[r-1][c].blue; - T v3 = length_squared(grad3); - - // pick color with strongest gradient - if (v2 > len) - { - len = v2; - grad = grad2; - } - if (v3 > len) - { - len = v3; - grad = grad3; - } - } - - template <typename image_type> - inline typename dlib::enable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient ( - const int r, - const int c, - const image_type& img, - simd4f& grad_x, - simd4f& grad_y, - simd4f& len - ) - { - simd4i rleft((int)img[r][c-1].red, - (int)img[r][c].red, - (int)img[r][c+1].red, - (int)img[r][c+2].red); - simd4i rright((int)img[r][c+1].red, - (int)img[r][c+2].red, - (int)img[r][c+3].red, - (int)img[r][c+4].red); - simd4i rtop((int)img[r-1][c].red, - (int)img[r-1][c+1].red, - (int)img[r-1][c+2].red, - (int)img[r-1][c+3].red); - simd4i rbottom((int)img[r+1][c].red, 
- (int)img[r+1][c+1].red, - (int)img[r+1][c+2].red, - (int)img[r+1][c+3].red); - - simd4i gleft((int)img[r][c-1].green, - (int)img[r][c].green, - (int)img[r][c+1].green, - (int)img[r][c+2].green); - simd4i gright((int)img[r][c+1].green, - (int)img[r][c+2].green, - (int)img[r][c+3].green, - (int)img[r][c+4].green); - simd4i gtop((int)img[r-1][c].green, - (int)img[r-1][c+1].green, - (int)img[r-1][c+2].green, - (int)img[r-1][c+3].green); - simd4i gbottom((int)img[r+1][c].green, - (int)img[r+1][c+1].green, - (int)img[r+1][c+2].green, - (int)img[r+1][c+3].green); - - simd4i bleft((int)img[r][c-1].blue, - (int)img[r][c].blue, - (int)img[r][c+1].blue, - (int)img[r][c+2].blue); - simd4i bright((int)img[r][c+1].blue, - (int)img[r][c+2].blue, - (int)img[r][c+3].blue, - (int)img[r][c+4].blue); - simd4i btop((int)img[r-1][c].blue, - (int)img[r-1][c+1].blue, - (int)img[r-1][c+2].blue, - (int)img[r-1][c+3].blue); - simd4i bbottom((int)img[r+1][c].blue, - (int)img[r+1][c+1].blue, - (int)img[r+1][c+2].blue, - (int)img[r+1][c+3].blue); - - simd4i grad_x_red = rright-rleft; - simd4i grad_y_red = rbottom-rtop; - simd4i grad_x_green = gright-gleft; - simd4i grad_y_green = gbottom-gtop; - simd4i grad_x_blue = bright-bleft; - simd4i grad_y_blue = bbottom-btop; - - simd4i rlen = grad_x_red*grad_x_red + grad_y_red*grad_y_red; - simd4i glen = grad_x_green*grad_x_green + grad_y_green*grad_y_green; - simd4i blen = grad_x_blue*grad_x_blue + grad_y_blue*grad_y_blue; - - simd4i cmp = rlen>glen; - simd4i tgrad_x = select(cmp,grad_x_red,grad_x_green); - simd4i tgrad_y = select(cmp,grad_y_red,grad_y_green); - simd4i tlen = select(cmp,rlen,glen); - - cmp = tlen>blen; - grad_x = select(cmp,tgrad_x,grad_x_blue); - grad_y = select(cmp,tgrad_y,grad_y_blue); - len = select(cmp,tlen,blen); - } - - // ------------------------------------------------------------------------------------ - - template <typename image_type> - inline typename dlib::enable_if_c<pixel_traits<typename 
image_type::pixel_type>::rgb>::type get_gradient( - const int r, - const int c, - const image_type& img, - simd8f& grad_x, - simd8f& grad_y, - simd8f& len - ) - { - simd8i rleft((int)img[r][c - 1].red, - (int)img[r][c].red, - (int)img[r][c + 1].red, - (int)img[r][c + 2].red, - (int)img[r][c + 3].red, - (int)img[r][c + 4].red, - (int)img[r][c + 5].red, - (int)img[r][c + 6].red); - simd8i rright((int)img[r][c + 1].red, - (int)img[r][c + 2].red, - (int)img[r][c + 3].red, - (int)img[r][c + 4].red, - (int)img[r][c + 5].red, - (int)img[r][c + 6].red, - (int)img[r][c + 7].red, - (int)img[r][c + 8].red); - simd8i rtop((int)img[r - 1][c].red, - (int)img[r - 1][c + 1].red, - (int)img[r - 1][c + 2].red, - (int)img[r - 1][c + 3].red, - (int)img[r - 1][c + 4].red, - (int)img[r - 1][c + 5].red, - (int)img[r - 1][c + 6].red, - (int)img[r - 1][c + 7].red); - simd8i rbottom((int)img[r + 1][c].red, - (int)img[r + 1][c + 1].red, - (int)img[r + 1][c + 2].red, - (int)img[r + 1][c + 3].red, - (int)img[r + 1][c + 4].red, - (int)img[r + 1][c + 5].red, - (int)img[r + 1][c + 6].red, - (int)img[r + 1][c + 7].red); - - simd8i gleft((int)img[r][c - 1].green, - (int)img[r][c].green, - (int)img[r][c + 1].green, - (int)img[r][c + 2].green, - (int)img[r][c + 3].green, - (int)img[r][c + 4].green, - (int)img[r][c + 5].green, - (int)img[r][c + 6].green); - simd8i gright((int)img[r][c + 1].green, - (int)img[r][c + 2].green, - (int)img[r][c + 3].green, - (int)img[r][c + 4].green, - (int)img[r][c + 5].green, - (int)img[r][c + 6].green, - (int)img[r][c + 7].green, - (int)img[r][c + 8].green); - simd8i gtop((int)img[r - 1][c].green, - (int)img[r - 1][c + 1].green, - (int)img[r - 1][c + 2].green, - (int)img[r - 1][c + 3].green, - (int)img[r - 1][c + 4].green, - (int)img[r - 1][c + 5].green, - (int)img[r - 1][c + 6].green, - (int)img[r - 1][c + 7].green); - simd8i gbottom((int)img[r + 1][c].green, - (int)img[r + 1][c + 1].green, - (int)img[r + 1][c + 2].green, - (int)img[r + 1][c + 3].green, - (int)img[r + 
1][c + 4].green, - (int)img[r + 1][c + 5].green, - (int)img[r + 1][c + 6].green, - (int)img[r + 1][c + 7].green); - - simd8i bleft((int)img[r][c - 1].blue, - (int)img[r][c].blue, - (int)img[r][c + 1].blue, - (int)img[r][c + 2].blue, - (int)img[r][c + 3].blue, - (int)img[r][c + 4].blue, - (int)img[r][c + 5].blue, - (int)img[r][c + 6].blue); - simd8i bright((int)img[r][c + 1].blue, - (int)img[r][c + 2].blue, - (int)img[r][c + 3].blue, - (int)img[r][c + 4].blue, - (int)img[r][c + 5].blue, - (int)img[r][c + 6].blue, - (int)img[r][c + 7].blue, - (int)img[r][c + 8].blue); - simd8i btop((int)img[r - 1][c].blue, - (int)img[r - 1][c + 1].blue, - (int)img[r - 1][c + 2].blue, - (int)img[r - 1][c + 3].blue, - (int)img[r - 1][c + 4].blue, - (int)img[r - 1][c + 5].blue, - (int)img[r - 1][c + 6].blue, - (int)img[r - 1][c + 7].blue); - simd8i bbottom((int)img[r + 1][c].blue, - (int)img[r + 1][c + 1].blue, - (int)img[r + 1][c + 2].blue, - (int)img[r + 1][c + 3].blue, - (int)img[r + 1][c + 4].blue, - (int)img[r + 1][c + 5].blue, - (int)img[r + 1][c + 6].blue, - (int)img[r + 1][c + 7].blue); - - simd8i grad_x_red = rright - rleft; - simd8i grad_y_red = rbottom - rtop; - simd8i grad_x_green = gright - gleft; - simd8i grad_y_green = gbottom - gtop; - simd8i grad_x_blue = bright - bleft; - simd8i grad_y_blue = bbottom - btop; - - simd8i rlen = grad_x_red*grad_x_red + grad_y_red*grad_y_red; - simd8i glen = grad_x_green*grad_x_green + grad_y_green*grad_y_green; - simd8i blen = grad_x_blue*grad_x_blue + grad_y_blue*grad_y_blue; - - simd8i cmp = rlen > glen; - simd8i tgrad_x = select(cmp, grad_x_red, grad_x_green); - simd8i tgrad_y = select(cmp, grad_y_red, grad_y_green); - simd8i tlen = select(cmp, rlen, glen); - - cmp = tlen > blen; - grad_x = select(cmp, tgrad_x, grad_x_blue); - grad_y = select(cmp, tgrad_y, grad_y_blue); - len = select(cmp, tlen, blen); - } - - // ------------------------------------------------------------------------------------ - - template <typename image_type, 
typename T> - inline typename dlib::disable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient ( - const int r, - const int c, - const image_type& img, - matrix<T, 2, 1>& grad, - T& len - ) - { - grad(0) = (int)get_pixel_intensity(img[r][c+1])-(int)get_pixel_intensity(img[r][c-1]); - grad(1) = (int)get_pixel_intensity(img[r+1][c])-(int)get_pixel_intensity(img[r-1][c]); - len = length_squared(grad); - } - - template <typename image_type> - inline typename dlib::disable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient ( - int r, - int c, - const image_type& img, - simd4f& grad_x, - simd4f& grad_y, - simd4f& len - ) - { - simd4i left((int)get_pixel_intensity(img[r][c-1]), - (int)get_pixel_intensity(img[r][c]), - (int)get_pixel_intensity(img[r][c+1]), - (int)get_pixel_intensity(img[r][c+2])); - simd4i right((int)get_pixel_intensity(img[r][c+1]), - (int)get_pixel_intensity(img[r][c+2]), - (int)get_pixel_intensity(img[r][c+3]), - (int)get_pixel_intensity(img[r][c+4])); - - simd4i top((int)get_pixel_intensity(img[r-1][c]), - (int)get_pixel_intensity(img[r-1][c+1]), - (int)get_pixel_intensity(img[r-1][c+2]), - (int)get_pixel_intensity(img[r-1][c+3])); - simd4i bottom((int)get_pixel_intensity(img[r+1][c]), - (int)get_pixel_intensity(img[r+1][c+1]), - (int)get_pixel_intensity(img[r+1][c+2]), - (int)get_pixel_intensity(img[r+1][c+3])); - - grad_x = right-left; - grad_y = bottom-top; - - len = (grad_x*grad_x + grad_y*grad_y); - } - - // ------------------------------------------------------------------------------------ - - template <typename image_type> - inline typename dlib::disable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient( - int r, - int c, - const image_type& img, - simd8f& grad_x, - simd8f& grad_y, - simd8f& len - ) - { - simd8i left((int)get_pixel_intensity(img[r][c - 1]), - (int)get_pixel_intensity(img[r][c]), - (int)get_pixel_intensity(img[r][c + 1]), - 
(int)get_pixel_intensity(img[r][c + 2]), - (int)get_pixel_intensity(img[r][c + 3]), - (int)get_pixel_intensity(img[r][c + 4]), - (int)get_pixel_intensity(img[r][c + 5]), - (int)get_pixel_intensity(img[r][c + 6])); - simd8i right((int)get_pixel_intensity(img[r][c + 1]), - (int)get_pixel_intensity(img[r][c + 2]), - (int)get_pixel_intensity(img[r][c + 3]), - (int)get_pixel_intensity(img[r][c + 4]), - (int)get_pixel_intensity(img[r][c + 5]), - (int)get_pixel_intensity(img[r][c + 6]), - (int)get_pixel_intensity(img[r][c + 7]), - (int)get_pixel_intensity(img[r][c + 8])); - - simd8i top((int)get_pixel_intensity(img[r - 1][c]), - (int)get_pixel_intensity(img[r - 1][c + 1]), - (int)get_pixel_intensity(img[r - 1][c + 2]), - (int)get_pixel_intensity(img[r - 1][c + 3]), - (int)get_pixel_intensity(img[r - 1][c + 4]), - (int)get_pixel_intensity(img[r - 1][c + 5]), - (int)get_pixel_intensity(img[r - 1][c + 6]), - (int)get_pixel_intensity(img[r - 1][c + 7])); - simd8i bottom((int)get_pixel_intensity(img[r + 1][c]), - (int)get_pixel_intensity(img[r + 1][c + 1]), - (int)get_pixel_intensity(img[r + 1][c + 2]), - (int)get_pixel_intensity(img[r + 1][c + 3]), - (int)get_pixel_intensity(img[r + 1][c + 4]), - (int)get_pixel_intensity(img[r + 1][c + 5]), - (int)get_pixel_intensity(img[r + 1][c + 6]), - (int)get_pixel_intensity(img[r + 1][c + 7])); - - grad_x = right - left; - grad_y = bottom - top; - - len = (grad_x*grad_x + grad_y*grad_y); - } - - // ------------------------------------------------------------------------------------ - - template <typename T, typename mm1, typename mm2> - inline void set_hog ( - dlib::array<array2d<T,mm1>,mm2>& hog, - int o, - int x, - int y, - const float& value - ) - { - hog[o][y][x] = value; - } - - template <typename T, typename mm1, typename mm2> - void init_hog ( - dlib::array<array2d<T,mm1>,mm2>& hog, - int hog_nr, - int hog_nc, - int filter_rows_padding, - int filter_cols_padding - ) - { - const int num_hog_bands = 27+4; - 
hog.resize(num_hog_bands); - for (int i = 0; i < num_hog_bands; ++i) - { - hog[i].set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1); - rectangle rect = get_rect(hog[i]); - rect.top() += (filter_rows_padding-1)/2; - rect.left() += (filter_cols_padding-1)/2; - rect.right() -= filter_cols_padding/2; - rect.bottom() -= filter_rows_padding/2; - zero_border_pixels(hog[i],rect); - } - } - - template <typename T, typename mm1, typename mm2> - void init_hog_zero_everything ( - dlib::array<array2d<T,mm1>,mm2>& hog, - int hog_nr, - int hog_nc, - int filter_rows_padding, - int filter_cols_padding - ) - { - const int num_hog_bands = 27+4; - hog.resize(num_hog_bands); - for (int i = 0; i < num_hog_bands; ++i) - { - hog[i].set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1); - assign_all_pixels(hog[i], 0); - } - } - - // ------------------------------------------------------------------------------------ - - template <typename T, typename mm> - inline void set_hog ( - array2d<matrix<T,31,1>,mm>& hog, - int o, - int x, - int y, - const float& value - ) - { - hog[y][x](o) = value; - } - - template <typename T, typename mm> - void init_hog ( - array2d<matrix<T,31,1>,mm>& hog, - int hog_nr, - int hog_nc, - int filter_rows_padding, - int filter_cols_padding - ) - { - hog.set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1); - - // now zero out the border region - rectangle rect = get_rect(hog); - rect.top() += (filter_rows_padding-1)/2; - rect.left() += (filter_cols_padding-1)/2; - rect.right() -= filter_cols_padding/2; - rect.bottom() -= filter_rows_padding/2; - border_enumerator be(get_rect(hog),rect); - while (be.move_next()) - { - const point p = be.element(); - set_all_elements(hog[p.y()][p.x()], 0); - } - } - - template <typename T, typename mm> - void init_hog_zero_everything ( - array2d<matrix<T,31,1>,mm>& hog, - int hog_nr, - int hog_nc, - int filter_rows_padding, - int filter_cols_padding - ) - { - 
hog.set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1); - - for (long r = 0; r < hog.nr(); ++r) - { - for (long c = 0; c < hog.nc(); ++c) - { - set_all_elements(hog[r][c], 0); - } - } - } - - // ------------------------------------------------------------------------------------ - - template < - typename image_type, - typename out_type - > - void impl_extract_fhog_features_cell_size_1( - const image_type& img_, - out_type& hog, - int filter_rows_padding, - int filter_cols_padding - ) - { - const_image_view<image_type> img(img_); - // make sure requires clause is not broken - DLIB_ASSERT( filter_rows_padding > 0 && - filter_cols_padding > 0 , - "\t void extract_fhog_features()" - << "\n\t Invalid inputs were given to this function. " - << "\n\t filter_rows_padding: " << filter_rows_padding - << "\n\t filter_cols_padding: " << filter_cols_padding - ); - - /* - This function is an optimized version of impl_extract_fhog_features() for - the case where cell_size == 1. - */ - - - // unit vectors used to compute gradient orientation - matrix<float,2,1> directions[9]; - directions[0] = 1.0000, 0.0000; - directions[1] = 0.9397, 0.3420; - directions[2] = 0.7660, 0.6428; - directions[3] = 0.500, 0.8660; - directions[4] = 0.1736, 0.9848; - directions[5] = -0.1736, 0.9848; - directions[6] = -0.5000, 0.8660; - directions[7] = -0.7660, 0.6428; - directions[8] = -0.9397, 0.3420; - - - - if (img.nr() <= 2 || img.nc() <= 2) - { - hog.clear(); - return; - } - - array2d<unsigned char> angle(img.nr(), img.nc()); - - array2d<float> norm(img.nr(), img.nc()); - zero_border_pixels(norm,1,1); - - // memory for HOG features - const long hog_nr = img.nr()-2; - const long hog_nc = img.nc()-2; - - const int padding_rows_offset = (filter_rows_padding-1)/2; - const int padding_cols_offset = (filter_cols_padding-1)/2; - init_hog_zero_everything(hog, hog_nr, hog_nc, filter_rows_padding, filter_cols_padding); - - - const int visible_nr = img.nr()-1; - const int visible_nc = 
img.nc()-1; - - // First populate the gradient histograms - for (int y = 1; y < visible_nr; y++) - { - int x; - for (x = 1; x < visible_nc - 7; x += 8) - { - // v will be the length of the gradient vectors. - simd8f grad_x, grad_y, v; - get_gradient(y, x, img, grad_x, grad_y, v); - - float _vv[8]; - v.store(_vv); - - // Now snap the gradient to one of 18 orientations - simd8f best_dot = 0; - simd8f best_o = 0; - for (int o = 0; o < 9; o++) - { - simd8f dot = grad_x*directions[o](0) + grad_y*directions[o](1); - simd8f_bool cmp = dot>best_dot; - best_dot = select(cmp, dot, best_dot); - dot *= -1; - best_o = select(cmp, o, best_o); - - cmp = dot > best_dot; - best_dot = select(cmp, dot, best_dot); - best_o = select(cmp, o + 9, best_o); - } - - int32 _best_o[8]; simd8i(best_o).store(_best_o); - - norm[y][x + 0] = _vv[0]; - norm[y][x + 1] = _vv[1]; - norm[y][x + 2] = _vv[2]; - norm[y][x + 3] = _vv[3]; - norm[y][x + 4] = _vv[4]; - norm[y][x + 5] = _vv[5]; - norm[y][x + 6] = _vv[6]; - norm[y][x + 7] = _vv[7]; - - angle[y][x + 0] = _best_o[0]; - angle[y][x + 1] = _best_o[1]; - angle[y][x + 2] = _best_o[2]; - angle[y][x + 3] = _best_o[3]; - angle[y][x + 4] = _best_o[4]; - angle[y][x + 5] = _best_o[5]; - angle[y][x + 6] = _best_o[6]; - angle[y][x + 7] = _best_o[7]; - } - // Now process the right columns that don't fit into simd registers. 
- for (; x < visible_nc; x++) - { - matrix<float,2,1> grad; - float v; - get_gradient(y,x,img,grad,v); - - // snap to one of 18 orientations - float best_dot = 0; - int best_o = 0; - for (int o = 0; o < 9; o++) - { - const float dot = dlib::dot(directions[o], grad); - if (dot > best_dot) - { - best_dot = dot; - best_o = o; - } - else if (-dot > best_dot) - { - best_dot = -dot; - best_o = o+9; - } - } - - norm[y][x] = v; - angle[y][x] = best_o; - } - } - - const float eps = 0.0001; - // compute features - for (int y = 0; y < hog_nr; y++) - { - const int yy = y+padding_rows_offset; - for (int x = 0; x < hog_nc; x++) - { - const simd4f z1(norm[y+1][x+1], - norm[y][x+1], - norm[y+1][x], - norm[y][x]); - - const simd4f z2(norm[y+1][x+2], - norm[y][x+2], - norm[y+1][x+1], - norm[y][x+1]); - - const simd4f z3(norm[y+2][x+1], - norm[y+1][x+1], - norm[y+2][x], - norm[y+1][x]); - - const simd4f z4(norm[y+2][x+2], - norm[y+1][x+2], - norm[y+2][x+1], - norm[y+1][x+1]); - - const simd4f temp0 = std::sqrt(norm[y+1][x+1]); - const simd4f nn = 0.2*sqrt(z1+z2+z3+z4+eps); - const simd4f n = 0.1/nn; - - simd4f t = 0; - - const int xx = x+padding_cols_offset; - - simd4f h0 = min(temp0,nn)*n; - const float vv = sum(h0); - set_hog(hog,angle[y+1][x+1],xx,yy, vv); - t += h0; - - t *= 2*0.2357; - - // contrast-insensitive features - set_hog(hog,angle[y+1][x+1]%9+18,xx,yy, vv); - - - float temp[4]; - t.store(temp); - - // texture features - set_hog(hog,27,xx,yy, temp[0]); - set_hog(hog,28,xx,yy, temp[1]); - set_hog(hog,29,xx,yy, temp[2]); - set_hog(hog,30,xx,yy, temp[3]); - } - } - } - - // ------------------------------------------------------------------------------------ - - template < - typename image_type, - typename out_type - > - void impl_extract_fhog_features( - const image_type& img_, - out_type& hog, - int cell_size, - int filter_rows_padding, - int filter_cols_padding - ) - { - const_image_view<image_type> img(img_); - // make sure requires clause is not broken - DLIB_ASSERT( 
cell_size > 0 && - filter_rows_padding > 0 && - filter_cols_padding > 0 , - "\t void extract_fhog_features()" - << "\n\t Invalid inputs were given to this function. " - << "\n\t cell_size: " << cell_size - << "\n\t filter_rows_padding: " << filter_rows_padding - << "\n\t filter_cols_padding: " << filter_cols_padding - ); - - /* - This function implements the HOG feature extraction method described in - the paper: - P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan - Object Detection with Discriminatively Trained Part Based Models - IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010 - - Moreover, this function is derived from the HOG feature extraction code - from the features.cc file in the voc-releaseX code (see - http://people.cs.uchicago.edu/~rbg/latent/) which is has the following - license (note that the code has been modified to work with grayscale and - color as well as planar and interlaced input and output formats): - - Copyright (C) 2011, 2012 Ross Girshick, Pedro Felzenszwalb - Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick - Copyright (C) 2007 Pedro Felzenszwalb, Deva Ramanan - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - if (cell_size == 1) - { - impl_extract_fhog_features_cell_size_1(img_,hog,filter_rows_padding,filter_cols_padding); - return; - } - - // unit vectors used to compute gradient orientation - matrix<float,2,1> directions[9]; - directions[0] = 1.0000, 0.0000; - directions[1] = 0.9397, 0.3420; - directions[2] = 0.7660, 0.6428; - directions[3] = 0.500, 0.8660; - directions[4] = 0.1736, 0.9848; - directions[5] = -0.1736, 0.9848; - directions[6] = -0.5000, 0.8660; - directions[7] = -0.7660, 0.6428; - directions[8] = -0.9397, 0.3420; - - - - // First we allocate memory for caching orientation histograms & their norms. - const int cells_nr = (int)((float)img.nr()/(float)cell_size + 0.5); - const int cells_nc = (int)((float)img.nc()/(float)cell_size + 0.5); - - if (cells_nr == 0 || cells_nc == 0) - { - hog.clear(); - return; - } - - // We give hist extra padding around the edges (1 cell all the way around the - // edge) so we can avoid needing to do boundary checks when indexing into it - // later on. So some statements assign to the boundary but those values are - // never used. 
- array2d<matrix<float,18,1> > hist(cells_nr+2, cells_nc+2); - for (long r = 0; r < hist.nr(); ++r) - { - for (long c = 0; c < hist.nc(); ++c) - { - hist[r][c] = 0; - } - } - - array2d<float> norm(cells_nr, cells_nc); - assign_all_pixels(norm, 0); - - // memory for HOG features - const int hog_nr = std::max(cells_nr-2, 0); - const int hog_nc = std::max(cells_nc-2, 0); - if (hog_nr == 0 || hog_nc == 0) - { - hog.clear(); - return; - } - const int padding_rows_offset = (filter_rows_padding-1)/2; - const int padding_cols_offset = (filter_cols_padding-1)/2; - init_hog(hog, hog_nr, hog_nc, filter_rows_padding, filter_cols_padding); - - const int visible_nr = std::min((long)cells_nr*cell_size,img.nr())-1; - const int visible_nc = std::min((long)cells_nc*cell_size,img.nc())-1; - - // First populate the gradient histograms - for (int y = 1; y < visible_nr; y++) - { - const float yp = ((float)y+0.5)/(float)cell_size - 0.5; - const int iyp = (int)std::floor(yp); - const float vy0 = yp - iyp; - const float vy1 = 1.0 - vy0; - int x; - for (x = 1; x < visible_nc - 7; x += 8) - { - simd8f xx(x, x + 1, x + 2, x + 3, x + 4, x + 5, x + 6, x + 7); - // v will be the length of the gradient vectors. - simd8f grad_x, grad_y, v; - get_gradient(y, x, img, grad_x, grad_y, v); - - // We will use bilinear interpolation to add into the histogram bins. - // So first we precompute the values needed to determine how much each - // pixel votes into each bin. 
- simd8f xp = (xx + 0.5) / (float)cell_size + 0.5; - simd8i ixp = simd8i(xp); - simd8f vx0 = xp - ixp; - simd8f vx1 = 1.0f - vx0; - - v = sqrt(v); - - // Now snap the gradient to one of 18 orientations - simd8f best_dot = 0; - simd8f best_o = 0; - for (int o = 0; o < 9; o++) - { - simd8f dot = grad_x*directions[o](0) + grad_y*directions[o](1); - simd8f_bool cmp = dot>best_dot; - best_dot = select(cmp, dot, best_dot); - dot *= -1; - best_o = select(cmp, o, best_o); - - cmp = dot > best_dot; - best_dot = select(cmp, dot, best_dot); - best_o = select(cmp, o + 9, best_o); - } - - - // Add the gradient magnitude, v, to 4 histograms around pixel using - // bilinear interpolation. - vx1 *= v; - vx0 *= v; - // The amounts for each bin - simd8f v11 = vy1*vx1; - simd8f v01 = vy0*vx1; - simd8f v10 = vy1*vx0; - simd8f v00 = vy0*vx0; - - int32 _best_o[8]; simd8i(best_o).store(_best_o); - int32 _ixp[8]; ixp.store(_ixp); - float _v11[8]; v11.store(_v11); - float _v01[8]; v01.store(_v01); - float _v10[8]; v10.store(_v10); - float _v00[8]; v00.store(_v00); - - hist[iyp + 1][_ixp[0]](_best_o[0]) += _v11[0]; - hist[iyp + 1 + 1][_ixp[0]](_best_o[0]) += _v01[0]; - hist[iyp + 1][_ixp[0] + 1](_best_o[0]) += _v10[0]; - hist[iyp + 1 + 1][_ixp[0] + 1](_best_o[0]) += _v00[0]; - - hist[iyp + 1][_ixp[1]](_best_o[1]) += _v11[1]; - hist[iyp + 1 + 1][_ixp[1]](_best_o[1]) += _v01[1]; - hist[iyp + 1][_ixp[1] + 1](_best_o[1]) += _v10[1]; - hist[iyp + 1 + 1][_ixp[1] + 1](_best_o[1]) += _v00[1]; - - hist[iyp + 1][_ixp[2]](_best_o[2]) += _v11[2]; - hist[iyp + 1 + 1][_ixp[2]](_best_o[2]) += _v01[2]; - hist[iyp + 1][_ixp[2] + 1](_best_o[2]) += _v10[2]; - hist[iyp + 1 + 1][_ixp[2] + 1](_best_o[2]) += _v00[2]; - - hist[iyp + 1][_ixp[3]](_best_o[3]) += _v11[3]; - hist[iyp + 1 + 1][_ixp[3]](_best_o[3]) += _v01[3]; - hist[iyp + 1][_ixp[3] + 1](_best_o[3]) += _v10[3]; - hist[iyp + 1 + 1][_ixp[3] + 1](_best_o[3]) += _v00[3]; - - hist[iyp + 1][_ixp[4]](_best_o[4]) += _v11[4]; - hist[iyp + 1 + 
1][_ixp[4]](_best_o[4]) += _v01[4]; - hist[iyp + 1][_ixp[4] + 1](_best_o[4]) += _v10[4]; - hist[iyp + 1 + 1][_ixp[4] + 1](_best_o[4]) += _v00[4]; - - hist[iyp + 1][_ixp[5]](_best_o[5]) += _v11[5]; - hist[iyp + 1 + 1][_ixp[5]](_best_o[5]) += _v01[5]; - hist[iyp + 1][_ixp[5] + 1](_best_o[5]) += _v10[5]; - hist[iyp + 1 + 1][_ixp[5] + 1](_best_o[5]) += _v00[5]; - - hist[iyp + 1][_ixp[6]](_best_o[6]) += _v11[6]; - hist[iyp + 1 + 1][_ixp[6]](_best_o[6]) += _v01[6]; - hist[iyp + 1][_ixp[6] + 1](_best_o[6]) += _v10[6]; - hist[iyp + 1 + 1][_ixp[6] + 1](_best_o[6]) += _v00[6]; - - hist[iyp + 1][_ixp[7]](_best_o[7]) += _v11[7]; - hist[iyp + 1 + 1][_ixp[7]](_best_o[7]) += _v01[7]; - hist[iyp + 1][_ixp[7] + 1](_best_o[7]) += _v10[7]; - hist[iyp + 1 + 1][_ixp[7] + 1](_best_o[7]) += _v00[7]; - } - // Now process the right columns that don't fit into simd registers. - for (; x < visible_nc; x++) - { - matrix<float, 2, 1> grad; - float v; - get_gradient(y,x,img,grad,v); - - // snap to one of 18 orientations - float best_dot = 0; - int best_o = 0; - for (int o = 0; o < 9; o++) - { - const float dot = dlib::dot(directions[o], grad); - if (dot > best_dot) - { - best_dot = dot; - best_o = o; - } - else if (-dot > best_dot) - { - best_dot = -dot; - best_o = o+9; - } - } - - v = std::sqrt(v); - // add to 4 histograms around pixel using bilinear interpolation - const float xp = ((double)x + 0.5) / (double)cell_size - 0.5; - const int ixp = (int)std::floor(xp); - const float vx0 = xp - ixp; - const float vx1 = 1.0 - vx0; - - hist[iyp+1][ixp+1](best_o) += vy1*vx1*v; - hist[iyp+1+1][ixp+1](best_o) += vy0*vx1*v; - hist[iyp+1][ixp+1+1](best_o) += vy1*vx0*v; - hist[iyp+1+1][ixp+1+1](best_o) += vy0*vx0*v; - } - } - - // compute energy in each block by summing over orientations - for (int r = 0; r < cells_nr; ++r) - { - for (int c = 0; c < cells_nc; ++c) - { - for (int o = 0; o < 9; o++) - { - norm[r][c] += (hist[r+1][c+1](o) + hist[r+1][c+1](o+9)) * (hist[r+1][c+1](o) + hist[r+1][c+1](o+9)); - } 
- } - } - - const float eps = 0.0001; - // compute features - for (int y = 0; y < hog_nr; y++) - { - const int yy = y+padding_rows_offset; - for (int x = 0; x < hog_nc; x++) - { - const simd4f z1(norm[y+1][x+1], - norm[y][x+1], - norm[y+1][x], - norm[y][x]); - - const simd4f z2(norm[y+1][x+2], - norm[y][x+2], - norm[y+1][x+1], - norm[y][x+1]); - - const simd4f z3(norm[y+2][x+1], - norm[y+1][x+1], - norm[y+2][x], - norm[y+1][x]); - - const simd4f z4(norm[y+2][x+2], - norm[y+1][x+2], - norm[y+2][x+1], - norm[y+1][x+1]); - - const simd4f nn = 0.2*sqrt(z1+z2+z3+z4+eps); - const simd4f n = 0.1/nn; - - simd4f t = 0; - - const int xx = x+padding_cols_offset; - - // contrast-sensitive features - for (int o = 0; o < 18; o+=3) - { - simd4f temp0(hist[y+1+1][x+1+1](o)); - simd4f temp1(hist[y+1+1][x+1+1](o+1)); - simd4f temp2(hist[y+1+1][x+1+1](o+2)); - simd4f h0 = min(temp0,nn)*n; - simd4f h1 = min(temp1,nn)*n; - simd4f h2 = min(temp2,nn)*n; - set_hog(hog,o,xx,yy, sum(h0)); - set_hog(hog,o+1,xx,yy, sum(h1)); - set_hog(hog,o+2,xx,yy, sum(h2)); - t += h0+h1+h2; - } - - t *= 2*0.2357; - - // contrast-insensitive features - for (int o = 0; o < 9; o+=3) - { - simd4f temp0 = hist[y+1+1][x+1+1](o) + hist[y+1+1][x+1+1](o+9); - simd4f temp1 = hist[y+1+1][x+1+1](o+1) + hist[y+1+1][x+1+1](o+9+1); - simd4f temp2 = hist[y+1+1][x+1+1](o+2) + hist[y+1+1][x+1+1](o+9+2); - simd4f h0 = min(temp0,nn)*n; - simd4f h1 = min(temp1,nn)*n; - simd4f h2 = min(temp2,nn)*n; - set_hog(hog,o+18,xx,yy, sum(h0)); - set_hog(hog,o+18+1,xx,yy, sum(h1)); - set_hog(hog,o+18+2,xx,yy, sum(h2)); - } - - - float temp[4]; - t.store(temp); - - // texture features - set_hog(hog,27,xx,yy, temp[0]); - set_hog(hog,28,xx,yy, temp[1]); - set_hog(hog,29,xx,yy, temp[2]); - set_hog(hog,30,xx,yy, temp[3]); - } - } - } - - // ------------------------------------------------------------------------------------ - - inline void create_fhog_bar_images ( - dlib::array<matrix<float> >& mbars, - const long w - ) - { - const long bdims = 
9; - // Make the oriented lines we use to draw on each HOG cell. - mbars.resize(bdims); - dlib::array<array2d<unsigned char> > bars(bdims); - array2d<unsigned char> temp(w,w); - for (unsigned long i = 0; i < bars.size(); ++i) - { - assign_all_pixels(temp, 0); - draw_line(temp, point(w/2,0), point(w/2,w-1), 255); - rotate_image(temp, bars[i], i*-pi/bars.size()); - - mbars[i] = subm(matrix_cast<float>(mat(bars[i])), centered_rect(get_rect(bars[i]),w,w) ); - } - } - - } // end namespace impl_fhog - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename T, - typename mm1, - typename mm2 - > - void extract_fhog_features( - const image_type& img, - dlib::array<array2d<T,mm1>,mm2>& hog, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ) - { - impl_fhog::impl_extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding); - // If the image is too small then the above function outputs an empty feature map. - // But to make things very uniform in usage we require the output to still have the - // 31 planes (but they are just empty). 
- if (hog.size() == 0) - hog.resize(31); - } - - template < - typename image_type, - typename T, - typename mm - > - void extract_fhog_features( - const image_type& img, - array2d<matrix<T,31,1>,mm>& hog, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ) - { - impl_fhog::impl_extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename T - > - void extract_fhog_features( - const image_type& img, - matrix<T,0,1>& feats, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ) - { - dlib::array<array2d<T> > hog; - extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding); - feats.set_size(hog.size()*hog[0].size()); - for (unsigned long i = 0; i < hog.size(); ++i) - { - const long size = hog[i].size(); - set_rowm(feats, range(i*size, (i+1)*size-1)) = reshape_to_column_vector(mat(hog[i])); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - matrix<double,0,1> extract_fhog_features( - const image_type& img, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ) - { - matrix<double, 0, 1> feats; - extract_fhog_features(img, feats, cell_size, filter_rows_padding, filter_cols_padding); - return feats; - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - inline point image_to_fhog ( - point p, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( cell_size > 0 && - filter_rows_padding > 0 && - filter_cols_padding > 0 , - "\t point image_to_fhog()" - << 
"\n\t Invalid inputs were given to this function. " - << "\n\t cell_size: " << cell_size - << "\n\t filter_rows_padding: " << filter_rows_padding - << "\n\t filter_cols_padding: " << filter_cols_padding - ); - - // There is a one pixel border around the image. - p -= point(1,1); - // There is also a 1 "cell" border around the HOG image formation. - return p/cell_size - point(1,1) + point((filter_cols_padding-1)/2,(filter_rows_padding-1)/2); - } - -// ---------------------------------------------------------------------------------------- - - inline rectangle image_to_fhog ( - const rectangle& rect, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( cell_size > 0 && - filter_rows_padding > 0 && - filter_cols_padding > 0 , - "\t rectangle image_to_fhog()" - << "\n\t Invalid inputs were given to this function. " - << "\n\t cell_size: " << cell_size - << "\n\t filter_rows_padding: " << filter_rows_padding - << "\n\t filter_cols_padding: " << filter_cols_padding - ); - - return rectangle(image_to_fhog(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding), - image_to_fhog(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding)); - } - -// ---------------------------------------------------------------------------------------- - - inline point fhog_to_image ( - point p, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( cell_size > 0 && - filter_rows_padding > 0 && - filter_cols_padding > 0 , - "\t point fhog_to_image()" - << "\n\t Invalid inputs were given to this function. " - << "\n\t cell_size: " << cell_size - << "\n\t filter_rows_padding: " << filter_rows_padding - << "\n\t filter_cols_padding: " << filter_cols_padding - ); - - // Convert to image space and then set to the center of the cell. 
- point offset; - - p = (p+point(1,1)-point((filter_cols_padding-1)/2,(filter_rows_padding-1)/2))*cell_size + point(1,1); - if (p.x() >= 0 && p.y() >= 0) offset = point(cell_size/2,cell_size/2); - if (p.x() < 0 && p.y() >= 0) offset = point(-cell_size/2,cell_size/2); - if (p.x() >= 0 && p.y() < 0) offset = point(cell_size/2,-cell_size/2); - if (p.x() < 0 && p.y() < 0) offset = point(-cell_size/2,-cell_size/2); - return p + offset; - } - -// ---------------------------------------------------------------------------------------- - - inline rectangle fhog_to_image ( - const rectangle& rect, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( cell_size > 0 && - filter_rows_padding > 0 && - filter_cols_padding > 0 , - "\t rectangle fhog_to_image()" - << "\n\t Invalid inputs were given to this function. " - << "\n\t cell_size: " << cell_size - << "\n\t filter_rows_padding: " << filter_rows_padding - << "\n\t filter_cols_padding: " << filter_cols_padding - ); - - return rectangle(fhog_to_image(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding), - fhog_to_image(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding)); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename mm1, - typename mm2 - > - matrix<unsigned char> draw_fhog( - const dlib::array<array2d<T,mm1>,mm2>& hog, - const long cell_draw_size = 15, - const float min_response_threshold = 0.0 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( cell_draw_size > 0 && hog.size()==31, - "\t matrix<unsigned char> draw_fhog()" - << "\n\t Invalid inputs were given to this function. 
" - << "\n\t cell_draw_size: " << cell_draw_size - << "\n\t hog.size(): " << hog.size() - ); - - dlib::array<matrix<float> > mbars; - impl_fhog::create_fhog_bar_images(mbars,cell_draw_size); - - // now draw the bars onto the HOG cells - matrix<float> himg(hog[0].nr()*cell_draw_size, hog[0].nc()*cell_draw_size); - himg = 0; - for (unsigned long d = 0; d < mbars.size(); ++d) - { - for (long r = 0; r < himg.nr(); r+=cell_draw_size) - { - for (long c = 0; c < himg.nc(); c+=cell_draw_size) - { - const float val = hog[d][r/cell_draw_size][c/cell_draw_size] + - hog[d+mbars.size()][r/cell_draw_size][c/cell_draw_size] + - hog[d+mbars.size()*2][r/cell_draw_size][c/cell_draw_size]; - if (val > min_response_threshold) - { - set_subm(himg, r, c, cell_draw_size, cell_draw_size) += val*mbars[d%mbars.size()]; - } - } - } - } - - const float thresh = mean(himg) + 4 * stddev(himg); - if (thresh != 0) - return matrix_cast<unsigned char>(upperbound(round(himg*255/thresh),255)); - else - return matrix_cast<unsigned char>(himg); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - matrix<unsigned char> draw_fhog ( - const std::vector<matrix<T> >& hog, - const long cell_draw_size = 15, - const float min_response_threshold = 0.0 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( cell_draw_size > 0 && hog.size()==31, - "\t matrix<unsigned char> draw_fhog()" - << "\n\t Invalid inputs were given to this function. " - << "\n\t cell_draw_size: " << cell_draw_size - << "\n\t hog.size(): " << hog.size() - ); - - // Just convert the input into the right object and then call the above draw_fhog() - // function on it. 
- dlib::array<array2d<T> > temp(hog.size()); - for (unsigned long i = 0; i < temp.size(); ++i) - { - temp[i].set_size(hog[i].nr(), hog[i].nc()); - for (long r = 0; r < hog[i].nr(); ++r) - { - for (long c = 0; c < hog[i].nc(); ++c) - { - temp[i][r][c] = hog[i](r,c); - } - } - } - return draw_fhog(temp,cell_draw_size, min_response_threshold); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename mm - > - matrix<unsigned char> draw_fhog( - const array2d<matrix<T,31,1>,mm>& hog, - const long cell_draw_size = 15, - const float min_response_threshold = 0.0 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( cell_draw_size > 0, - "\t matrix<unsigned char> draw_fhog()" - << "\n\t Invalid inputs were given to this function. " - << "\n\t cell_draw_size: " << cell_draw_size - ); - - dlib::array<matrix<float> > mbars; - impl_fhog::create_fhog_bar_images(mbars,cell_draw_size); - - // now draw the bars onto the HOG cells - matrix<float> himg(hog.nr()*cell_draw_size, hog.nc()*cell_draw_size); - himg = 0; - for (unsigned long d = 0; d < mbars.size(); ++d) - { - for (long r = 0; r < himg.nr(); r+=cell_draw_size) - { - for (long c = 0; c < himg.nc(); c+=cell_draw_size) - { - const float val = hog[r/cell_draw_size][c/cell_draw_size](d) + - hog[r/cell_draw_size][c/cell_draw_size](d+mbars.size()) + - hog[r/cell_draw_size][c/cell_draw_size](d+mbars.size()*2); - if (val > min_response_threshold) - { - set_subm(himg, r, c, cell_draw_size, cell_draw_size) += val*mbars[d%mbars.size()]; - } - } - } - } - - const float thresh = mean(himg) + 4 * stddev(himg); - if (thresh != 0) - return matrix_cast<unsigned char>(upperbound(round(himg*255/thresh),255)); - else - return matrix_cast<unsigned char>(himg); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_fHOG_Hh_ - diff --git a/ml/dlib/dlib/image_transforms/fhog_abstract.h 
b/ml/dlib/dlib/image_transforms/fhog_abstract.h deleted file mode 100644 index f66c5d55a..000000000 --- a/ml/dlib/dlib/image_transforms/fhog_abstract.h +++ /dev/null @@ -1,346 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_fHOG_ABSTRACT_Hh_ -#ifdef DLIB_fHOG_ABSTRACT_Hh_ - -#include "../matrix/matrix_abstract.h" -#include "../array2d/array2d_kernel_abstract.h" -#include "../array/array_kernel_abstract.h" -#include "../image_processing/generic_image.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename T, - typename mm - > - void extract_fhog_features( - const image_type& img, - array2d<matrix<T,31,1>,mm>& hog, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ); - /*! - requires - - cell_size > 0 - - filter_rows_padding > 0 - - filter_cols_padding > 0 - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - T should be float or double - ensures - - This function implements the HOG feature extraction method described in - the paper: - Object Detection with Discriminatively Trained Part Based Models by - P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan - IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010 - This means that it takes an input image img and outputs Felzenszwalb's - 31 dimensional version of HOG features, which are stored into #hog. - - The input image is broken into cells that are cell_size by cell_size pixels - and within each cell we compute a 31 dimensional FHOG vector. This vector - describes the gradient structure within the cell. - - A common task is to convolve each channel of the hog image with a linear - filter. 
This is made more convenient if the contents of #hog includes extra - rows and columns of zero padding along the borders. This extra padding - allows for more efficient convolution code since the code does not need to - perform expensive boundary checking. Therefore, you can set - filter_rows_padding and filter_cols_padding to indicate the size of the - filter you wish to use and this function will ensure #hog has the appropriate - extra zero padding along the borders. In particular, it will include the - following extra padding: - - (filter_rows_padding-1)/2 extra rows of zeros on the top of #hog. - - (filter_cols_padding-1)/2 extra columns of zeros on the left of #hog. - - filter_rows_padding/2 extra rows of zeros on the bottom of #hog. - - filter_cols_padding/2 extra columns of zeros on the right of #hog. - Therefore, the extra padding is done such that functions like - spatially_filter_image() apply their filters to the entire content containing - area of a hog image (note that you should use the following planar version of - extract_fhog_features() instead of the interlaced version if you want to use - spatially_filter_image() on a hog image). - - #hog.nr() == max(round(img.nr()/(double)cell_size)-2,0) + filter_rows_padding-1. - - #hog.nc() == max(round(img.nc()/(double)cell_size)-2,0) + filter_cols_padding-1. - (i.e. Each output dimension is roughly 1/cell_size the original size but - there is a one cell_size border all around the image that is lost and then we - add on any additional padding that is requested.) - - for all valid r and c: - - #hog[r][c] == the FHOG vector describing the cell centered at the pixel location - fhog_to_image(point(c,r),cell_size,filter_rows_padding,filter_cols_padding) in img. 
- !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename T, - typename mm1, - typename mm2 - > - void extract_fhog_features( - const image_type& img, - dlib::array<array2d<T,mm1>,mm2>& hog, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ); - /*! - requires - - cell_size > 0 - - filter_rows_padding > 0 - - filter_cols_padding > 0 - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - T should be float or double - ensures - - This function is identical to the above extract_fhog_features() routine - except that it outputs the results in a planar format rather than the - interlaced format used above. That is, each element of the hog vector is - placed into one of 31 images inside #hog. To be precise, if vhog is the - output of the above interlaced version of extract_fhog_features() then we - will have, for all valid r and c: - - #hog[i][r][c] == vhog[r][c](i) - (where 0 <= i < 31) - - #hog.size() == 31 - - for all valid i: - - #hog[i].nr() == hog[0].nr() - - #hog[i].nc() == hog[0].nc() - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - matrix<double,0,1> extract_fhog_features( - const image_type& img, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ); - /*! - requires - - cell_size > 0 - - filter_rows_padding > 0 - - filter_cols_padding > 0 - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - ensures - - This function calls the above extract_fhog_features() routine and simply - packages the entire output into a dlib::matrix. 
The matrix is constructed - using the planar version of extract_fhog_features() and then each output - plane is converted into a column vector and subsequently all 31 column - vectors are concatenated together and returned. - - Each plane is converted into a column vector using reshape_to_column_vector(), - and is therefore represented in row major order inside the returned vector. - - If H is the array<array2d<double>> object output by the planar - extract_fhog_features() then the returned vector is composed by concatenating - H[0], then H[1], then H[2], and so on in ascending index order. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename T - > - void extract_fhog_features( - const image_type& img, - matrix<T,0,1>& feats, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ); - /*! - requires - - cell_size > 0 - - filter_rows_padding > 0 - - filter_cols_padding > 0 - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - T is float, double, or long double - ensures - - This function is identical to the above version of extract_fhog_features() - that returns a matrix<double,0,1> except that it returns the matrix here - through a reference argument instead of returning it by value. - !*/ - -// ---------------------------------------------------------------------------------------- - - inline point image_to_fhog ( - point p, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ); - /*! - requires - - cell_size > 0 - - filter_rows_padding > 0 - - filter_cols_padding > 0 - ensures - - When using extract_fhog_features(), each FHOG cell is extracted from a - certain region in the input image. image_to_fhog() returns the identity of - the FHOG cell containing the image pixel at location p. 
Or in other words, - let P == image_to_fhog(p) and hog be a FHOG feature map output by - extract_fhog_features(), then hog[P.y()][P.x()] == the FHOG vector/cell - containing the point p in the input image. Note that some image points - might not have corresponding feature locations. E.g. border points or points - outside the image. In these cases the returned point will be outside the - input image. - - Note that you should use the same values of cell_size, filter_rows_padding, - and filter_cols_padding that you used with extract_fhog_features(). - !*/ - -// ---------------------------------------------------------------------------------------- - - inline rectangle image_to_fhog ( - const rectangle& rect, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ); - /*! - requires - - cell_size > 0 - - filter_rows_padding > 0 - - filter_cols_padding > 0 - ensures - - maps a rectangle from image space to fhog space. In particular this function returns: - rectangle(image_to_fhog(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding), - image_to_fhog(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding)) - !*/ - -// ---------------------------------------------------------------------------------------- - - inline point fhog_to_image ( - point p, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ); - /*! - requires - - cell_size > 0 - - filter_rows_padding > 0 - - filter_cols_padding > 0 - ensures - - Maps a pixel in a FHOG image (produced by extract_fhog_features()) back to the - corresponding original input pixel. Note that since FHOG images are - spatially downsampled by aggregation into cells the mapping is not totally - invertible. Therefore, the returned location will be the center of the cell - in the original image that contained the FHOG vector at position p. 
Moreover, - cell_size, filter_rows_padding, and filter_cols_padding should be set to the - values used by the call to extract_fhog_features(). - - Mapping from fhog space to image space is an invertible transformation. That - is, for any point P we have P == image_to_fhog(fhog_to_image(P,cell_size,filter_rows_padding,filter_cols_padding), - cell_size,filter_rows_padding,filter_cols_padding). - !*/ - -// ---------------------------------------------------------------------------------------- - - inline rectangle fhog_to_image ( - const rectangle& rect, - int cell_size = 8, - int filter_rows_padding = 1, - int filter_cols_padding = 1 - ); - /*! - requires - - cell_size > 0 - - filter_rows_padding > 0 - - filter_cols_padding > 0 - ensures - - maps a rectangle from fhog space to image space. In particular this function returns: - rectangle(fhog_to_image(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding), - fhog_to_image(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding)) - - Mapping from fhog space to image space is an invertible transformation. That - is, for any rectangle R we have R == image_to_fhog(fhog_to_image(R,cell_size,filter_rows_padding,filter_cols_padding), - cell_size,filter_rows_padding,filter_cols_padding). - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename mm1, - typename mm2 - > - matrix<unsigned char> draw_fhog( - const dlib::array<array2d<T,mm1>,mm2>& hog, - const long cell_draw_size = 15, - const float min_response_threshold = 0.0 - ); - /*! - requires - - cell_draw_size > 0 - - hog.size() == 31 - ensures - - Interprets hog as a FHOG feature map output by extract_fhog_features() and - converts it into an image suitable for display on the screen. In particular, - we draw all the hog cells into a grayscale image in a way that shows the - magnitude and orientation of the gradient energy in each cell. The result is - then returned. 
- - The size of the cells in the output image will be rendered as cell_draw_size - pixels wide and tall. - - HOG cells with a response value less than min_response_threshold are not - drawn. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - matrix<unsigned char> draw_fhog ( - const std::vector<matrix<T> >& hog, - const long cell_draw_size = 15, - const float min_response_threshold = 0.0 - ); - /*! - requires - - cell_draw_size > 0 - - hog.size() == 31 - ensures - - This function just converts the given hog object into an array<array2d<T>> - and passes it to the above draw_fhog() routine and returns the results. - - HOG cells with a response value less than min_response_threshold are not - drawn. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename mm - > - matrix<unsigned char> draw_fhog( - const array2d<matrix<T,31,1>,mm>& hog, - const long cell_draw_size = 15, - const float min_response_threshold = 0.0 - ); - /*! - requires - - cell_draw_size > 0 - ensures - - Interprets hog as a FHOG feature map output by extract_fhog_features() and - converts it into an image suitable for display on the screen. In particular, - we draw all the hog cells into a grayscale image in a way that shows the - magnitude and orientation of the gradient energy in each cell. The result is - then returned. - - The size of the cells in the output image will be rendered as cell_draw_size - pixels wide and tall. - - HOG cells with a response value less than min_response_threshold are not - drawn. 
- !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_fHOG_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/image_transforms/hough_transform.h b/ml/dlib/dlib/image_transforms/hough_transform.h deleted file mode 100644 index 477b4dc2b..000000000 --- a/ml/dlib/dlib/image_transforms/hough_transform.h +++ /dev/null @@ -1,358 +0,0 @@ -// Copyright (C) 2014 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_HOUGH_tRANSFORM_Hh_ -#define DLIB_HOUGH_tRANSFORM_Hh_ - -#include "hough_transform_abstract.h" -#include "../image_processing/generic_image.h" -#include "../geometry.h" -#include "../algs.h" -#include "assign_image.h" -#include <limits> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class hough_transform - { - - public: - explicit hough_transform ( - unsigned long size_ - ) : _size(size_) - { - DLIB_CASSERT(size_ > 0, - "\t hough_transform::hough_transform(size_)" - << "\n\t Invalid arguments given to this function." 
- ); - - even_size = _size - (_size%2); - - const point cent = center(rectangle(0,0,size_-1,size_-1)); - xcos_theta.set_size(size_, size_); - ysin_theta.set_size(size_, size_); - - std::vector<double> cos_theta(size_), sin_theta(size_); - const double scale = 1<<16; - for (unsigned long t = 0; t < size_; ++t) - { - double theta = t*pi/even_size; - - cos_theta[t] = scale*std::cos(theta)/sqrt_2; - sin_theta[t] = scale*std::sin(theta)/sqrt_2; - } - const double offset = scale*even_size/4.0 + 0.5; - - for (unsigned long c = 0; c < size_; ++c) - { - const long x = c - cent.x(); - for (unsigned long t = 0; t < size_; ++t) - xcos_theta(c,t) = static_cast<int32>(x*cos_theta[t] + offset); - } - for (unsigned long r = 0; r < size_; ++r) - { - const long y = r - cent.y(); - for (unsigned long t = 0; t < size_; ++t) - ysin_theta(r,t) = static_cast<int32>(y*sin_theta[t] + offset); - } - } - - unsigned long size( - ) const { return _size; } - - long nr( - ) const { return _size; } - - long nc( - ) const { return _size; } - - std::pair<point, point> get_line ( - const point& p - ) const - { - DLIB_ASSERT(rectangle(0,0,size()-1,size()-1).contains(p) == true, - "\t pair<point,point> hough_transform::get_line(point)" - << "\n\t Invalid arguments given to this function." - << "\n\t p: " << p - << "\n\t size(): " << size() - ); - - // First we compute the radius measured in pixels from the center and the theta - // angle in radians. - typedef dlib::vector<double,2> vect; - const rectangle box(0,0,size()-1,size()-1); - const vect cent = center(box); - double theta = p.x()-cent.x(); - double radius = p.y()-cent.y(); - theta = theta*pi/even_size; - radius = radius*sqrt_2 + 0.5; - - // now make a line segment on the line. 
- vect v1 = cent + vect(size()+1000,0) + vect(0,radius); - vect v2 = cent - vect(size()+1000,0) + vect(0,radius); - point p1 = rotate_point(cent, v1, theta); - point p2 = rotate_point(cent, v2, theta); - - clip_line_to_rectangle(box, p1, p2); - - return std::make_pair(p1,p2); - } - - // Walks every theta column t of himg, computes the radius row that the line through p - // falls into for that theta, and returns the (t,row) bin holding the largest value. - template < - typename image_type - > - point get_best_hough_point ( - const point& p, - const image_type& himg_ - ) - { - const const_image_view<image_type> himg(himg_); - - DLIB_ASSERT(himg.nr() == size() && himg.nc() == size() && - rectangle(0,0,size()-1,size()-1).contains(p) == true, - "\t point hough_transform::get_best_hough_point()" - << "\n\t Invalid arguments given to this function." - << "\n\t himg.nr(): " << himg.nr() - << "\n\t himg.nc(): " << himg.nc() - << "\n\t size(): " << size() - << "\n\t p: " << p - ); - - - typedef typename image_traits<image_type>::pixel_type pixel_type; - COMPILE_TIME_ASSERT(pixel_traits<pixel_type>::grayscale == true); - // Seed with the most negative finite value. numeric_limits::min() is the smallest - // POSITIVE value for floating point pixel types, which would make the comparisons - // below reject bins that are zero or negative. - pixel_type best_val = std::numeric_limits<pixel_type>::lowest(); - point best_point; - - - const long max_n8 = (himg.nc()/8)*8; - const long max_n4 = (himg.nc()/4)*4; - const long r = p.y(); - const long c = p.x(); - - const int32* ysin = &ysin_theta(r,0); - const int32* xcos = &xcos_theta(c,0); - long t = 0; - while(t < max_n8) - { - long rr0 = (*xcos++ + *ysin++)>>16; - long rr1 = (*xcos++ + *ysin++)>>16; - long rr2 = (*xcos++ + *ysin++)>>16; - long rr3 = (*xcos++ + *ysin++)>>16; - long rr4 = (*xcos++ + *ysin++)>>16; - long rr5 = (*xcos++ + *ysin++)>>16; - long rr6 = (*xcos++ + *ysin++)>>16; - long rr7 = (*xcos++ + *ysin++)>>16; - - if (himg[rr0][t++] > best_val) - { - best_val = himg[rr0][t-1]; - best_point.x() = t-1; - best_point.y() = rr0; - } - if (himg[rr1][t++] > best_val) - { - best_val = himg[rr1][t-1]; - best_point.x() = t-1; - best_point.y() = rr1; - } - if (himg[rr2][t++] > best_val) - { - best_val = himg[rr2][t-1]; - best_point.x() = t-1; - best_point.y() = rr2; - } - if (himg[rr3][t++] > best_val) - { - best_val = 
himg[rr3][t-1]; - best_point.x() = t-1; - best_point.y() = rr3; - } - if (himg[rr4][t++] > best_val) - { - best_val = himg[rr4][t-1]; - best_point.x() = t-1; - best_point.y() = rr4; - } - if (himg[rr5][t++] > best_val) - { - best_val = himg[rr5][t-1]; - best_point.x() = t-1; - best_point.y() = rr5; - } - if (himg[rr6][t++] > best_val) - { - best_val = himg[rr6][t-1]; - best_point.x() = t-1; - best_point.y() = rr6; - } - if (himg[rr7][t++] > best_val) - { - best_val = himg[rr7][t-1]; - best_point.x() = t-1; - best_point.y() = rr7; - } - } - while(t < max_n4) - { - long rr0 = (*xcos++ + *ysin++)>>16; - long rr1 = (*xcos++ + *ysin++)>>16; - long rr2 = (*xcos++ + *ysin++)>>16; - long rr3 = (*xcos++ + *ysin++)>>16; - if (himg[rr0][t++] > best_val) - { - best_val = himg[rr0][t-1]; - best_point.x() = t-1; - best_point.y() = rr0; - } - if (himg[rr1][t++] > best_val) - { - best_val = himg[rr1][t-1]; - best_point.x() = t-1; - best_point.y() = rr1; - } - if (himg[rr2][t++] > best_val) - { - best_val = himg[rr2][t-1]; - best_point.x() = t-1; - best_point.y() = rr2; - } - if (himg[rr3][t++] > best_val) - { - best_val = himg[rr3][t-1]; - best_point.x() = t-1; - best_point.y() = rr3; - } - } - while(t < himg.nc()) - { - long rr0 = (*xcos++ + *ysin++)>>16; - if (himg[rr0][t++] > best_val) - { - best_val = himg[rr0][t-1]; - best_point.x() = t-1; - best_point.y() = rr0; - } - } - - return best_point; - } - - template < - typename in_image_type, - typename out_image_type - > - void operator() ( - const in_image_type& img_, - const rectangle& box, - out_image_type& himg_ - ) const - { - typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - - // box must be exactly size() by size() pixels; report failures under this function's own name. - DLIB_CASSERT(box.width() == size() && box.height() == size(), - "\t void hough_transform::operator()" - << "\n\t Invalid arguments given to this function." 
- << "\n\t box.width(): " << box.width() - << "\n\t box.height(): " << box.height() - << "\n\t size(): " << size() - ); - - COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale == true); - COMPILE_TIME_ASSERT(pixel_traits<out_pixel_type>::grayscale == true); - - const_image_view<in_image_type> img(img_); - image_view<out_image_type> himg(himg_); - - himg.set_size(size(), size()); - assign_all_pixels(himg, 0); - - const rectangle area = box.intersect(get_rect(img)); - - const long max_n8 = (himg.nc()/8)*8; - const long max_n4 = (himg.nc()/4)*4; - for (long r = area.top(); r <= area.bottom(); ++r) - { - const int32* ysin_base = &ysin_theta(r-box.top(),0); - for (long c = area.left(); c <= area.right(); ++c) - { - const out_pixel_type val = static_cast<out_pixel_type>(img[r][c]); - if (val != 0) - { - /* - // The code in this comment is equivalent to the more complex but - // faster code below. We keep this simple version of the Hough - // transform implementation here just to document what it's doing - // more clearly. - const point cent = center(box); - const long x = c - cent.x(); - const long y = r - cent.y(); - for (long t = 0; t < himg.nc(); ++t) - { - double theta = t*pi/even_size; - double radius = (x*std::cos(theta) + y*std::sin(theta))/sqrt_2 + even_size/2 + 0.5; - long rr = static_cast<long>(radius); - himg[rr][t] += val; - } - continue; - */ - - // Run the speed optimized version of the code in the above - // comment. 
- const int32* ysin = ysin_base; - const int32* xcos = &xcos_theta(c-box.left(),0); - long t = 0; - while(t < max_n8) - { - long rr0 = (*xcos++ + *ysin++)>>16; - long rr1 = (*xcos++ + *ysin++)>>16; - long rr2 = (*xcos++ + *ysin++)>>16; - long rr3 = (*xcos++ + *ysin++)>>16; - long rr4 = (*xcos++ + *ysin++)>>16; - long rr5 = (*xcos++ + *ysin++)>>16; - long rr6 = (*xcos++ + *ysin++)>>16; - long rr7 = (*xcos++ + *ysin++)>>16; - - himg[rr0][t++] += val; - himg[rr1][t++] += val; - himg[rr2][t++] += val; - himg[rr3][t++] += val; - himg[rr4][t++] += val; - himg[rr5][t++] += val; - himg[rr6][t++] += val; - himg[rr7][t++] += val; - } - while(t < max_n4) - { - long rr0 = (*xcos++ + *ysin++)>>16; - long rr1 = (*xcos++ + *ysin++)>>16; - long rr2 = (*xcos++ + *ysin++)>>16; - long rr3 = (*xcos++ + *ysin++)>>16; - himg[rr0][t++] += val; - himg[rr1][t++] += val; - himg[rr2][t++] += val; - himg[rr3][t++] += val; - } - while(t < himg.nc()) - { - long rr0 = (*xcos++ + *ysin++)>>16; - himg[rr0][t++] += val; - } - } - } - } - } - - private: - - unsigned long _size; - unsigned long even_size; // equal to _size if _size is even, otherwise equal to _size-1. - matrix<int32> xcos_theta, ysin_theta; - }; -} - -#endif // DLIB_HOUGH_tRANSFORM_Hh_ - diff --git a/ml/dlib/dlib/image_transforms/hough_transform_abstract.h b/ml/dlib/dlib/image_transforms/hough_transform_abstract.h deleted file mode 100644 index f0ff2b550..000000000 --- a/ml/dlib/dlib/image_transforms/hough_transform_abstract.h +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright (C) 2014 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_HOUGH_tRANSFORM_ABSTRACT_Hh_ -#ifdef DLIB_HOUGH_tRANSFORM_ABSTRACT_Hh_ - -#include "../geometry.h" -#include "../image_processing/generic_image.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class hough_transform - { - /*! 
- WHAT THIS OBJECT REPRESENTS - This object is a tool for computing the line finding version of the Hough - transform given some kind of edge detection image as input. It also allows - the edge pixels to be weighted such that higher weighted edge pixels - contribute correspondingly more to the output of the Hough transform, - allowing stronger edges to create correspondingly stronger line detections - in the final Hough transform. - - THREAD SAFETY - It is safe for multiple threads to make concurrent accesses to this object - without synchronization. - !*/ - - public: - - explicit hough_transform ( - unsigned long size_ - ); - /*! - requires - - size_ > 0 - ensures - - This object will compute Hough transforms that are size_ by size_ pixels. - This is in terms of both the Hough accumulator array size as well as the - input image size. - - #size() == size_ - !*/ - - unsigned long size( - ) const; - /*! - ensures - - returns the size of the Hough transforms generated by this object. In - particular, this object creates Hough transform images that are size() by - size() pixels in size. - !*/ - - long nr( - ) const; - /*! - ensures - - returns size() - !*/ - - long nc( - ) const; - /*! - ensures - - returns size() - !*/ - - std::pair<point, point> get_line ( - const point& p - ) const; - /*! - requires - - rectangle(0,0,size()-1,size()-1).contains(p) == true - (i.e. p must be a point inside the Hough accumulator array) - ensures - - returns the line segment in the original image space corresponding - to Hough transform point p. - - The returned points are inside rectangle(0,0,size()-1,size()-1). - !*/ - - template < - typename image_type - > - point get_best_hough_point ( - const point& p, - const image_type& himg - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h and it must contain grayscale pixels. 
- - himg.nr() == size() - - himg.nc() == size() - - rectangle(0,0,size()-1,size()-1).contains(p) == true - ensures - - This function interprets himg as a Hough image and p as a point in the - original image space. Given this, it finds the maximum scoring line that - passes through p. That is, it checks all the Hough accumulator bins in - himg corresponding to lines through p and returns the location with the - largest score. - - returns a point X such that get_rect(himg).contains(X) == true - !*/ - - template < - typename in_image_type, - typename out_image_type - > - void operator() ( - const in_image_type& img, - const rectangle& box, - out_image_type& himg - ) const; - /*! - requires - - in_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h and it must contain grayscale pixels. - - out_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h and it must contain grayscale pixels. - - box.width() == size() - - box.height() == size() - ensures - - Computes the Hough transform of the part of img contained within box. - In particular, we do a grayscale version of the Hough transform where any - non-zero pixel in img is treated as a potential component of a line and - accumulated into the Hough accumulator #himg. However, rather than - adding 1 to each relevant accumulator bin we add the value of the pixel - in img to each Hough accumulator bin. This means that, if all the - pixels in img are 0 or 1 then this routine performs a normal Hough - transform. However, if some pixels have larger values then they will be - weighted correspondingly more in the resulting Hough transform. - - #himg.nr() == size() - - #himg.nc() == size() - - #himg is the Hough transform of the part of img contained in box. Each - point in #himg corresponds to a line in the input box. In particular, - the line for #himg[y][x] is given by get_line(point(x,y)). 
Also, when - viewing the #himg image, the x-axis gives the angle of the line and the - y-axis the distance of the line from the center of the box. - !*/ - - }; -} - -#endif // DLIB_HOUGH_tRANSFORM_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/image_transforms/image_pyramid.h b/ml/dlib/dlib/image_transforms/image_pyramid.h deleted file mode 100644 index 3efed30d8..000000000 --- a/ml/dlib/dlib/image_transforms/image_pyramid.h +++ /dev/null @@ -1,1238 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_IMAGE_PYRaMID_Hh_ -#define DLIB_IMAGE_PYRaMID_Hh_ - -#include "image_pyramid_abstract.h" -#include "../pixel.h" -#include "../array2d.h" -#include "../geometry.h" -#include "spatial_filtering.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class pyramid_disable : noncopyable - { - public: - - template <typename T> - vector<double,2> point_down ( - const vector<T,2>& - ) const - { - return vector<double,2>(0,0); - } - - template <typename T> - vector<double,2> point_up ( - const vector<T,2>& - ) const - { - return vector<double,2>(0,0); - } - - // ----------------------------- - - template <typename T> - vector<double,2> point_down ( - const vector<T,2>& p, - unsigned int levels - ) const - { - if (levels == 0) - return p; - else - return vector<double,2>(0,0); - } - - template <typename T> - vector<double,2> point_up ( - const vector<T,2>& p, - unsigned int levels - ) const - { - if (levels == 0) - return p; - else - return vector<double,2>(0,0); - } - - // ----------------------------- - - drectangle rect_up ( - const drectangle& rect - ) const - { - return drectangle(point_up(rect.tl_corner()), point_up(rect.br_corner())); - } - - drectangle rect_up ( - const drectangle& rect, - unsigned int levels - ) const - { - return drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels)); 
- } - - // ----------------------------- - - drectangle rect_down ( - const drectangle& rect - ) const - { - return drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner())); - } - - drectangle rect_down ( - const drectangle& rect, - unsigned int levels - ) const - { - return drectangle(point_down(rect.tl_corner(),levels), point_down(rect.br_corner(),levels)); - } - - // ----------------------------- - - public: - - template < - typename in_image_type, - typename out_image_type - > - void operator() ( - // we do this #ifdef stuff to avoid compiler warnings about unused variables. -#ifdef ENABLE_ASSERTS - const in_image_type& original, -#else - const in_image_type& , -#endif - out_image_type& down - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(is_same_object(original, down) == false, - "\t void pyramid_disable::operator()" - << "\n\t is_same_object(original, down): " << is_same_object(original, down) - << "\n\t this: " << this - ); - - typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); - - set_image_size(down, 0, 0); - } - - template < - typename image_type - > - void operator() ( - image_type& img - ) const - { - typedef typename image_traits<image_type>::pixel_type pixel_type; - COMPILE_TIME_ASSERT( pixel_traits<pixel_type>::has_alpha == false ); - set_image_size(img, 0, 0); - } - }; - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - namespace impl - { - - class pyramid_down_2_1 : noncopyable - { - public: - - template <typename T> - vector<double,2> 
point_down ( - const vector<T,2>& p - ) const - { - return p/2.0 - vector<double,2>(1.25,0.75); - } - - template <typename T> - vector<double,2> point_up ( - const vector<T,2>& p - ) const - { - return (p + vector<T,2>(1.25,0.75))*2; - } - - // ----------------------------- - - template <typename T> - vector<double,2> point_down ( - const vector<T,2>& p, - unsigned int levels - ) const - { - vector<double,2> temp = p; - for (unsigned int i = 0; i < levels; ++i) - temp = point_down(temp); - return temp; - } - - template <typename T> - vector<double,2> point_up ( - const vector<T,2>& p, - unsigned int levels - ) const - { - vector<double,2> temp = p; - for (unsigned int i = 0; i < levels; ++i) - temp = point_up(temp); - return temp; - } - - // ----------------------------- - - drectangle rect_up ( - const drectangle& rect - ) const - { - return drectangle(point_up(rect.tl_corner()), point_up(rect.br_corner())); - } - - drectangle rect_up ( - const drectangle& rect, - unsigned int levels - ) const - { - return drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels)); - } - - // ----------------------------- - - drectangle rect_down ( - const drectangle& rect - ) const - { - return drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner())); - } - - drectangle rect_down ( - const drectangle& rect, - unsigned int levels - ) const - { - return drectangle(point_down(rect.tl_corner(),levels), point_down(rect.br_corner(),levels)); - } - - // ----------------------------- - - private: - template <typename T, typename U> - struct both_images_rgb - { - typedef typename image_traits<T>::pixel_type T_pix; - typedef typename image_traits<U>::pixel_type U_pix; - const static bool value = pixel_traits<T_pix>::rgb && pixel_traits<U_pix>::rgb; - }; - public: - - template < - typename in_image_type, - typename out_image_type - > - typename disable_if<both_images_rgb<in_image_type,out_image_type> >::type operator() ( - const in_image_type& original_, 
- out_image_type& down_ - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(original_, down_) == false, - "\t void pyramid_down_2_1::operator()" - << "\n\t is_same_object(original_, down_): " << is_same_object(original_, down_) - << "\n\t this: " << this - ); - - typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); - - const_image_view<in_image_type> original(original_); - image_view<out_image_type> down(down_); - - if (original.nr() <= 8 || original.nc() <= 8) - { - down.clear(); - return; - } - - typedef typename pixel_traits<in_pixel_type>::basic_pixel_type bp_type; - typedef typename promote<bp_type>::type ptype; - array2d<ptype> temp_img; - temp_img.set_size(original.nr(), (original.nc()-3)/2); - down.set_size((original.nr()-3)/2, (original.nc()-3)/2); - - - // This function applies a 5x5 Gaussian filter to the image. It - // does this by separating the filter into its horizontal and vertical - // components and then downsamples the image by dropping every other - // row and column. Note that we can do these things all together in - // one step. 
- - // apply row filter - for (long r = 0; r < temp_img.nr(); ++r) - { - long oc = 0; - for (long c = 0; c < temp_img.nc(); ++c) - { - ptype pix1; - ptype pix2; - ptype pix3; - ptype pix4; - ptype pix5; - - assign_pixel(pix1, original[r][oc]); - assign_pixel(pix2, original[r][oc+1]); - assign_pixel(pix3, original[r][oc+2]); - assign_pixel(pix4, original[r][oc+3]); - assign_pixel(pix5, original[r][oc+4]); - - pix2 *= 4; - pix3 *= 6; - pix4 *= 4; - - assign_pixel(temp_img[r][c], pix1 + pix2 + pix3 + pix4 + pix5); - oc += 2; - } - } - - - // apply column filter - long dr = 0; - for (long r = 2; r < temp_img.nr()-2; r += 2) - { - for (long c = 0; c < temp_img.nc(); ++c) - { - ptype temp = temp_img[r-2][c] + - temp_img[r-1][c]*4 + - temp_img[r ][c]*6 + - temp_img[r+1][c]*4 + - temp_img[r+2][c]; - - assign_pixel(down[dr][c],temp/256); - } - ++dr; - } - - } - - private: - struct rgbptype - { - uint16 red; - uint16 green; - uint16 blue; - }; - public: - // ------------------------------------------ - // OVERLOAD FOR RGB TO RGB IMAGES - // ------------------------------------------ - template < - typename in_image_type, - typename out_image_type - > - typename enable_if<both_images_rgb<in_image_type,out_image_type> >::type operator() ( - const in_image_type& original_, - out_image_type& down_ - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(original_, down_) == false, - "\t void pyramid_down_2_1::operator()" - << "\n\t is_same_object(original_, down_): " << is_same_object(original_, down_) - << "\n\t this: " << this - ); - - typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); - - const_image_view<in_image_type> original(original_); - image_view<out_image_type> down(down_); - - if (original.nr() 
<= 8 || original.nc() <= 8) - { - down.clear(); - return; - } - - array2d<rgbptype> temp_img; - temp_img.set_size(original.nr(), (original.nc()-3)/2); - down.set_size((original.nr()-3)/2, (original.nc()-3)/2); - - - // This function applies a 5x5 Gaussian filter to the image. It - // does this by separating the filter into its horizontal and vertical - // components and then downsamples the image by dropping every other - // row and column. Note that we can do these things all together in - // one step. - - // apply row filter - for (long r = 0; r < temp_img.nr(); ++r) - { - long oc = 0; - for (long c = 0; c < temp_img.nc(); ++c) - { - rgbptype pix1; - rgbptype pix2; - rgbptype pix3; - rgbptype pix4; - rgbptype pix5; - - pix1.red = original[r][oc].red; - pix2.red = original[r][oc+1].red; - pix3.red = original[r][oc+2].red; - pix4.red = original[r][oc+3].red; - pix5.red = original[r][oc+4].red; - pix1.green = original[r][oc].green; - pix2.green = original[r][oc+1].green; - pix3.green = original[r][oc+2].green; - pix4.green = original[r][oc+3].green; - pix5.green = original[r][oc+4].green; - pix1.blue = original[r][oc].blue; - pix2.blue = original[r][oc+1].blue; - pix3.blue = original[r][oc+2].blue; - pix4.blue = original[r][oc+3].blue; - pix5.blue = original[r][oc+4].blue; - - pix2.red *= 4; - pix3.red *= 6; - pix4.red *= 4; - - pix2.green *= 4; - pix3.green *= 6; - pix4.green *= 4; - - pix2.blue *= 4; - pix3.blue *= 6; - pix4.blue *= 4; - - rgbptype temp; - temp.red = pix1.red + pix2.red + pix3.red + pix4.red + pix5.red; - temp.green = pix1.green + pix2.green + pix3.green + pix4.green + pix5.green; - temp.blue = pix1.blue + pix2.blue + pix3.blue + pix4.blue + pix5.blue; - - temp_img[r][c] = temp; - - oc += 2; - } - } - - - // apply column filter - long dr = 0; - for (long r = 2; r < temp_img.nr()-2; r += 2) - { - for (long c = 0; c < temp_img.nc(); ++c) - { - rgbptype temp; - temp.red = temp_img[r-2][c].red + - temp_img[r-1][c].red*4 + - temp_img[r ][c].red*6 + - 
temp_img[r+1][c].red*4 + - temp_img[r+2][c].red; - temp.green = temp_img[r-2][c].green + - temp_img[r-1][c].green*4 + - temp_img[r ][c].green*6 + - temp_img[r+1][c].green*4 + - temp_img[r+2][c].green; - temp.blue = temp_img[r-2][c].blue + - temp_img[r-1][c].blue*4 + - temp_img[r ][c].blue*6 + - temp_img[r+1][c].blue*4 + - temp_img[r+2][c].blue; - - down[dr][c].red = temp.red/256; - down[dr][c].green = temp.green/256; - down[dr][c].blue = temp.blue/256; - } - ++dr; - } - - } - - template < - typename image_type - > - void operator() ( - image_type& img - ) const - { - image_type temp; - (*this)(img, temp); - swap(temp, img); - } - - private: - - - }; - - // ---------------------------------------------------------------------------------------- - // ---------------------------------------------------------------------------------------- - // ---------------------------------------------------------------------------------------- - - class pyramid_down_3_2 : noncopyable - { - public: - - template <typename T> - vector<double,2> point_down ( - const vector<T,2>& p - ) const - { - const double ratio = 2.0/3.0; - return p*ratio - vector<double,2>(1,1); - } - - template <typename T> - vector<double,2> point_up ( - const vector<T,2>& p - ) const - { - const double ratio = 3.0/2.0; - return p*ratio + vector<T,2>(ratio,ratio); - } - - // ----------------------------- - - template <typename T> - vector<double,2> point_down ( - const vector<T,2>& p, - unsigned int levels - ) const - { - vector<double,2> temp = p; - for (unsigned int i = 0; i < levels; ++i) - temp = point_down(temp); - return temp; - } - - template <typename T> - vector<double,2> point_up ( - const vector<T,2>& p, - unsigned int levels - ) const - { - vector<double,2> temp = p; - for (unsigned int i = 0; i < levels; ++i) - temp = point_up(temp); - return temp; - } - - // ----------------------------- - - drectangle rect_up ( - const drectangle& rect - ) const - { - return drectangle(point_up(rect.tl_corner()), 
point_up(rect.br_corner())); - } - - drectangle rect_up ( - const drectangle& rect, - unsigned int levels - ) const - { - return drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels)); - } - - // ----------------------------- - - drectangle rect_down ( - const drectangle& rect - ) const - { - return drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner())); - } - - drectangle rect_down ( - const drectangle& rect, - unsigned int levels - ) const - { - return drectangle(point_down(rect.tl_corner(),levels), point_down(rect.br_corner(),levels)); - } - - // ----------------------------- - - private: - template <typename T, typename U> - struct both_images_rgb - { - typedef typename image_traits<T>::pixel_type T_pix; - typedef typename image_traits<U>::pixel_type U_pix; - const static bool value = pixel_traits<T_pix>::rgb && pixel_traits<U_pix>::rgb; - }; - public: - - template < - typename in_image_type, - typename out_image_type - > - typename disable_if<both_images_rgb<in_image_type,out_image_type> >::type operator() ( - const in_image_type& original_, - out_image_type& down_ - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(is_same_object(original_, down_) == false, - "\t void pyramid_down_3_2::operator()" - << "\n\t is_same_object(original_, down_): " << is_same_object(original_, down_) - << "\n\t this: " << this - ); - - typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); - - const_image_view<in_image_type> original(original_); - image_view<out_image_type> down(down_); - - if (original.nr() <= 8 || original.nc() <= 8) - { - down.clear(); - return; - } - - const long size_in = 3; - const long size_out = 2; - - typedef typename 
pixel_traits<in_pixel_type>::basic_pixel_type bp_type; - typedef typename promote<bp_type>::type ptype; - const long full_nr = size_out*((original.nr()-2)/size_in); - const long part_nr = (size_out*(original.nr()-2))/size_in; - const long full_nc = size_out*((original.nc()-2)/size_in); - const long part_nc = (size_out*(original.nc()-2))/size_in; - down.set_size(part_nr, part_nc); - - - long rr = 1; - long r; - for (r = 0; r < full_nr; r+=size_out) - { - long cc = 1; - long c; - for (c = 0; c < full_nc; c+=size_out) - { - ptype block[size_in][size_in]; - separable_3x3_filter_block_grayscale(block, original_, rr, cc, 2, 12, 2); - - // bi-linearly interpolate block - assign_pixel(down[r][c] , (block[0][0]*9 + block[1][0]*3 + block[0][1]*3 + block[1][1])/(16*256)); - assign_pixel(down[r][c+1] , (block[0][2]*9 + block[1][2]*3 + block[0][1]*3 + block[1][1])/(16*256)); - assign_pixel(down[r+1][c] , (block[2][0]*9 + block[1][0]*3 + block[2][1]*3 + block[1][1])/(16*256)); - assign_pixel(down[r+1][c+1] , (block[2][2]*9 + block[1][2]*3 + block[2][1]*3 + block[1][1])/(16*256)); - - cc += size_in; - } - if (part_nc - full_nc == 1) - { - ptype block[size_in][2]; - separable_3x3_filter_block_grayscale(block, original_, rr, cc, 2, 12, 2); - - // bi-linearly interpolate partial block - assign_pixel(down[r][c] , (block[0][0]*9 + block[1][0]*3 + block[0][1]*3 + block[1][1])/(16*256)); - assign_pixel(down[r+1][c] , (block[2][0]*9 + block[1][0]*3 + block[2][1]*3 + block[1][1])/(16*256)); - } - rr += size_in; - } - if (part_nr - full_nr == 1) - { - long cc = 1; - long c; - for (c = 0; c < full_nc; c+=size_out) - { - ptype block[2][size_in]; - separable_3x3_filter_block_grayscale(block, original_, rr, cc, 2, 12, 2); - - // bi-linearly interpolate partial block - assign_pixel(down[r][c] , (block[0][0]*9 + block[1][0]*3 + block[0][1]*3 + block[1][1])/(16*256)); - assign_pixel(down[r][c+1] , (block[0][2]*9 + block[1][2]*3 + block[0][1]*3 + block[1][1])/(16*256)); - - cc += size_in; - } - if 
(part_nc - full_nc == 1) - { - ptype block[2][2]; - separable_3x3_filter_block_grayscale(block, original_, rr, cc, 2, 12, 2); - - // bi-linearly interpolate partial block - assign_pixel(down[r][c] , (block[0][0]*9 + block[1][0]*3 + block[0][1]*3 + block[1][1])/(16*256)); - } - } - - } - - private: - struct rgbptype - { - uint32 red; - uint32 green; - uint32 blue; - }; - - public: - // ------------------------------------------ - // OVERLOAD FOR RGB TO RGB IMAGES - // ------------------------------------------ - template < - typename in_image_type, - typename out_image_type - > - typename enable_if<both_images_rgb<in_image_type,out_image_type> >::type operator() ( - const in_image_type& original_, - out_image_type& down_ - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(original_, down_) == false, - "\t void pyramid_down_3_2::operator()" - << "\n\t is_same_object(original_, down_): " << is_same_object(original_, down_) - << "\n\t this: " << this - ); - - typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); - - const_image_view<in_image_type> original(original_); - image_view<out_image_type> down(down_); - - if (original.nr() <= 8 || original.nc() <= 8) - { - down.clear(); - return; - } - - const long size_in = 3; - const long size_out = 2; - - const long full_nr = size_out*((original.nr()-2)/size_in); - const long part_nr = (size_out*(original.nr()-2))/size_in; - const long full_nc = size_out*((original.nc()-2)/size_in); - const long part_nc = (size_out*(original.nc()-2))/size_in; - down.set_size(part_nr, part_nc); - - - long rr = 1; - long r; - for (r = 0; r < full_nr; r+=size_out) - { - long cc = 1; - long c; - for (c = 0; c < full_nc; c+=size_out) - { - rgbptype 
block[size_in][size_in]; - separable_3x3_filter_block_rgb(block, original_, rr, cc, 2, 12, 2); - - // bi-linearly interpolate block - down[r][c].red = (block[0][0].red*9 + block[1][0].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256); - down[r][c].green = (block[0][0].green*9 + block[1][0].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256); - down[r][c].blue = (block[0][0].blue*9 + block[1][0].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256); - - down[r][c+1].red = (block[0][2].red*9 + block[1][2].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256); - down[r][c+1].green = (block[0][2].green*9 + block[1][2].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256); - down[r][c+1].blue = (block[0][2].blue*9 + block[1][2].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256); - - down[r+1][c].red = (block[2][0].red*9 + block[1][0].red*3 + block[2][1].red*3 + block[1][1].red)/(16*256); - down[r+1][c].green = (block[2][0].green*9 + block[1][0].green*3 + block[2][1].green*3 + block[1][1].green)/(16*256); - down[r+1][c].blue = (block[2][0].blue*9 + block[1][0].blue*3 + block[2][1].blue*3 + block[1][1].blue)/(16*256); - - down[r+1][c+1].red = (block[2][2].red*9 + block[1][2].red*3 + block[2][1].red*3 + block[1][1].red)/(16*256); - down[r+1][c+1].green = (block[2][2].green*9 + block[1][2].green*3 + block[2][1].green*3 + block[1][1].green)/(16*256); - down[r+1][c+1].blue = (block[2][2].blue*9 + block[1][2].blue*3 + block[2][1].blue*3 + block[1][1].blue)/(16*256); - - cc += size_in; - } - if (part_nc - full_nc == 1) - { - rgbptype block[size_in][2]; - separable_3x3_filter_block_rgb(block, original_, rr, cc, 2, 12, 2); - - // bi-linearly interpolate partial block - down[r][c].red = (block[0][0].red*9 + block[1][0].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256); - down[r][c].green = (block[0][0].green*9 + block[1][0].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256); - down[r][c].blue = (block[0][0].blue*9 + block[1][0].blue*3 + 
block[0][1].blue*3 + block[1][1].blue)/(16*256); - - down[r+1][c].red = (block[2][0].red*9 + block[1][0].red*3 + block[2][1].red*3 + block[1][1].red)/(16*256); - down[r+1][c].green = (block[2][0].green*9 + block[1][0].green*3 + block[2][1].green*3 + block[1][1].green)/(16*256); - down[r+1][c].blue = (block[2][0].blue*9 + block[1][0].blue*3 + block[2][1].blue*3 + block[1][1].blue)/(16*256); - } - rr += size_in; - } - if (part_nr - full_nr == 1) - { - long cc = 1; - long c; - for (c = 0; c < full_nc; c+=size_out) - { - rgbptype block[2][size_in]; - separable_3x3_filter_block_rgb(block, original_, rr, cc, 2, 12, 2); - - // bi-linearly interpolate partial block - down[r][c].red = (block[0][0].red*9 + block[1][0].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256); - down[r][c].green = (block[0][0].green*9 + block[1][0].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256); - down[r][c].blue = (block[0][0].blue*9 + block[1][0].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256); - - down[r][c+1].red = (block[0][2].red*9 + block[1][2].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256); - down[r][c+1].green = (block[0][2].green*9 + block[1][2].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256); - down[r][c+1].blue = (block[0][2].blue*9 + block[1][2].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256); - - cc += size_in; - } - if (part_nc - full_nc == 1) - { - rgbptype block[2][2]; - separable_3x3_filter_block_rgb(block, original_, rr, cc, 2, 12, 2); - - // bi-linearly interpolate partial block - down[r][c].red = (block[0][0].red*9 + block[1][0].red*3 + block[0][1].red*3 + block[1][1].red)/(16*256); - down[r][c].green = (block[0][0].green*9 + block[1][0].green*3 + block[0][1].green*3 + block[1][1].green)/(16*256); - down[r][c].blue = (block[0][0].blue*9 + block[1][0].blue*3 + block[0][1].blue*3 + block[1][1].blue)/(16*256); - } - } - } - - template < - typename image_type - > - void operator() ( - image_type& img - ) const - { - image_type 
temp; - (*this)(img, temp); - swap(temp, img); - } - private: - - - }; - - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - unsigned int N - > - class pyramid_down : noncopyable - { - public: - - COMPILE_TIME_ASSERT(N > 0); - - template <typename T> - vector<double,2> point_down ( - const vector<T,2>& p - ) const - { - const double ratio = (N-1.0)/N; - return (p - 0.3)*ratio; - } - - template <typename T> - vector<double,2> point_up ( - const vector<T,2>& p - ) const - { - const double ratio = N/(N-1.0); - return p*ratio + 0.3; - } - - // ----------------------------- - - template <typename T> - vector<double,2> point_down ( - const vector<T,2>& p, - unsigned int levels - ) const - { - vector<double,2> temp = p; - for (unsigned int i = 0; i < levels; ++i) - temp = point_down(temp); - return temp; - } - - template <typename T> - vector<double,2> point_up ( - const vector<T,2>& p, - unsigned int levels - ) const - { - vector<double,2> temp = p; - for (unsigned int i = 0; i < levels; ++i) - temp = point_up(temp); - return temp; - } - - // ----------------------------- - - drectangle rect_up ( - const drectangle& rect - ) const - { - return drectangle(point_up(rect.tl_corner()), point_up(rect.br_corner())); - } - - drectangle rect_up ( - const drectangle& rect, - unsigned int levels - ) const - { - return drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels)); - } - - // ----------------------------- - - drectangle rect_down ( - const drectangle& rect - ) const - { - return drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner())); - } - - drectangle rect_down ( - const drectangle& rect, - unsigned int levels - ) const - { - return drectangle(point_down(rect.tl_corner(),levels), 
point_down(rect.br_corner(),levels)); - } - - template < - typename in_image_type, - typename out_image_type - > - void operator() ( - const in_image_type& original, - out_image_type& down - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(is_same_object(original, down) == false, - "\t void pyramid_down::operator()" - << "\n\t is_same_object(original, down): " << is_same_object(original, down) - << "\n\t this: " << this - ); - - typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); - - - set_image_size(down, ((N-1)*num_rows(original))/N+0.5, ((N-1)*num_columns(original))/N+0.5); - resize_image(original, down); - } - - template < - typename image_type - > - void operator() ( - image_type& img - ) const - { - image_type temp; - (*this)(img, temp); - swap(temp, img); - } - }; - - template <> - class pyramid_down<1> : public pyramid_disable {}; - - template <> - class pyramid_down<2> : public dlib::impl::pyramid_down_2_1 {}; - - template <> - class pyramid_down<3> : public dlib::impl::pyramid_down_3_2 {}; - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template <unsigned int N> - double pyramid_rate(const pyramid_down<N>&) - { - return (N-1.0)/N; - } - -// ---------------------------------------------------------------------------------------- - - template <unsigned int N> - void find_pyramid_down_output_image_size( - const pyramid_down<N>& pyr, - long& nr, - long& nc - ) - { - const double rate = pyramid_rate(pyr); - nr = std::floor(rate*nr); - nc = std::floor(rate*nc); - } - 
- inline void find_pyramid_down_output_image_size( - const pyramid_down<3>& /*pyr*/, - long& nr, - long& nc - ) - { - nr = 2*(nr-2)/3; - nc = 2*(nc-2)/3; - } - - inline void find_pyramid_down_output_image_size( - const pyramid_down<2>& /*pyr*/, - long& nr, - long& nc - ) - { - nr = (nr-3)/2; - nc = (nc-3)/2; - } - - inline void find_pyramid_down_output_image_size( - const pyramid_down<1>& /*pyr*/, - long& nr, - long& nc - ) - { - nr = 0; - nc = 0; - } - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template <typename pyramid_type> - void compute_tiled_image_pyramid_details ( - const pyramid_type& pyr, - long nr, - long nc, - const unsigned long padding, - const unsigned long outer_padding, - std::vector<rectangle>& rects, - long& pyramid_image_nr, - long& pyramid_image_nc - ) - { - rects.clear(); - if (nr*nc == 0) - { - pyramid_image_nr = 0; - pyramid_image_nc = 0; - return; - } - - const long min_height = 5; - rects.reserve(100); - rects.push_back(rectangle(nc,nr)); - // build the whole pyramid - while(true) - { - find_pyramid_down_output_image_size(pyr, nr, nc); - if (nr*nc == 0 || nr < min_height) - break; - rects.push_back(rectangle(nc,nr)); - } - - // figure out output image size - long total_height = 0; - for (auto&& i : rects) - total_height += i.height()+padding; - total_height -= padding*2; // don't add unnecessary padding to the very right side. - long height = 0; - long prev_width = 0; - for (auto&& i : rects) - { - // Figure out how far we go on the first column. We go until the next image can - // fit next to the previous one, which means we can double back for the second - // column of images. - if (i.width() <= rects[0].width()-prev_width-(long)padding && - (height-rects[0].height())*2 >= (total_height-rects[0].height())) - { - break; - } - height += i.height() + padding; - prev_width = i.width(); - } - height -= padding; // don't add unnecessary padding to the very right side. 
- - const long width = rects[0].width(); - pyramid_image_nr = height+outer_padding*2; - pyramid_image_nc = width+outer_padding*2; - - - long y = outer_padding; - size_t i = 0; - while(y < height+(long)outer_padding && i < rects.size()) - { - rects[i] = translate_rect(rects[i],point(outer_padding,y)); - DLIB_ASSERT(rectangle(pyramid_image_nc,pyramid_image_nr).contains(rects[i])); - y += rects[i].height()+padding; - ++i; - } - y -= padding; - while (i < rects.size()) - { - point p1(outer_padding+width-1,y-1); - point p2 = p1 - rects[i].br_corner(); - rectangle rect(p1,p2); - DLIB_ASSERT(rectangle(pyramid_image_nc,pyramid_image_nr).contains(rect)); - // don't keep going on the last row if it would intersect the original image. - if (!rects[0].intersect(rect).is_empty()) - break; - - rects[i] = rect; - y -= rects[i].height()+padding; - ++i; - } - - // Delete any extraneous rectangles if we broke out of the above loop early due to - // intersection with the original image. - rects.resize(i); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename pyramid_type, - typename image_type1, - typename image_type2 - > - void create_tiled_pyramid ( - const image_type1& img, - image_type2& out_img, - std::vector<rectangle>& rects, - const unsigned long padding = 10, - const unsigned long outer_padding = 0 - ) - { - DLIB_ASSERT(!is_same_object(img, out_img)); - - long out_nr, out_nc; - pyramid_type pyr; - impl::compute_tiled_image_pyramid_details(pyr, img.nr(), img.nc(), padding, outer_padding, rects, out_nr, out_nc); - - set_image_size(out_img, out_nr, out_nc); - assign_all_pixels(out_img, 0); - - if (rects.size() == 0) - return; - - // now build the image pyramid into out_img - auto si = sub_image(out_img, rects[0]); - assign_image(si, img); - for (size_t i = 1; i < rects.size(); ++i) - { - auto s1 = sub_image(out_img, rects[i-1]); - auto s2 = sub_image(out_img, rects[i]); - pyr(s1,s2); - } - } - -// 
---------------------------------------------------------------------------------------- - - template < - typename pyramid_type - > - dpoint image_to_tiled_pyramid ( - const std::vector<rectangle>& rects, - double scale, - dpoint p - ) - { - DLIB_CASSERT(rects.size() > 0); - DLIB_CASSERT(0 < scale && scale <= 1); - pyramid_type pyr; - // This scale factor maps this many levels down the pyramid - long pyramid_down_iter = static_cast<long>(std::log(scale)/std::log(pyramid_rate(pyr))+0.5); - pyramid_down_iter = put_in_range(0, (long)rects.size()-1, pyramid_down_iter); - - return rects[pyramid_down_iter].tl_corner() + pyr.point_down(p, pyramid_down_iter); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename pyramid_type - > - drectangle image_to_tiled_pyramid ( - const std::vector<rectangle>& rects, - double scale, - drectangle r - ) - { - DLIB_ASSERT(rects.size() > 0); - DLIB_ASSERT(0 < scale && scale <= 1); - return drectangle(image_to_tiled_pyramid<pyramid_type>(rects, scale, r.tl_corner()), - image_to_tiled_pyramid<pyramid_type>(rects, scale, r.br_corner())); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename pyramid_type - > - dpoint tiled_pyramid_to_image ( - const std::vector<rectangle>& rects, - dpoint p - ) - { - DLIB_CASSERT(rects.size() > 0); - - size_t pyramid_down_iter = nearest_rect(rects, p); - - p -= rects[pyramid_down_iter].tl_corner(); - pyramid_type pyr; - return pyr.point_up(p, pyramid_down_iter); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename pyramid_type - > - drectangle tiled_pyramid_to_image ( - const std::vector<rectangle>& rects, - drectangle r - ) - { - DLIB_CASSERT(rects.size() > 0); - - size_t pyramid_down_iter = nearest_rect(rects, dcenter(r)); - - dpoint origin = rects[pyramid_down_iter].tl_corner(); - r = 
drectangle(r.tl_corner()-origin, r.br_corner()-origin); - pyramid_type pyr; - return pyr.rect_up(r, pyramid_down_iter); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_IMAGE_PYRaMID_Hh_ - diff --git a/ml/dlib/dlib/image_transforms/image_pyramid_abstract.h b/ml/dlib/dlib/image_transforms/image_pyramid_abstract.h deleted file mode 100644 index a61b275fd..000000000 --- a/ml/dlib/dlib/image_transforms/image_pyramid_abstract.h +++ /dev/null @@ -1,384 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_IMAGE_PYRaMID_ABSTRACT_Hh_ -#ifdef DLIB_IMAGE_PYRaMID_ABSTRACT_Hh_ - -#include "../pixel.h" -#include "../array2d.h" -#include "../geometry.h" -#include "../image_processing/generic_image.h" - -namespace dlib -{ - - template < - unsigned int N - > - class pyramid_down : noncopyable - { - /*! - REQUIREMENTS ON N - N > 0 - - WHAT THIS OBJECT REPRESENTS - This is a simple functor to help create image pyramids. In particular, it - downsamples images at a ratio of N to N-1. - - Note that setting N to 1 means that this object functions like - pyramid_disable (defined at the bottom of this file). - - WARNING, when mapping rectangles from one layer of a pyramid - to another you might end up with rectangles which extend slightly - outside your images. This is because points on the border of an - image at a higher pyramid layer might correspond to points outside - images at lower layers. So just keep this in mind. Note also - that it's easy to deal with. Just say something like this: - rect = rect.intersect(get_rect(my_image)); // keep rect inside my_image - !*/ - public: - - template < - typename in_image_type, - typename out_image_type - > - void operator() ( - const in_image_type& original, - out_image_type& down - ) const; - /*! 
- requires - - is_same_object(original, down) == false - - in_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - out_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - for both pixel types P in the input and output images, we require: - - pixel_traits<P>::has_alpha == false - ensures - - #down will contain an image that is roughly (N-1)/N times the size of the - original image. - - If both input and output images contain RGB pixels then the downsampled image will - be in color. Otherwise, the downsampling will be performed in a grayscale mode. - - The location of a point P in original image will show up at point point_down(P) - in the #down image. - - Note that some points on the border of the original image might correspond to - points outside the #down image. - !*/ - - template < - typename image_type - > - void operator() ( - image_type& img - ) const; - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false - ensures - - This function downsamples the given image and stores the results in #img. - In particular, it is equivalent to performing: - (*this)(img, temp); - swap(img, temp); - !*/ - - // ------------------------------- - - template <typename T> - vector<double,2> point_down ( - const vector<T,2>& p - ) const; - /*! - ensures - - interprets p as a point in a parent image and returns the - point in a downsampled image which corresponds to p. - - This function is the inverse of point_up(). I.e. for a point P: - point_down(point_up(P)) == P - !*/ - - template <typename T> - vector<double,2> point_up ( - const vector<T,2>& p - ) const; - /*! - ensures - - interprets p as a point in a downsampled image and returns the - point in a parent image which corresponds to p. 
- - This function is the inverse of point_down(). I.e. for a point P: - point_up(point_down(P)) == P - !*/ - - drectangle rect_down ( - const drectangle& rect - ) const; - /*! - ensures - - returns drectangle(point_down(rect.tl_corner()), point_down(rect.br_corner())); - (i.e. maps rect into a downsampled) - !*/ - - drectangle rect_up ( - const drectangle& rect - ) const; - /*! - ensures - - returns drectangle(point_up(rect.tl_corner()), point_up(rect.br_corner())); - (i.e. maps rect into a parent image) - !*/ - - // ------------------------------- - - template <typename T> - vector<double,2> point_down ( - const vector<T,2>& p, - unsigned int levels - ) const; - /*! - ensures - - applies point_down() to p levels times and returns the result. - (i.e. point_down(p,2) == point_down(point_down(p)), - point_down(p,1) == point_down(p), - point_down(p,0) == p, etc. ) - !*/ - - template <typename T> - vector<double,2> point_up ( - const vector<T,2>& p, - unsigned int levels - ) const; - /*! - ensures - - applies point_up() to p levels times and returns the result. - (i.e. point_up(p,2) == point_up(point_up(p)), - point_up(p,1) == point_up(p), - point_up(p,0) == p, etc. ) - !*/ - - drectangle rect_down ( - const drectangle& rect, - unsigned int levels - ) const; - /*! - ensures - - returns drectangle(point_down(rect.tl_corner(),levels), point_down(rect.br_corner(),levels)); - (i.e. Basically applies rect_down() to rect levels times and returns the result.) - !*/ - - drectangle rect_up ( - const drectangle& rect, - unsigned int levels - ) const; - /*! - ensures - - returns drectangle(point_up(rect.tl_corner(),levels), point_up(rect.br_corner(),levels)); - (i.e. Basically applies rect_up() to rect levels times and returns the result.) - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - class pyramid_disable : noncopyable - { - /*! 
- WHAT THIS OBJECT REPRESENTS - This is a function object with an interface identical to pyramid_down (defined - at the top of this file) except that it downsamples images at a ratio of infinity - to 1. That means it always outputs images of size 0 regardless of the size - of the inputs. - - This is useful because it can be supplied to routines which take a pyramid_down - function object and it will essentially disable pyramid processing. This way, - a pyramid oriented function can be turned into a regular routine which processes - just the original undownsampled image. - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - template < - unsigned int N - > - double pyramid_rate( - const pyramid_down<N>& pyr - ); - /*! - ensures - - returns (N-1.0)/N - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - unsigned int N - > - void find_pyramid_down_output_image_size( - const pyramid_down<N>& pyr, - long& nr, - long& nc - ); - /*! - requires - - nr >= 0 - - nc >= 0 - ensures - - If pyr() were called on an image with nr by nc rows and columns, what would - be the size of the output image? This function finds the size of the output - image and stores it back into #nr and #nc. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename pyramid_type, - typename image_type1, - typename image_type2 - > - void create_tiled_pyramid ( - const image_type1& img, - image_type2& out_img, - std::vector<rectangle>& rects, - const unsigned long padding = 10, - const unsigned long outer_padding = 0 - ); - /*! - requires - - pyramid_type == one of the dlib::pyramid_down template instances defined above. 
- - is_same_object(img, out_img) == false - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - for both pixel types P in the input and output images, we require: - - pixel_traits<P>::has_alpha == false - ensures - - Creates an image pyramid from the input image img. The pyramid is made using - pyramid_type. The highest resolution image is img and then all further - pyramid levels are generated from pyramid_type's downsampling. The entire - resulting pyramid is packed into a single image and stored in out_img. - - When packing pyramid levels into out_img, there will be padding pixels of - space between each sub-image. There will also be outer_padding pixels of - padding around the edge of the image. All padding pixels have a value of 0. - - The resulting pyramid will be composed of #rects.size() images packed into - out_img. Moreover, #rects[i] is the location inside out_img of the i-th - pyramid level. - - #rects.size() > 0 - - #rects[0] == get_rect(img). I.e. the first rectangle is the highest - resolution pyramid layer. Subsequent elements of #rects correspond to - smaller and smaller pyramid layers inside out_img. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename pyramid_type - > - dpoint image_to_tiled_pyramid ( - const std::vector<rectangle>& rects, - double scale, - dpoint p - ); - /*! - requires - - pyramid_type == one of the dlib::pyramid_down template instances defined above. - - 0 < scale <= 1 - - rects.size() > 0 - ensures - - The function create_tiled_pyramid() converts an image, img, to a "tiled - pyramid" called out_img. It also outputs a vector of rectangles, rect, that - show where each pyramid layer appears in out_img. 
Therefore, - image_to_tiled_pyramid() allows you to map from coordinates in img (i.e. p) - to coordinates in the tiled pyramid out_img, when given the rects metadata. - - So given a point p in img, you can ask, what coordinate in out_img - corresponds to img[p.y()][p.x()] when things are scale times smaller? This - new coordinate is a location in out_img and is what is returned by this - function. - - A scale of 1 means we don't move anywhere in the pyramid scale space relative - to the input image while smaller values of scale mean we move down the - pyramid. - - Assumes pyramid_type is the pyramid class used to produce the tiled image. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename pyramid_type - > - drectangle image_to_tiled_pyramid ( - const std::vector<rectangle>& rects, - double scale, - drectangle r - ); - /*! - requires - - pyramid_type == one of the dlib::pyramid_down template instances defined above. - - 0 < scale <= 1 - - rects.size() > 0 - ensures - - This function maps from input image space to tiled pyramid coordinate space - just as the above image_to_tiled_pyramid() does, except it operates on - rectangle objects instead of points. - - Assumes pyramid_type is the pyramid class used to produce the tiled image. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename pyramid_type - > - dpoint tiled_pyramid_to_image ( - const std::vector<rectangle>& rects, - dpoint p - ); - /*! - requires - - pyramid_type == one of the dlib::pyramid_down template instances defined above. - - rects.size() > 0 - ensures - - This function maps from a coordinate in a tiled pyramid to the corresponding - input image coordinate. Therefore, it is essentially the inverse of - image_to_tiled_pyramid(). - - It should be noted that this function isn't always an inverse of - image_to_tiled_pyramid(). 
This is because you can ask - image_to_tiled_pyramid() for the coordinates of points outside the input - image and they will be mapped to somewhere that doesn't have an inverse. But - for points actually inside the image this function performs an approximate - inverse mapping. - - Assumes pyramid_type is the pyramid class used to produce the tiled image. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename pyramid_type - > - drectangle tiled_pyramid_to_image ( - const std::vector<rectangle>& rects, - drectangle r - ); - /*! - requires - - pyramid_type == one of the dlib::pyramid_down template instances defined above. - - rects.size() > 0 - ensures - - This function maps from a coordinate in a tiled pyramid to the corresponding - input image coordinate. Therefore, it is essentially the inverse of - image_to_tiled_pyramid(). - - It should be noted that this function isn't always an inverse of - image_to_tiled_pyramid(). This is because you can ask - image_to_tiled_pyramid() for the coordinates of points outside the input - image and they will be mapped to somewhere that doesn't have an inverse. But - for points actually inside the image this function performs an approximate - inverse mapping. - - Assumes pyramid_type is the pyramid class used to produce the tiled image. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_IMAGE_PYRaMID_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/image_transforms/integral_image.h b/ml/dlib/dlib/image_transforms/integral_image.h deleted file mode 100644 index 2ae47d921..000000000 --- a/ml/dlib/dlib/image_transforms/integral_image.h +++ /dev/null @@ -1,190 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#ifndef DLIB_INTEGRAL_IMAGE -#define DLIB_INTEGRAL_IMAGE - -#include "integral_image_abstract.h" - -#include "../algs.h" -#include "../assert.h" -#include "../geometry.h" -#include "../array2d.h" -#include "../matrix.h" -#include "../pixel.h" -#include "../noncopyable.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - class integral_image_generic : noncopyable - { - public: - typedef T value_type; - - long nr() const { return int_img.nr(); } - long nc() const { return int_img.nc(); } - - template <typename image_type> - void load ( - const image_type& img_ - ) - { - const_image_view<image_type> img(img_); - T pixel; - int_img.set_size(img.nr(), img.nc()); - - // compute the first row of the integral image - T temp = 0; - for (long c = 0; c < img.nc(); ++c) - { - assign_pixel(pixel, img[0][c]); - temp += pixel; - int_img[0][c] = temp; - } - - // now compute the rest of the integral image - for (long r = 1; r < img.nr(); ++r) - { - temp = 0; - for (long c = 0; c < img.nc(); ++c) - { - assign_pixel(pixel, img[r][c]); - temp += pixel; - int_img[r][c] = temp + int_img[r-1][c]; - } - } - - } - - value_type get_sum_of_area ( - const rectangle& rect - ) const - { - DLIB_ASSERT(get_rect(*this).contains(rect) == true && rect.is_empty() == false, - "\tvalue_type get_sum_of_area(rect)" - << "\n\tYou have given a rectangle that goes outside the image" - << "\n\tthis: " << this - << "\n\trect.is_empty(): " << rect.is_empty() - << "\n\trect: " << rect - << "\n\tget_rect(*this): " << get_rect(*this) - ); - - T top_left = 0, top_right = 0, bottom_left = 0, bottom_right = 0; - - bottom_right = int_img[rect.bottom()][rect.right()]; - if (rect.left()-1 >= 0 && rect.top()-1 >= 0) - { - top_left = int_img[rect.top()-1][rect.left()-1]; - bottom_left = int_img[rect.bottom()][rect.left()-1]; - top_right = int_img[rect.top()-1][rect.right()]; - } - else if (rect.left()-1 >= 0) - { - 
bottom_left = int_img[rect.bottom()][rect.left()-1]; - } - else if (rect.top()-1 >= 0) - { - top_right = int_img[rect.top()-1][rect.right()]; - } - - return bottom_right - bottom_left - top_right + top_left; - } - - void swap(integral_image_generic& item) - { - int_img.swap(item.int_img); - } - - private: - - array2d<T> int_img; - }; - - - template < - typename T - > - void swap ( - integral_image_generic<T>& a, - integral_image_generic<T>& b - ) { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - - typedef integral_image_generic<long> integral_image; - -// ---------------------------------------------------------------------------------------- - - template <typename integral_image_type> - typename integral_image_type::value_type haar_x ( - const integral_image_type& img, - const point& p, - long width - ) - { - DLIB_ASSERT(get_rect(img).contains(centered_rect(p,width,width)) == true, - "\tlong haar_x(img,p,width)" - << "\n\tYou have given a point and with that goes outside the image" - << "\n\tget_rect(img): " << get_rect(img) - << "\n\tp: " << p - << "\n\twidth: " << width - ); - - rectangle left_rect; - left_rect.set_left ( p.x() - width / 2 ); - left_rect.set_top ( p.y() - width / 2 ); - left_rect.set_right ( p.x()-1 ); - left_rect.set_bottom ( left_rect.top() + width - 1 ); - - rectangle right_rect; - right_rect.set_left ( p.x() ); - right_rect.set_top ( left_rect.top() ); - right_rect.set_right ( left_rect.left() + width -1 ); - right_rect.set_bottom ( left_rect.bottom() ); - - return img.get_sum_of_area(right_rect) - img.get_sum_of_area(left_rect); - } - - // ---------------------------------------------------------------------------- - - template <typename integral_image_type> - typename integral_image_type::value_type haar_y ( - const integral_image_type& img, - const point& p, - long width - ) - { - DLIB_ASSERT(get_rect(img).contains(centered_rect(p,width,width)) == true, - "\tlong 
haar_y(img,p,width)" - << "\n\tYou have given a point and with that goes outside the image" - << "\n\tget_rect(img): " << get_rect(img) - << "\n\tp: " << p - << "\n\twidth: " << width - ); - - rectangle top_rect; - top_rect.set_left ( p.x() - width / 2 ); - top_rect.set_top ( p.y() - width / 2 ); - top_rect.set_right ( top_rect.left() + width - 1 ); - top_rect.set_bottom ( p.y()-1 ); - - rectangle bottom_rect; - bottom_rect.set_left ( top_rect.left() ); - bottom_rect.set_top ( p.y() ); - bottom_rect.set_right ( top_rect.right() ); - bottom_rect.set_bottom ( top_rect.top() + width - 1 ); - - return img.get_sum_of_area(bottom_rect) - img.get_sum_of_area(top_rect); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_INTEGRAL_IMAGE - diff --git a/ml/dlib/dlib/image_transforms/integral_image_abstract.h b/ml/dlib/dlib/image_transforms/integral_image_abstract.h deleted file mode 100644 index 583fa0375..000000000 --- a/ml/dlib/dlib/image_transforms/integral_image_abstract.h +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_INTEGRAL_IMAGe_ABSTRACT_ -#ifdef DLIB_INTEGRAL_IMAGe_ABSTRACT_ - -#include "../geometry/rectangle_abstract.h" -#include "../array2d/array2d_kernel_abstract.h" -#include "../pixel.h" -#include "../noncopyable.h" -#include "../image_processing/generic_image.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - class integral_image_generic : noncopyable - { - /*! - REQUIREMENTS ON T - T should be a built in scalar type. Moreover, it should - be capable of storing sums of whatever kind of pixel - you will be dealing with. 
- - INITIAL VALUE - - nr() == 0 - - nc() == 0 - - WHAT THIS OBJECT REPRESENTS - This object is an alternate way of representing image data - that allows for very fast computations of sums of pixels in - rectangular regions. To use this object you load it with a - normal image and then you can use the get_sum_of_area() - function to compute sums of pixels in a given area in - constant time. - !*/ - public: - typedef T value_type; - - const long nr( - ) const; - /*! - ensures - - returns the number of rows in this integral image object - !*/ - - const long nc( - ) const; - /*! - ensures - - returns the number of columns in this integral image object - !*/ - - template <typename image_type> - void load ( - const image_type& img - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - Let P denote the type of pixel in img, then we require: - - pixel_traits<P>::has_alpha == false - ensures - - #nr() == img.nr() - - #nc() == img.nc() - - #*this will now contain an "integral image" representation of the - given input image. - !*/ - - value_type get_sum_of_area ( - const rectangle& rect - ) const; - /*! - requires - - rect.is_empty() == false - - get_rect(*this).contains(rect) == true - (i.e. rect must not be outside the integral image) - ensures - - Let O denote the image this integral image was generated from. - Then this function returns sum(subm(mat(O),rect)). - That is, this function returns the sum of the pixels in O that - are contained within the given rectangle. - !*/ - - void swap( - integral_image_generic& item - ); - /*! - ensures - - swaps *this and item - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < typename T > - void swap ( - integral_image_generic<T>& a, - integral_image_generic<T>& b - ) { a.swap(b); } - /*! 
- provides a global swap function - !*/ - -// ---------------------------------------------------------------------------------------- - - typedef integral_image_generic<long> integral_image; - -// ---------------------------------------------------------------------------------------- - - template <typename integral_image_type> - typename integral_image_type::value_type haar_x ( - const integral_image_type& img, - const point& p, - long width - ) - /*! - requires - - get_rect(img).contains(centered_rect(p,width,width)) == true - - integral_image_type == a type that implements the integral_image_generic - interface defined above - ensures - - returns the response of a Haar wavelet centered at the point p - with the given width. The wavelet is oriented along the X axis - and has the following shape: - ----++++ - ----++++ - ----++++ - ----++++ - That is, the wavelet is square and computes the sum of pixels on the - right minus the sum of pixels on the left. - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename integral_image_type> - typename integral_image_type::value_type haar_y ( - const integral_image_type& img, - const point& p, - long width - ) - /*! - requires - - get_rect(img).contains(centered_rect(p,width,width)) == true - - integral_image_type == a type that implements the integral_image_generic - interface defined above - ensures - - returns the response of a Haar wavelet centered at the point p - with the given width in the given image. The wavelet is oriented - along the Y axis and has the following shape: - -------- - -------- - ++++++++ - ++++++++ - That is, the wavelet is square and computes the sum of pixels on the - bottom minus the sum of pixels on the top. 
- !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_INTEGRAL_IMAGe_ABSTRACT_ - diff --git a/ml/dlib/dlib/image_transforms/interpolation.h b/ml/dlib/dlib/image_transforms/interpolation.h deleted file mode 100644 index 11c561e2d..000000000 --- a/ml/dlib/dlib/image_transforms/interpolation.h +++ /dev/null @@ -1,2193 +0,0 @@ -// Copyright (C) 2012 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_INTERPOlATIONh_ -#define DLIB_INTERPOlATIONh_ - -#include "interpolation_abstract.h" -#include "../pixel.h" -#include "../matrix.h" -#include "assign_image.h" -#include "image_pyramid.h" -#include "../simd.h" -#include "../image_processing/full_object_detection.h" -#include <limits> -#include "../rand.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template <typename T> - struct sub_image_proxy - { - sub_image_proxy() = default; - - sub_image_proxy ( - T& img, - rectangle rect - ) - { - rect = rect.intersect(get_rect(img)); - typedef typename image_traits<T>::pixel_type pixel_type; - - _nr = rect.height(); - _nc = rect.width(); - _width_step = width_step(img); - _data = (char*)image_data(img) + sizeof(pixel_type)*rect.left() + rect.top()*_width_step; - } - - void* _data = 0; - long _width_step = 0; - long _nr = 0; - long _nc = 0; - }; - - template <typename T> - struct const_sub_image_proxy - { - const_sub_image_proxy() = default; - - const_sub_image_proxy ( - const T& img, - rectangle rect - ) - { - rect = rect.intersect(get_rect(img)); - typedef typename image_traits<T>::pixel_type pixel_type; - - _nr = rect.height(); - _nc = rect.width(); - _width_step = width_step(img); - _data = (const char*)image_data(img) + sizeof(pixel_type)*rect.left() + rect.top()*_width_step; - } - - const void* _data = 0; - long _width_step = 0; - long _nr = 0; - long _nc = 0; - }; - - 
template <typename T> - struct image_traits<sub_image_proxy<T> > - { - typedef typename image_traits<T>::pixel_type pixel_type; - }; - template <typename T> - struct image_traits<const sub_image_proxy<T> > - { - typedef typename image_traits<T>::pixel_type pixel_type; - }; - template <typename T> - struct image_traits<const_sub_image_proxy<T> > - { - typedef typename image_traits<T>::pixel_type pixel_type; - }; - template <typename T> - struct image_traits<const const_sub_image_proxy<T> > - { - typedef typename image_traits<T>::pixel_type pixel_type; - }; - - template <typename T> - inline long num_rows( const sub_image_proxy<T>& img) { return img._nr; } - template <typename T> - inline long num_columns( const sub_image_proxy<T>& img) { return img._nc; } - - template <typename T> - inline long num_rows( const const_sub_image_proxy<T>& img) { return img._nr; } - template <typename T> - inline long num_columns( const const_sub_image_proxy<T>& img) { return img._nc; } - - template <typename T> - inline void* image_data( sub_image_proxy<T>& img) - { - return img._data; - } - template <typename T> - inline const void* image_data( const sub_image_proxy<T>& img) - { - return img._data; - } - - template <typename T> - inline const void* image_data( const const_sub_image_proxy<T>& img) - { - return img._data; - } - - template <typename T> - inline long width_step( - const sub_image_proxy<T>& img - ) { return img._width_step; } - - template <typename T> - inline long width_step( - const const_sub_image_proxy<T>& img - ) { return img._width_step; } - - template <typename T> - void set_image_size(sub_image_proxy<T>& img, long rows, long cols) - { - DLIB_CASSERT(img._nr == rows && img._nc == cols, "A sub_image can't be resized." 
- << "\n\t img._nr: "<< img._nr - << "\n\t img._nc: "<< img._nc - << "\n\t rows: "<< rows - << "\n\t cols: "<< cols - ); - } - - template < - typename image_type - > - sub_image_proxy<image_type> sub_image ( - image_type& img, - const rectangle& rect - ) - { - return sub_image_proxy<image_type>(img,rect); - } - - template < - typename image_type - > - const const_sub_image_proxy<image_type> sub_image ( - const image_type& img, - const rectangle& rect - ) - { - return const_sub_image_proxy<image_type>(img,rect); - } - - template <typename T> - inline sub_image_proxy<matrix<T>> sub_image ( - T* img, - long nr, - long nc, - long row_stride - ) - { - sub_image_proxy<matrix<T>> tmp; - tmp._data = img; - tmp._nr = nr; - tmp._nc = nc; - tmp._width_step = row_stride*sizeof(T); - return tmp; - } - - template <typename T> - inline const const_sub_image_proxy<matrix<T>> sub_image ( - const T* img, - long nr, - long nc, - long row_stride - ) - { - const_sub_image_proxy<matrix<T>> tmp; - tmp._data = img; - tmp._nr = nr; - tmp._nc = nc; - tmp._width_step = row_stride*sizeof(T); - return tmp; - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - class interpolate_nearest_neighbor - { - public: - - template <typename image_view_type, typename pixel_type> - bool operator() ( - const image_view_type& img, - const dlib::point& p, - pixel_type& result - ) const - { - COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false); - - if (get_rect(img).contains(p)) - { - assign_pixel(result, img[p.y()][p.x()]); - return true; - } - else - { - return false; - } - } - - }; - -// ---------------------------------------------------------------------------------------- - - class interpolate_bilinear - { - template <typename T> - struct is_rgb_image - { - const static bool value = pixel_traits<typename T::pixel_type>::rgb; 
- }; - - public: - - template <typename T, typename image_view_type, typename pixel_type> - typename disable_if<is_rgb_image<image_view_type>,bool>::type operator() ( - const image_view_type& img, - const dlib::vector<T,2>& p, - pixel_type& result - ) const - { - COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false); - - const long left = static_cast<long>(std::floor(p.x())); - const long top = static_cast<long>(std::floor(p.y())); - const long right = left+1; - const long bottom = top+1; - - - // if the interpolation goes outside img - if (!(left >= 0 && top >= 0 && right < img.nc() && bottom < img.nr())) - return false; - - const double lr_frac = p.x() - left; - const double tb_frac = p.y() - top; - - double tl = 0, tr = 0, bl = 0, br = 0; - - assign_pixel(tl, img[top][left]); - assign_pixel(tr, img[top][right]); - assign_pixel(bl, img[bottom][left]); - assign_pixel(br, img[bottom][right]); - - double temp = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + - tb_frac*((1-lr_frac)*bl + lr_frac*br); - - assign_pixel(result, temp); - return true; - } - - template <typename T, typename image_view_type, typename pixel_type> - typename enable_if<is_rgb_image<image_view_type>,bool>::type operator() ( - const image_view_type& img, - const dlib::vector<T,2>& p, - pixel_type& result - ) const - { - COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false); - - const long left = static_cast<long>(std::floor(p.x())); - const long top = static_cast<long>(std::floor(p.y())); - const long right = left+1; - const long bottom = top+1; - - - // if the interpolation goes outside img - if (!(left >= 0 && top >= 0 && right < img.nc() && bottom < img.nr())) - return false; - - const double lr_frac = p.x() - left; - const double tb_frac = p.y() - top; - - double tl, tr, bl, br; - - tl = img[top][left].red; - tr = img[top][right].red; - bl = img[bottom][left].red; - br = img[bottom][right].red; - const double red = 
(1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + - tb_frac*((1-lr_frac)*bl + lr_frac*br); - - tl = img[top][left].green; - tr = img[top][right].green; - bl = img[bottom][left].green; - br = img[bottom][right].green; - const double green = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + - tb_frac*((1-lr_frac)*bl + lr_frac*br); - - tl = img[top][left].blue; - tr = img[top][right].blue; - bl = img[bottom][left].blue; - br = img[bottom][right].blue; - const double blue = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + - tb_frac*((1-lr_frac)*bl + lr_frac*br); - - rgb_pixel temp; - assign_pixel(temp.red, red); - assign_pixel(temp.green, green); - assign_pixel(temp.blue, blue); - assign_pixel(result, temp); - return true; - } - }; - -// ---------------------------------------------------------------------------------------- - - class interpolate_quadratic - { - template <typename T> - struct is_rgb_image - { - const static bool value = pixel_traits<typename T::pixel_type>::rgb; - }; - - public: - - template <typename T, typename image_view_type, typename pixel_type> - typename disable_if<is_rgb_image<image_view_type>,bool>::type operator() ( - const image_view_type& img, - const dlib::vector<T,2>& p, - pixel_type& result - ) const - { - COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false); - - const point pp(p); - - // if the interpolation goes outside img - if (!get_rect(img).contains(grow_rect(pp,1))) - return false; - - const long r = pp.y(); - const long c = pp.x(); - - const double temp = interpolate(p-pp, - img[r-1][c-1], - img[r-1][c ], - img[r-1][c+1], - img[r ][c-1], - img[r ][c ], - img[r ][c+1], - img[r+1][c-1], - img[r+1][c ], - img[r+1][c+1]); - - assign_pixel(result, temp); - return true; - } - - template <typename T, typename image_view_type, typename pixel_type> - typename enable_if<is_rgb_image<image_view_type>,bool>::type operator() ( - const image_view_type& img, - const dlib::vector<T,2>& p, - pixel_type& result - ) const - { 
- COMPILE_TIME_ASSERT(pixel_traits<typename image_view_type::pixel_type>::has_alpha == false); - - const point pp(p); - - // if the interpolation goes outside img - if (!get_rect(img).contains(grow_rect(pp,1))) - return false; - - const long r = pp.y(); - const long c = pp.x(); - - const double red = interpolate(p-pp, - img[r-1][c-1].red, - img[r-1][c ].red, - img[r-1][c+1].red, - img[r ][c-1].red, - img[r ][c ].red, - img[r ][c+1].red, - img[r+1][c-1].red, - img[r+1][c ].red, - img[r+1][c+1].red); - const double green = interpolate(p-pp, - img[r-1][c-1].green, - img[r-1][c ].green, - img[r-1][c+1].green, - img[r ][c-1].green, - img[r ][c ].green, - img[r ][c+1].green, - img[r+1][c-1].green, - img[r+1][c ].green, - img[r+1][c+1].green); - const double blue = interpolate(p-pp, - img[r-1][c-1].blue, - img[r-1][c ].blue, - img[r-1][c+1].blue, - img[r ][c-1].blue, - img[r ][c ].blue, - img[r ][c+1].blue, - img[r+1][c-1].blue, - img[r+1][c ].blue, - img[r+1][c+1].blue); - - - rgb_pixel temp; - assign_pixel(temp.red, red); - assign_pixel(temp.green, green); - assign_pixel(temp.blue, blue); - assign_pixel(result, temp); - - return true; - } - - private: - - /* tl tm tr - ml mm mr - bl bm br - */ - // The above is the pixel layout in our little 3x3 neighborhood. interpolate() will - // fit a quadratic to these 9 pixels and then use that quadratic to find the interpolated - // value at point p. 
- inline double interpolate( - const dlib::vector<double,2>& p, - double tl, double tm, double tr, - double ml, double mm, double mr, - double bl, double bm, double br - ) const - { - matrix<double,6,1> w; - // x - w(0) = (tr + mr + br - tl - ml - bl)*0.16666666666; - // y - w(1) = (bl + bm + br - tl - tm - tr)*0.16666666666; - // x^2 - w(2) = (tl + tr + ml + mr + bl + br)*0.16666666666 - (tm + mm + bm)*0.333333333; - // x*y - w(3) = (tl - tr - bl + br)*0.25; - // y^2 - w(4) = (tl + tm + tr + bl + bm + br)*0.16666666666 - (ml + mm + mr)*0.333333333; - // 1 (constant term) - w(5) = (tm + ml + mr + bm)*0.222222222 - (tl + tr + bl + br)*0.11111111 + (mm)*0.55555556; - - const double x = p.x(); - const double y = p.y(); - - matrix<double,6,1> z; - z = x, y, x*x, x*y, y*y, 1.0; - - return dot(w,z); - } - }; - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - class black_background - { - public: - template <typename pixel_type> - void operator() ( pixel_type& p) const { assign_pixel(p, 0); } - }; - - class white_background - { - public: - template <typename pixel_type> - void operator() ( pixel_type& p) const { assign_pixel(p, 255); } - }; - - class no_background - { - public: - template <typename pixel_type> - void operator() ( pixel_type& ) const { } - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename interpolation_type, - typename point_mapping_type, - typename background_type - > - void transform_image ( - const image_type1& in_img, - image_type2& out_img, - const interpolation_type& interp, - const point_mapping_type& map_point, - const background_type& set_background, - const rectangle& area - ) - { - // make sure 
requires clause is not broken - DLIB_ASSERT( get_rect(out_img).contains(area) == true && - is_same_object(in_img, out_img) == false , - "\t void transform_image()" - << "\n\t Invalid inputs were given to this function." - << "\n\t get_rect(out_img).contains(area): " << get_rect(out_img).contains(area) - << "\n\t get_rect(out_img): " << get_rect(out_img) - << "\n\t area: " << area - << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) - ); - - const_image_view<image_type1> imgv(in_img); - image_view<image_type2> out_imgv(out_img); - - for (long r = area.top(); r <= area.bottom(); ++r) - { - for (long c = area.left(); c <= area.right(); ++c) - { - if (!interp(imgv, map_point(dlib::vector<double,2>(c,r)), out_imgv[r][c])) - set_background(out_imgv[r][c]); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename interpolation_type, - typename point_mapping_type, - typename background_type - > - void transform_image ( - const image_type1& in_img, - image_type2& out_img, - const interpolation_type& interp, - const point_mapping_type& map_point, - const background_type& set_background - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(in_img, out_img) == false , - "\t void transform_image()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) - ); - - transform_image(in_img, out_img, interp, map_point, set_background, get_rect(out_img)); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename interpolation_type, - typename point_mapping_type - > - void transform_image ( - const image_type1& in_img, - image_type2& out_img, - const interpolation_type& interp, - const point_mapping_type& map_point - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(in_img, out_img) == false , - "\t void transform_image()" - << "\n\t Invalid inputs were given to this function." - << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) - ); - - - transform_image(in_img, out_img, interp, map_point, black_background(), get_rect(out_img)); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename interpolation_type - > - point_transform_affine rotate_image ( - const image_type1& in_img, - image_type2& out_img, - double angle, - const interpolation_type& interp - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(in_img, out_img) == false , - "\t point_transform_affine rotate_image()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) - ); - - const rectangle rimg = get_rect(in_img); - - - // figure out bounding box for rotated rectangle - rectangle rect; - rect += rotate_point(center(rimg), rimg.tl_corner(), -angle); - rect += rotate_point(center(rimg), rimg.tr_corner(), -angle); - rect += rotate_point(center(rimg), rimg.bl_corner(), -angle); - rect += rotate_point(center(rimg), rimg.br_corner(), -angle); - set_image_size(out_img, rect.height(), rect.width()); - - const matrix<double,2,2> R = rotation_matrix(angle); - - point_transform_affine trans = point_transform_affine(R, -R*dcenter(get_rect(out_img)) + dcenter(rimg)); - transform_image(in_img, out_img, interp, trans); - return inv(trans); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2 - > - point_transform_affine rotate_image ( - const image_type1& in_img, - image_type2& out_img, - double angle - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(in_img, out_img) == false , - "\t point_transform_affine rotate_image()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) - ); - - return rotate_image(in_img, out_img, angle, interpolate_quadratic()); - } - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - class helper_resize_image - { - public: - helper_resize_image( - double x_scale_, - double y_scale_ - ): - x_scale(x_scale_), - y_scale(y_scale_) - {} - - dlib::vector<double,2> operator() ( - const dlib::vector<double,2>& p - ) const - { - return dlib::vector<double,2>(p.x()*x_scale, p.y()*y_scale); - } - - private: - const double x_scale; - const double y_scale; - }; - } - - template < - typename image_type1, - typename image_type2, - typename interpolation_type - > - void resize_image ( - const image_type1& in_img, - image_type2& out_img, - const interpolation_type& interp - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(in_img, out_img) == false , - "\t void resize_image()" - << "\n\t Invalid inputs were given to this function." - << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) - ); - - const double x_scale = (num_columns(in_img)-1)/(double)std::max<long>((num_columns(out_img)-1),1); - const double y_scale = (num_rows(in_img)-1)/(double)std::max<long>((num_rows(out_img)-1),1); - transform_image(in_img, out_img, interp, - dlib::impl::helper_resize_image(x_scale,y_scale)); - } - -// ---------------------------------------------------------------------------------------- - - template <typename image_type> - struct is_rgb_image { const static bool value = pixel_traits<typename image_traits<image_type>::pixel_type>::rgb; }; - template <typename image_type> - struct is_grayscale_image { const static bool value = pixel_traits<typename image_traits<image_type>::pixel_type>::grayscale; }; - - // This is an optimized version of resize_image for the case where bilinear - // interpolation is used. 
- template < - typename image_type1, - typename image_type2 - > - typename disable_if_c<(is_rgb_image<image_type1>::value&&is_rgb_image<image_type2>::value) || - (is_grayscale_image<image_type1>::value&&is_grayscale_image<image_type2>::value)>::type - resize_image ( - const image_type1& in_img_, - image_type2& out_img_, - interpolate_bilinear - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(in_img_, out_img_) == false , - "\t void resize_image()" - << "\n\t Invalid inputs were given to this function." - << "\n\t is_same_object(in_img_, out_img_): " << is_same_object(in_img_, out_img_) - ); - - const_image_view<image_type1> in_img(in_img_); - image_view<image_type2> out_img(out_img_); - - if (out_img.size() == 0 || in_img.size() == 0) - return; - - - typedef typename image_traits<image_type1>::pixel_type T; - typedef typename image_traits<image_type2>::pixel_type U; - const double x_scale = (in_img.nc()-1)/(double)std::max<long>((out_img.nc()-1),1); - const double y_scale = (in_img.nr()-1)/(double)std::max<long>((out_img.nr()-1),1); - double y = -y_scale; - for (long r = 0; r < out_img.nr(); ++r) - { - y += y_scale; - const long top = static_cast<long>(std::floor(y)); - const long bottom = std::min(top+1, in_img.nr()-1); - const double tb_frac = y - top; - double x = -x_scale; - if (pixel_traits<U>::grayscale) - { - for (long c = 0; c < out_img.nc(); ++c) - { - x += x_scale; - const long left = static_cast<long>(std::floor(x)); - const long right = std::min(left+1, in_img.nc()-1); - const double lr_frac = x - left; - - double tl = 0, tr = 0, bl = 0, br = 0; - - assign_pixel(tl, in_img[top][left]); - assign_pixel(tr, in_img[top][right]); - assign_pixel(bl, in_img[bottom][left]); - assign_pixel(br, in_img[bottom][right]); - - double temp = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + - tb_frac*((1-lr_frac)*bl + lr_frac*br); - - assign_pixel(out_img[r][c], temp); - } - } - else - { - for (long c = 0; c < out_img.nc(); ++c) - { - x += 
x_scale; - const long left = static_cast<long>(std::floor(x)); - const long right = std::min(left+1, in_img.nc()-1); - const double lr_frac = x - left; - - const T tl = in_img[top][left]; - const T tr = in_img[top][right]; - const T bl = in_img[bottom][left]; - const T br = in_img[bottom][right]; - - T temp; - assign_pixel(temp, 0); - vector_to_pixel(temp, - (1-tb_frac)*((1-lr_frac)*pixel_to_vector<double>(tl) + lr_frac*pixel_to_vector<double>(tr)) + - tb_frac*((1-lr_frac)*pixel_to_vector<double>(bl) + lr_frac*pixel_to_vector<double>(br))); - assign_pixel(out_img[r][c], temp); - } - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2 - > - struct images_have_same_pixel_types - { - typedef typename image_traits<image_type1>::pixel_type ptype1; - typedef typename image_traits<image_type2>::pixel_type ptype2; - const static bool value = is_same_type<ptype1, ptype2>::value; - }; - - template < - typename image_type, - typename image_type2 - > - typename enable_if_c<is_grayscale_image<image_type>::value && is_grayscale_image<image_type2>::value && images_have_same_pixel_types<image_type,image_type2>::value>::type - resize_image ( - const image_type& in_img_, - image_type2& out_img_, - interpolate_bilinear - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(in_img_, out_img_) == false , - "\t void resize_image()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t is_same_object(in_img_, out_img_): " << is_same_object(in_img_, out_img_) - ); - - const_image_view<image_type> in_img(in_img_); - image_view<image_type2> out_img(out_img_); - - if (out_img.size() == 0 || in_img.size() == 0) - return; - - typedef typename image_traits<image_type>::pixel_type T; - const double x_scale = (in_img.nc()-1)/(double)std::max<long>((out_img.nc()-1),1); - const double y_scale = (in_img.nr()-1)/(double)std::max<long>((out_img.nr()-1),1); - double y = -y_scale; - for (long r = 0; r < out_img.nr(); ++r) - { - y += y_scale; - const long top = static_cast<long>(std::floor(y)); - const long bottom = std::min(top+1, in_img.nr()-1); - const double tb_frac = y - top; - double x = -4*x_scale; - - const simd4f _tb_frac = tb_frac; - const simd4f _inv_tb_frac = 1-tb_frac; - const simd4f _x_scale = 4*x_scale; - simd4f _x(x, x+x_scale, x+2*x_scale, x+3*x_scale); - long c = 0; - for (;; c+=4) - { - _x += _x_scale; - simd4i left = simd4i(_x); - - simd4f _lr_frac = _x-left; - simd4f _inv_lr_frac = 1-_lr_frac; - simd4i right = left+1; - - simd4f tlf = _inv_tb_frac*_inv_lr_frac; - simd4f trf = _inv_tb_frac*_lr_frac; - simd4f blf = _tb_frac*_inv_lr_frac; - simd4f brf = _tb_frac*_lr_frac; - - int32 fleft[4]; - int32 fright[4]; - left.store(fleft); - right.store(fright); - - if (fright[3] >= in_img.nc()) - break; - simd4f tl(in_img[top][fleft[0]], in_img[top][fleft[1]], in_img[top][fleft[2]], in_img[top][fleft[3]]); - simd4f tr(in_img[top][fright[0]], in_img[top][fright[1]], in_img[top][fright[2]], in_img[top][fright[3]]); - simd4f bl(in_img[bottom][fleft[0]], in_img[bottom][fleft[1]], in_img[bottom][fleft[2]], in_img[bottom][fleft[3]]); - simd4f br(in_img[bottom][fright[0]], in_img[bottom][fright[1]], in_img[bottom][fright[2]], in_img[bottom][fright[3]]); - - simd4f out = simd4f(tlf*tl + trf*tr + blf*bl + brf*br); - float fout[4]; - out.store(fout); - - out_img[r][c] = static_cast<T>(fout[0]); - out_img[r][c+1] = static_cast<T>(fout[1]); - 
out_img[r][c+2] = static_cast<T>(fout[2]); - out_img[r][c+3] = static_cast<T>(fout[3]); - } - x = -x_scale + c*x_scale; - for (; c < out_img.nc(); ++c) - { - x += x_scale; - const long left = static_cast<long>(std::floor(x)); - const long right = std::min(left+1, in_img.nc()-1); - const float lr_frac = x - left; - - float tl = 0, tr = 0, bl = 0, br = 0; - - assign_pixel(tl, in_img[top][left]); - assign_pixel(tr, in_img[top][right]); - assign_pixel(bl, in_img[bottom][left]); - assign_pixel(br, in_img[bottom][right]); - - float temp = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + - tb_frac*((1-lr_frac)*bl + lr_frac*br); - - assign_pixel(out_img[r][c], temp); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - typename enable_if<is_rgb_image<image_type> >::type resize_image ( - const image_type& in_img_, - image_type& out_img_, - interpolate_bilinear - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(in_img_, out_img_) == false , - "\t void resize_image()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t is_same_object(in_img_, out_img_): " << is_same_object(in_img_, out_img_) - ); - - const_image_view<image_type> in_img(in_img_); - image_view<image_type> out_img(out_img_); - - if (out_img.size() == 0 || in_img.size() == 0) - return; - - - typedef typename image_traits<image_type>::pixel_type T; - const double x_scale = (in_img.nc()-1)/(double)std::max<long>((out_img.nc()-1),1); - const double y_scale = (in_img.nr()-1)/(double)std::max<long>((out_img.nr()-1),1); - double y = -y_scale; - for (long r = 0; r < out_img.nr(); ++r) - { - y += y_scale; - const long top = static_cast<long>(std::floor(y)); - const long bottom = std::min(top+1, in_img.nr()-1); - const double tb_frac = y - top; - double x = -4*x_scale; - - const simd4f _tb_frac = tb_frac; - const simd4f _inv_tb_frac = 1-tb_frac; - const simd4f _x_scale = 4*x_scale; - simd4f _x(x, x+x_scale, x+2*x_scale, x+3*x_scale); - long c = 0; - for (;; c+=4) - { - _x += _x_scale; - simd4i left = simd4i(_x); - simd4f lr_frac = _x-left; - simd4f _inv_lr_frac = 1-lr_frac; - simd4i right = left+1; - - simd4f tlf = _inv_tb_frac*_inv_lr_frac; - simd4f trf = _inv_tb_frac*lr_frac; - simd4f blf = _tb_frac*_inv_lr_frac; - simd4f brf = _tb_frac*lr_frac; - - int32 fleft[4]; - int32 fright[4]; - left.store(fleft); - right.store(fright); - - if (fright[3] >= in_img.nc()) - break; - simd4f tl(in_img[top][fleft[0]].red, in_img[top][fleft[1]].red, in_img[top][fleft[2]].red, in_img[top][fleft[3]].red); - simd4f tr(in_img[top][fright[0]].red, in_img[top][fright[1]].red, in_img[top][fright[2]].red, in_img[top][fright[3]].red); - simd4f bl(in_img[bottom][fleft[0]].red, in_img[bottom][fleft[1]].red, in_img[bottom][fleft[2]].red, in_img[bottom][fleft[3]].red); - simd4f br(in_img[bottom][fright[0]].red, in_img[bottom][fright[1]].red, in_img[bottom][fright[2]].red, in_img[bottom][fright[3]].red); - - simd4i out = simd4i(tlf*tl + trf*tr + blf*bl + brf*br); - int32 fout[4]; - out.store(fout); - - out_img[r][c].red = static_cast<unsigned 
char>(fout[0]); - out_img[r][c+1].red = static_cast<unsigned char>(fout[1]); - out_img[r][c+2].red = static_cast<unsigned char>(fout[2]); - out_img[r][c+3].red = static_cast<unsigned char>(fout[3]); - - - tl = simd4f(in_img[top][fleft[0]].green, in_img[top][fleft[1]].green, in_img[top][fleft[2]].green, in_img[top][fleft[3]].green); - tr = simd4f(in_img[top][fright[0]].green, in_img[top][fright[1]].green, in_img[top][fright[2]].green, in_img[top][fright[3]].green); - bl = simd4f(in_img[bottom][fleft[0]].green, in_img[bottom][fleft[1]].green, in_img[bottom][fleft[2]].green, in_img[bottom][fleft[3]].green); - br = simd4f(in_img[bottom][fright[0]].green, in_img[bottom][fright[1]].green, in_img[bottom][fright[2]].green, in_img[bottom][fright[3]].green); - out = simd4i(tlf*tl + trf*tr + blf*bl + brf*br); - out.store(fout); - out_img[r][c].green = static_cast<unsigned char>(fout[0]); - out_img[r][c+1].green = static_cast<unsigned char>(fout[1]); - out_img[r][c+2].green = static_cast<unsigned char>(fout[2]); - out_img[r][c+3].green = static_cast<unsigned char>(fout[3]); - - - tl = simd4f(in_img[top][fleft[0]].blue, in_img[top][fleft[1]].blue, in_img[top][fleft[2]].blue, in_img[top][fleft[3]].blue); - tr = simd4f(in_img[top][fright[0]].blue, in_img[top][fright[1]].blue, in_img[top][fright[2]].blue, in_img[top][fright[3]].blue); - bl = simd4f(in_img[bottom][fleft[0]].blue, in_img[bottom][fleft[1]].blue, in_img[bottom][fleft[2]].blue, in_img[bottom][fleft[3]].blue); - br = simd4f(in_img[bottom][fright[0]].blue, in_img[bottom][fright[1]].blue, in_img[bottom][fright[2]].blue, in_img[bottom][fright[3]].blue); - out = simd4i(tlf*tl + trf*tr + blf*bl + brf*br); - out.store(fout); - out_img[r][c].blue = static_cast<unsigned char>(fout[0]); - out_img[r][c+1].blue = static_cast<unsigned char>(fout[1]); - out_img[r][c+2].blue = static_cast<unsigned char>(fout[2]); - out_img[r][c+3].blue = static_cast<unsigned char>(fout[3]); - } - x = -x_scale + c*x_scale; - for (; c < out_img.nc(); 
++c) - { - x += x_scale; - const long left = static_cast<long>(std::floor(x)); - const long right = std::min(left+1, in_img.nc()-1); - const double lr_frac = x - left; - - const T tl = in_img[top][left]; - const T tr = in_img[top][right]; - const T bl = in_img[bottom][left]; - const T br = in_img[bottom][right]; - - T temp; - assign_pixel(temp, 0); - vector_to_pixel(temp, - (1-tb_frac)*((1-lr_frac)*pixel_to_vector<double>(tl) + lr_frac*pixel_to_vector<double>(tr)) + - tb_frac*((1-lr_frac)*pixel_to_vector<double>(bl) + lr_frac*pixel_to_vector<double>(br))); - assign_pixel(out_img[r][c], temp); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2 - > - void resize_image ( - const image_type1& in_img, - image_type2& out_img - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(in_img, out_img) == false , - "\t void resize_image()" - << "\n\t Invalid inputs were given to this function." - << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) - ); - - resize_image(in_img, out_img, interpolate_bilinear()); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - void resize_image ( - double size_scale, - image_type& img - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( size_scale > 0 , - "\t void resize_image()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t size_scale: " << size_scale - ); - - image_type temp; - set_image_size(temp, std::round(size_scale*num_rows(img)), std::round(size_scale*num_columns(img))); - resize_image(img, temp); - swap(img, temp); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2 - > - point_transform_affine flip_image_left_right ( - const image_type1& in_img, - image_type2& out_img - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(in_img, out_img) == false , - "\t void flip_image_left_right()" - << "\n\t Invalid inputs were given to this function." - << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) - ); - - assign_image(out_img, fliplr(mat(in_img))); - std::vector<dlib::vector<double,2> > from, to; - rectangle r = get_rect(in_img); - from.push_back(r.tl_corner()); to.push_back(r.tr_corner()); - from.push_back(r.bl_corner()); to.push_back(r.br_corner()); - from.push_back(r.tr_corner()); to.push_back(r.tl_corner()); - from.push_back(r.br_corner()); to.push_back(r.bl_corner()); - return find_affine_transform(from,to); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - point_transform_affine flip_image_left_right ( - image_type& img - ) - { - image_type temp; - auto tform = flip_image_left_right(img, temp); - swap(temp,img); - return tform; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2 - > - void flip_image_up_down ( - const image_type1& in_img, - image_type2& out_img - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(in_img, out_img) == false , - "\t void flip_image_up_down()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) - ); - - assign_image(out_img, flipud(mat(in_img))); - } - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - inline rectangle flip_rect_left_right ( - const rectangle& rect, - const rectangle& window - ) - { - rectangle temp; - temp.top() = rect.top(); - temp.bottom() = rect.bottom(); - - const long left_dist = rect.left()-window.left(); - - temp.right() = window.right()-left_dist; - temp.left() = temp.right()-rect.width()+1; - return temp; - } - - inline rectangle tform_object ( - const point_transform_affine& tran, - const rectangle& rect - ) - { - return centered_rect(tran(center(rect)), rect.width(), rect.height()); - } - - inline mmod_rect tform_object ( - const point_transform_affine& tran, - mmod_rect rect - ) - { - rect.rect = tform_object(tran, rect.rect); - return rect; - } - - inline full_object_detection tform_object( - const point_transform_affine& tran, - const full_object_detection& obj - ) - { - std::vector<point> parts; - parts.reserve(obj.num_parts()); - for (unsigned long i = 0; i < obj.num_parts(); ++i) - { - if (obj.part(i) != OBJECT_PART_NOT_PRESENT) - parts.push_back(tran(obj.part(i))); - else - parts.push_back(OBJECT_PART_NOT_PRESENT); - } - return full_object_detection(tform_object(tran,obj.get_rect()), parts); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_array_type, - typename T - > - void add_image_left_right_flips ( - image_array_type& images, - std::vector<std::vector<T> >& objects - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( images.size() == objects.size(), - "\t void add_image_left_right_flips()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t images.size(): " << images.size() - << "\n\t objects.size(): " << objects.size() - ); - - typename image_array_type::value_type temp; - std::vector<T> rects; - - const unsigned long num = images.size(); - for (unsigned long j = 0; j < num; ++j) - { - const point_transform_affine tran = flip_image_left_right(images[j], temp); - - rects.clear(); - for (unsigned long i = 0; i < objects[j].size(); ++i) - rects.push_back(impl::tform_object(tran, objects[j][i])); - - images.push_back(std::move(temp)); - objects.push_back(rects); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_array_type, - typename T, - typename U - > - void add_image_left_right_flips ( - image_array_type& images, - std::vector<std::vector<T> >& objects, - std::vector<std::vector<U> >& objects2 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( images.size() == objects.size() && - images.size() == objects2.size(), - "\t void add_image_left_right_flips()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t images.size(): " << images.size() - << "\n\t objects.size(): " << objects.size() - << "\n\t objects2.size(): " << objects2.size() - ); - - typename image_array_type::value_type temp; - std::vector<T> rects; - std::vector<U> rects2; - - const unsigned long num = images.size(); - for (unsigned long j = 0; j < num; ++j) - { - const point_transform_affine tran = flip_image_left_right(images[j], temp); - images.push_back(std::move(temp)); - - rects.clear(); - for (unsigned long i = 0; i < objects[j].size(); ++i) - rects.push_back(impl::tform_object(tran, objects[j][i])); - objects.push_back(rects); - - rects2.clear(); - for (unsigned long i = 0; i < objects2[j].size(); ++i) - rects2.push_back(impl::tform_object(tran, objects2[j][i])); - objects2.push_back(rects2); - } - } - -// ---------------------------------------------------------------------------------------- - - template <typename image_array_type> - void flip_image_dataset_left_right ( - image_array_type& images, - std::vector<std::vector<rectangle> >& objects - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( images.size() == objects.size(), - "\t void flip_image_dataset_left_right()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t images.size(): " << images.size() - << "\n\t objects.size(): " << objects.size() - ); - - typename image_array_type::value_type temp; - for (unsigned long i = 0; i < images.size(); ++i) - { - flip_image_left_right(images[i], temp); - swap(temp,images[i]); - for (unsigned long j = 0; j < objects[i].size(); ++j) - { - objects[i][j] = impl::flip_rect_left_right(objects[i][j], get_rect(images[i])); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template <typename image_array_type> - void flip_image_dataset_left_right ( - image_array_type& images, - std::vector<std::vector<rectangle> >& objects, - std::vector<std::vector<rectangle> >& objects2 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( images.size() == objects.size() && - images.size() == objects2.size(), - "\t void flip_image_dataset_left_right()" - << "\n\t Invalid inputs were given to this function." - << "\n\t images.size(): " << images.size() - << "\n\t objects.size(): " << objects.size() - << "\n\t objects2.size(): " << objects2.size() - ); - - typename image_array_type::value_type temp; - for (unsigned long i = 0; i < images.size(); ++i) - { - flip_image_left_right(images[i], temp); - swap(temp, images[i]); - for (unsigned long j = 0; j < objects[i].size(); ++j) - { - objects[i][j] = impl::flip_rect_left_right(objects[i][j], get_rect(images[i])); - } - for (unsigned long j = 0; j < objects2[i].size(); ++j) - { - objects2[i][j] = impl::flip_rect_left_right(objects2[i][j], get_rect(images[i])); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename pyramid_type, - typename image_array_type - > - void upsample_image_dataset ( - image_array_type& images, - std::vector<std::vector<rectangle> >& objects, - unsigned long max_image_size = std::numeric_limits<unsigned long>::max() - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( 
images.size() == objects.size(), - "\t void upsample_image_dataset()" - << "\n\t Invalid inputs were given to this function." - << "\n\t images.size(): " << images.size() - << "\n\t objects.size(): " << objects.size() - ); - - typename image_array_type::value_type temp; - pyramid_type pyr; - for (unsigned long i = 0; i < images.size(); ++i) - { - const unsigned long img_size = num_rows(images[i])*num_columns(images[i]); - if (img_size <= max_image_size) - { - pyramid_up(images[i], temp, pyr); - swap(temp, images[i]); - for (unsigned long j = 0; j < objects[i].size(); ++j) - { - objects[i][j] = pyr.rect_up(objects[i][j]); - } - } - } - } - - template < - typename pyramid_type, - typename image_array_type - > - void upsample_image_dataset ( - image_array_type& images, - std::vector<std::vector<mmod_rect>>& objects, - unsigned long max_image_size = std::numeric_limits<unsigned long>::max() - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( images.size() == objects.size(), - "\t void upsample_image_dataset()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t images.size(): " << images.size() - << "\n\t objects.size(): " << objects.size() - ); - - typename image_array_type::value_type temp; - pyramid_type pyr; - for (unsigned long i = 0; i < images.size(); ++i) - { - const unsigned long img_size = num_rows(images[i])*num_columns(images[i]); - if (img_size <= max_image_size) - { - pyramid_up(images[i], temp, pyr); - swap(temp, images[i]); - for (unsigned long j = 0; j < objects[i].size(); ++j) - { - objects[i][j].rect = pyr.rect_up(objects[i][j].rect); - } - } - } - } - - template < - typename pyramid_type, - typename image_array_type - > - void upsample_image_dataset ( - image_array_type& images, - std::vector<std::vector<rectangle> >& objects, - std::vector<std::vector<rectangle> >& objects2, - unsigned long max_image_size = std::numeric_limits<unsigned long>::max() - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( images.size() == objects.size() && - images.size() == objects2.size(), - "\t void upsample_image_dataset()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t images.size(): " << images.size() - << "\n\t objects.size(): " << objects.size() - << "\n\t objects2.size(): " << objects2.size() - ); - - typename image_array_type::value_type temp; - pyramid_type pyr; - for (unsigned long i = 0; i < images.size(); ++i) - { - const unsigned long img_size = num_rows(images[i])*num_columns(images[i]); - if (img_size <= max_image_size) - { - pyramid_up(images[i], temp, pyr); - swap(temp, images[i]); - for (unsigned long j = 0; j < objects[i].size(); ++j) - { - objects[i][j] = pyr.rect_up(objects[i][j]); - } - for (unsigned long j = 0; j < objects2[i].size(); ++j) - { - objects2[i][j] = pyr.rect_up(objects2[i][j]); - } - } - } - } - -// ---------------------------------------------------------------------------------------- - - template <typename image_array_type> - void rotate_image_dataset ( - double angle, - image_array_type& images, - std::vector<std::vector<rectangle> >& objects - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( images.size() == objects.size(), - "\t void rotate_image_dataset()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t images.size(): " << images.size() - << "\n\t objects.size(): " << objects.size() - ); - - typename image_array_type::value_type temp; - for (unsigned long i = 0; i < images.size(); ++i) - { - const point_transform_affine tran = rotate_image(images[i], temp, angle); - swap(temp, images[i]); - for (unsigned long j = 0; j < objects[i].size(); ++j) - { - const rectangle rect = objects[i][j]; - objects[i][j] = centered_rect(tran(center(rect)), rect.width(), rect.height()); - } - } - } - - template <typename image_array_type> - void rotate_image_dataset ( - double angle, - image_array_type& images, - std::vector<std::vector<rectangle> >& objects, - std::vector<std::vector<rectangle> >& objects2 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( images.size() == objects.size() && - images.size() == objects2.size(), - "\t void rotate_image_dataset()" - << "\n\t Invalid inputs were given to this function." - << "\n\t images.size(): " << images.size() - << "\n\t objects.size(): " << objects.size() - << "\n\t objects2.size(): " << objects2.size() - ); - - typename image_array_type::value_type temp; - for (unsigned long i = 0; i < images.size(); ++i) - { - const point_transform_affine tran = rotate_image(images[i], temp, angle); - swap(temp, images[i]); - for (unsigned long j = 0; j < objects[i].size(); ++j) - { - const rectangle rect = objects[i][j]; - objects[i][j] = centered_rect(tran(center(rect)), rect.width(), rect.height()); - } - for (unsigned long j = 0; j < objects2[i].size(); ++j) - { - const rectangle rect = objects2[i][j]; - objects2[i][j] = centered_rect(tran(center(rect)), rect.width(), rect.height()); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_array_type, - typename EXP, - typename T, - typename U - > - void add_image_rotations ( - const matrix_exp<EXP>& angles, - image_array_type& images, - std::vector<std::vector<T> >& objects, - 
std::vector<std::vector<U> >& objects2 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( is_vector(angles) && angles.size() > 0 && - images.size() == objects.size() && - images.size() == objects2.size(), - "\t void add_image_rotations()" - << "\n\t Invalid inputs were given to this function." - << "\n\t is_vector(angles): " << is_vector(angles) - << "\n\t angles.size(): " << angles.size() - << "\n\t images.size(): " << images.size() - << "\n\t objects.size(): " << objects.size() - << "\n\t objects2.size(): " << objects2.size() - ); - - image_array_type new_images; - std::vector<std::vector<T> > new_objects; - std::vector<std::vector<U> > new_objects2; - - using namespace impl; - - std::vector<T> objtemp; - std::vector<U> objtemp2; - typename image_array_type::value_type temp; - for (long i = 0; i < angles.size(); ++i) - { - for (unsigned long j = 0; j < images.size(); ++j) - { - const point_transform_affine tran = rotate_image(images[j], temp, angles(i)); - new_images.push_back(std::move(temp)); - - objtemp.clear(); - for (unsigned long k = 0; k < objects[j].size(); ++k) - objtemp.push_back(tform_object(tran, objects[j][k])); - new_objects.push_back(objtemp); - - objtemp2.clear(); - for (unsigned long k = 0; k < objects2[j].size(); ++k) - objtemp2.push_back(tform_object(tran, objects2[j][k])); - new_objects2.push_back(objtemp2); - } - } - - new_images.swap(images); - new_objects.swap(objects); - new_objects2.swap(objects2); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_array_type, - typename EXP, - typename T - > - void add_image_rotations ( - const matrix_exp<EXP>& angles, - image_array_type& images, - std::vector<std::vector<T> >& objects - ) - { - std::vector<std::vector<T> > objects2(objects.size()); - add_image_rotations(angles, images, objects, objects2); - } - -// ---------------------------------------------------------------------------------------- -// 
---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename pyramid_type, - typename interpolation_type - > - void pyramid_up ( - const image_type1& in_img, - image_type2& out_img, - const pyramid_type& pyr, - const interpolation_type& interp - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(in_img, out_img) == false , - "\t void pyramid_up()" - << "\n\t Invalid inputs were given to this function." - << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) - ); - - if (image_size(in_img) == 0) - { - set_image_size(out_img, 0, 0); - return; - } - - rectangle rect = get_rect(in_img); - rectangle uprect = pyr.rect_up(rect); - if (uprect.is_empty()) - { - set_image_size(out_img, 0, 0); - return; - } - set_image_size(out_img, uprect.bottom()+1, uprect.right()+1); - - resize_image(in_img, out_img, interp); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename pyramid_type - > - void pyramid_up ( - const image_type1& in_img, - image_type2& out_img, - const pyramid_type& pyr - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( is_same_object(in_img, out_img) == false , - "\t void pyramid_up()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) - ); - - pyramid_up(in_img, out_img, pyr, interpolate_bilinear()); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename pyramid_type - > - void pyramid_up ( - image_type& img, - const pyramid_type& pyr - ) - { - image_type temp; - pyramid_up(img, temp, pyr); - swap(temp, img); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - void pyramid_up ( - image_type& img - ) - { - pyramid_down<2> pyr; - pyramid_up(img, pyr); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - struct chip_dims - { - chip_dims ( - unsigned long rows_, - unsigned long cols_ - ) : rows(rows_), cols(cols_) { } - - unsigned long rows; - unsigned long cols; - }; - - struct chip_details - { - chip_details() : angle(0), rows(0), cols(0) {} - chip_details(const rectangle& rect_) : rect(rect_),angle(0), rows(rect_.height()), cols(rect_.width()) {} - chip_details(const drectangle& rect_) : rect(rect_),angle(0), - rows((unsigned long)(rect_.height()+0.5)), cols((unsigned long)(rect_.width()+0.5)) {} - chip_details(const drectangle& rect_, unsigned long size) : rect(rect_),angle(0) - { compute_dims_from_size(size); } - chip_details(const drectangle& rect_, unsigned long size, double angle_) : rect(rect_),angle(angle_) - { compute_dims_from_size(size); } - - chip_details(const drectangle& rect_, const chip_dims& dims) : - rect(rect_),angle(0),rows(dims.rows), cols(dims.cols) {} - chip_details(const drectangle& rect_, const chip_dims& dims, double angle_) : - rect(rect_),angle(angle_),rows(dims.rows), cols(dims.cols) {} - - template <typename T> - chip_details( - const std::vector<dlib::vector<T,2> >& 
chip_points, - const std::vector<dlib::vector<T,2> >& img_points, - const chip_dims& dims - ) : - rows(dims.rows), cols(dims.cols) - { - DLIB_CASSERT( chip_points.size() == img_points.size() && chip_points.size() >= 2, - "\t chip_details::chip_details(chip_points,img_points,dims)" - << "\n\t Invalid inputs were given to this function." - << "\n\t chip_points.size(): " << chip_points.size() - << "\n\t img_points.size(): " << img_points.size() - ); - - const point_transform_affine tform = find_similarity_transform(chip_points,img_points); - dlib::vector<double,2> p(1,0); - p = tform.get_m()*p; - - // There are only 3 things happening in a similarity transform. There is a - // rescaling, a rotation, and a translation. So here we pick out the scale and - // rotation parameters. - angle = std::atan2(p.y(),p.x()); - // Note that the translation and scale part are represented by the extraction - // rectangle. So here we build the appropriate rectangle. - const double scale = length(p); - rect = centered_drect(tform(point(dims.cols,dims.rows)/2.0), - dims.cols*scale, - dims.rows*scale); - } - - - drectangle rect; - double angle; - unsigned long rows; - unsigned long cols; - - inline unsigned long size() const - { - return rows*cols; - } - - private: - void compute_dims_from_size ( - unsigned long size - ) - { - const double relative_size = std::sqrt(size/(double)rect.area()); - rows = static_cast<unsigned long>(rect.height()*relative_size + 0.5); - cols = static_cast<unsigned long>(size/(double)rows + 0.5); - rows = std::max(1ul,rows); - cols = std::max(1ul,cols); - } - }; - -// ---------------------------------------------------------------------------------------- - - inline point_transform_affine get_mapping_to_chip ( - const chip_details& details - ) - { - std::vector<dlib::vector<double,2> > from, to; - point p1(0,0); - point p2(details.cols-1,0); - point p3(details.cols-1, details.rows-1); - to.push_back(p1); - 
from.push_back(rotate_point<double>(center(details.rect),details.rect.tl_corner(),details.angle)); - to.push_back(p2); - from.push_back(rotate_point<double>(center(details.rect),details.rect.tr_corner(),details.angle)); - to.push_back(p3); - from.push_back(rotate_point<double>(center(details.rect),details.rect.br_corner(),details.angle)); - return find_affine_transform(from, to); - } - -// ---------------------------------------------------------------------------------------- - - inline full_object_detection map_det_to_chip( - const full_object_detection& det, - const chip_details& details - ) - { - point_transform_affine tform = get_mapping_to_chip(details); - full_object_detection res(det); - // map the parts - for (unsigned long l = 0; l < det.num_parts(); ++l) - { - if (det.part(l) != OBJECT_PART_NOT_PRESENT) - res.part(l) = tform(det.part(l)); - else - res.part(l) = OBJECT_PART_NOT_PRESENT; - } - // map the main rectangle - rectangle rect; - rect += tform(det.get_rect().tl_corner()); - rect += tform(det.get_rect().tr_corner()); - rect += tform(det.get_rect().bl_corner()); - rect += tform(det.get_rect().br_corner()); - res.get_rect() = rect; - return res; - } - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template < - typename image_type1, - typename image_type2 - > - void basic_extract_image_chip ( - const image_type1& img, - const rectangle& location, - image_type2& chip - ) - /*! - ensures - - This function doesn't do any scaling or rotating. It just pulls out the - chip in the given rectangle. This also means the output image has the - same dimensions as the location rectangle. 
- !*/ - { - const_image_view<image_type1> vimg(img); - image_view<image_type2> vchip(chip); - - vchip.set_size(location.height(), location.width()); - - // location might go outside img so clip it - rectangle area = location.intersect(get_rect(img)); - - // find the part of the chip that corresponds to area in img. - rectangle chip_area = translate_rect(area, -location.tl_corner()); - - zero_border_pixels(chip, chip_area); - // now pull out the contents of area/chip_area. - for (long r = chip_area.top(), rr = area.top(); r <= chip_area.bottom(); ++r,++rr) - { - for (long c = chip_area.left(), cc = area.left(); c <= chip_area.right(); ++c,++cc) - { - assign_pixel(vchip[r][c], vimg[rr][cc]); - } - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename interpolation_type - > - void extract_image_chips ( - const image_type1& img, - const std::vector<chip_details>& chip_locations, - dlib::array<image_type2>& chips, - const interpolation_type& interp - ) - { - // make sure requires clause is not broken -#ifdef ENABLE_ASSERTS - for (unsigned long i = 0; i < chip_locations.size(); ++i) - { - DLIB_CASSERT(chip_locations[i].size() != 0 && - chip_locations[i].rect.is_empty() == false, - "\t void extract_image_chips()" - << "\n\t Invalid inputs were given to this function." - << "\n\t chip_locations["<<i<<"].size(): " << chip_locations[i].size() - << "\n\t chip_locations["<<i<<"].rect.is_empty(): " << chip_locations[i].rect.is_empty() - ); - } -#endif - - pyramid_down<2> pyr; - long max_depth = 0; - // If the chip is supposed to be much smaller than the source subwindow then you - // can't just extract it using bilinear interpolation since at a high enough - // downsampling amount it would effectively turn into nearest neighbor - // interpolation. So we use an image pyramid to make sure the interpolation is - // fast but also high quality. 
The first thing we do is figure out how deep the - // image pyramid needs to be. - rectangle bounding_box; - for (unsigned long i = 0; i < chip_locations.size(); ++i) - { - long depth = 0; - double grow = 2; - drectangle rect = pyr.rect_down(chip_locations[i].rect); - while (rect.area() > chip_locations[i].size()) - { - rect = pyr.rect_down(rect); - ++depth; - // We drop the image size by a factor of 2 each iteration and then assume a - // border of 2 pixels is needed to avoid any border effects of the crop. - grow = grow*2 + 2; - } - drectangle rot_rect; - const vector<double,2> cent = center(chip_locations[i].rect); - rot_rect += rotate_point<double>(cent,chip_locations[i].rect.tl_corner(),chip_locations[i].angle); - rot_rect += rotate_point<double>(cent,chip_locations[i].rect.tr_corner(),chip_locations[i].angle); - rot_rect += rotate_point<double>(cent,chip_locations[i].rect.bl_corner(),chip_locations[i].angle); - rot_rect += rotate_point<double>(cent,chip_locations[i].rect.br_corner(),chip_locations[i].angle); - bounding_box += grow_rect(rot_rect, grow).intersect(get_rect(img)); - max_depth = std::max(depth,max_depth); - } - //std::cout << "max_depth: " << max_depth << std::endl; - //std::cout << "crop amount: " << bounding_box.area()/(double)get_rect(img).area() << std::endl; - - // now make an image pyramid - dlib::array<array2d<typename image_traits<image_type1>::pixel_type> > levels(max_depth); - if (levels.size() != 0) - pyr(sub_image(img,bounding_box),levels[0]); - for (unsigned long i = 1; i < levels.size(); ++i) - pyr(levels[i-1],levels[i]); - - std::vector<dlib::vector<double,2> > from, to; - - // now pull out the chips - chips.resize(chip_locations.size()); - for (unsigned long i = 0; i < chips.size(); ++i) - { - // If the chip doesn't have any rotation or scaling then use the basic version - // of chip extraction that just does a fast copy. 
- if (chip_locations[i].angle == 0 && - chip_locations[i].rows == chip_locations[i].rect.height() && - chip_locations[i].cols == chip_locations[i].rect.width()) - { - impl::basic_extract_image_chip(img, chip_locations[i].rect, chips[i]); - } - else - { - set_image_size(chips[i], chip_locations[i].rows, chip_locations[i].cols); - - // figure out which level in the pyramid to use to extract the chip - int level = -1; - drectangle rect = translate_rect(chip_locations[i].rect, -bounding_box.tl_corner()); - while (pyr.rect_down(rect).area() > chip_locations[i].size()) - { - ++level; - rect = pyr.rect_down(rect); - } - - // find the appropriate transformation that maps from the chip to the input - // image - from.clear(); - to.clear(); - from.push_back(get_rect(chips[i]).tl_corner()); to.push_back(rotate_point<double>(center(rect),rect.tl_corner(),chip_locations[i].angle)); - from.push_back(get_rect(chips[i]).tr_corner()); to.push_back(rotate_point<double>(center(rect),rect.tr_corner(),chip_locations[i].angle)); - from.push_back(get_rect(chips[i]).bl_corner()); to.push_back(rotate_point<double>(center(rect),rect.bl_corner(),chip_locations[i].angle)); - point_transform_affine trns = find_affine_transform(from,to); - - // now extract the actual chip - if (level == -1) - transform_image(sub_image(img,bounding_box),chips[i],interp,trns); - else - transform_image(levels[level],chips[i],interp,trns); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2 - > - void extract_image_chips( - const image_type1& img, - const std::vector<chip_details>& chip_locations, - dlib::array<image_type2>& chips - ) - { - extract_image_chips(img, chip_locations, chips, interpolate_bilinear()); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename interpolation_type 
- > - void extract_image_chip ( - const image_type1& img, - const chip_details& location, - image_type2& chip, - const interpolation_type& interp - ) - { - // If the chip doesn't have any rotation or scaling then use the basic version of - // chip extraction that just does a fast copy. - if (location.angle == 0 && - location.rows == location.rect.height() && - location.cols == location.rect.width()) - { - impl::basic_extract_image_chip(img, location.rect, chip); - } - else - { - std::vector<chip_details> chip_locations(1,location); - dlib::array<image_type2> chips; - extract_image_chips(img, chip_locations, chips, interp); - swap(chips[0], chip); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2 - > - void extract_image_chip ( - const image_type1& img, - const chip_details& location, - image_type2& chip - ) - { - extract_image_chip(img, location, chip, interpolate_bilinear()); - } - -// ---------------------------------------------------------------------------------------- - - inline chip_details get_face_chip_details ( - const full_object_detection& det, - const unsigned long size = 200, - const double padding = 0.2 - ) - { - DLIB_CASSERT(det.num_parts() == 68 || det.num_parts() == 5, - "\t chip_details get_face_chip_details()" - << "\n\t You have to give either a 5 point or 68 point face landmarking output to this function. " - << "\n\t det.num_parts(): " << det.num_parts() - ); - DLIB_CASSERT(padding >= 0 && size > 0, - "\t chip_details get_face_chip_details()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t padding: " << padding - << "\n\t size: " << size - ); - - - std::vector<dpoint> from_points, to_points; - if (det.num_parts() == 5) - { - dpoint p0(0.8595674595992, 0.2134981538014); - dpoint p1(0.6460604764104, 0.2289674387677); - dpoint p2(0.1205750620789, 0.2137274526848); - dpoint p3(0.3340850613712, 0.2290642403242); - dpoint p4(0.4901123135679, 0.6277975316475); - - - p0 = (padding+p0)/(2*padding+1); - p1 = (padding+p1)/(2*padding+1); - p2 = (padding+p2)/(2*padding+1); - p3 = (padding+p3)/(2*padding+1); - p4 = (padding+p4)/(2*padding+1); - - from_points.push_back(p0*size); - to_points.push_back(det.part(0)); - - from_points.push_back(p1*size); - to_points.push_back(det.part(1)); - - from_points.push_back(p2*size); - to_points.push_back(det.part(2)); - - from_points.push_back(p3*size); - to_points.push_back(det.part(3)); - - from_points.push_back(p4*size); - to_points.push_back(det.part(4)); - } - else - { - // Average positions of face points 17-67 - const double mean_face_shape_x[] = { - 0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124, - 0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036, - 0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918, - 0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149, - 0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721, - 0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874, - 0.553364, 0.490127, 0.42689 - }; - const double mean_face_shape_y[] = { - 0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891, - 0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326, - 0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733, - 0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099, - 0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805, - 
0.902192, 0.909281, 0.902192, 0.864805, 0.784792, 0.778746, 0.785343, 0.778746, - 0.784792, 0.824182, 0.831803, 0.824182 - }; - - COMPILE_TIME_ASSERT(sizeof(mean_face_shape_x)/sizeof(double) == 68-17); - - for (unsigned long i = 17; i < det.num_parts(); ++i) - { - // Ignore the lower lip - if ((55 <= i && i <= 59) || (65 <= i && i <= 67)) - continue; - // Ignore the eyebrows - if (17 <= i && i <= 26) - continue; - - dpoint p; - p.x() = (padding+mean_face_shape_x[i-17])/(2*padding+1); - p.y() = (padding+mean_face_shape_y[i-17])/(2*padding+1); - from_points.push_back(p*size); - to_points.push_back(det.part(i)); - } - } - - return chip_details(from_points, to_points, chip_dims(size,size)); - } - -// ---------------------------------------------------------------------------------------- - - inline std::vector<chip_details> get_face_chip_details ( - const std::vector<full_object_detection>& dets, - const unsigned long size = 200, - const double padding = 0.2 - ) - { - std::vector<chip_details> res; - res.reserve(dets.size()); - for (unsigned long i = 0; i < dets.size(); ++i) - res.push_back(get_face_chip_details(dets[i], size, padding)); - return res; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - image_type jitter_image( - const image_type& img, - dlib::rand& rnd - ) - { - DLIB_CASSERT(num_rows(img)*num_columns(img) != 0); - DLIB_CASSERT(num_rows(img)==num_columns(img)); - - const double max_rotation_degrees = 3; - const double min_object_height = 0.97; - const double max_object_height = 0.99999; - const double translate_amount = 0.02; - - - const auto rect = shrink_rect(get_rect(img),3); - - // perturb the location of the crop by a small fraction of the object's size. 
- const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*rect.width(), - rnd.get_double_in_range(-translate_amount,translate_amount)*rect.height()); - - // perturb the scale of the crop by a fraction of the object's size - const double rand_scale_perturb = rnd.get_double_in_range(min_object_height, max_object_height); - - const long box_size = rect.height()/rand_scale_perturb; - const auto crop_rect = centered_rect(center(rect)+rand_translate, box_size, box_size); - const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180; - image_type crop; - extract_image_chip(img, chip_details(crop_rect, chip_dims(img.nr(),img.nc()), angle), crop); - if (rnd.get_random_double() > 0.5) - flip_image_left_right(crop); - - return crop; - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_INTERPOlATIONh_ - diff --git a/ml/dlib/dlib/image_transforms/interpolation_abstract.h b/ml/dlib/dlib/image_transforms/interpolation_abstract.h deleted file mode 100644 index f2da2fb02..000000000 --- a/ml/dlib/dlib/image_transforms/interpolation_abstract.h +++ /dev/null @@ -1,1480 +0,0 @@ -// Copyright (C) 2012 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_INTERPOlATION_ABSTRACT_ -#ifdef DLIB_INTERPOlATION_ABSTRACT_ - -#include "../pixel.h" -#include "../image_processing/full_object_detection_abstract.h" -#include "../image_processing/generic_image.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class interpolate_nearest_neighbor - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is a tool for performing nearest neighbor interpolation - on an image. 
- !*/ - - public: - - template < - typename image_view_type, - typename pixel_type - > - bool operator() ( - const image_view_type& img, - const dlib::point& p, - pixel_type& result - ) const; - /*! - requires - - image_view_type == an image_view or const_image_view object. - - pixel_traits<typename image_view_type::pixel_type>::has_alpha == false - - pixel_traits<pixel_type> is defined - ensures - - if (p is located inside img) then - - #result == img[p.y()][p.x()] - (This assignment is done using assign_pixel(#result, img[p.y()][p.x()]), - therefore any necessary color space conversion will be performed) - - returns true - - else - - returns false - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - class interpolate_bilinear - { - - /*! - WHAT THIS OBJECT REPRESENTS - This object is a tool for performing bilinear interpolation - on an image. This is performed by looking at the 4 pixels - nearest to a point and deriving an interpolated value from them. - !*/ - - public: - - template < - typename T, - typename image_view_type, - typename pixel_type - > - bool operator() ( - const image_view_type& img, - const dlib::vector<T,2>& p, - pixel_type& result - ) const; - /*! - requires - - image_view_type == an image_view or const_image_view object - - pixel_traits<typename image_view_type::pixel_type>::has_alpha == false - - pixel_traits<pixel_type> is defined - ensures - - if (there is an interpolatable image location at point p in img) then - - #result == the interpolated pixel value from img at point p. - - assign_pixel() will be used to write to #result, therefore any - necessary color space conversion will be performed. - - returns true - - if img contains RGB pixels then the interpolation will be in color. - Otherwise, the interpolation will be performed in a grayscale mode. 
- - else - - returns false - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - class interpolate_quadratic - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is a tool for performing quadratic interpolation - on an image. This is performed by looking at the 9 pixels - nearest to a point and deriving an interpolated value from them. - !*/ - - public: - - template < - typename T, - typename image_view_type, - typename pixel_type - > - bool operator() ( - const image_view_type& img, - const dlib::vector<T,2>& p, - pixel_type& result - ) const; - /*! - requires - - image_view_type == an image_view or const_image_view object. - - pixel_traits<typename image_view_type::pixel_type>::has_alpha == false - - pixel_traits<pixel_type> is defined - ensures - - if (there is an interpolatable image location at point p in img) then - - #result == the interpolated pixel value from img at point p - - assign_pixel() will be used to write to #result, therefore any - necessary color space conversion will be performed. - - returns true - - if img contains RGB pixels then the interpolation will be in color. - Otherwise, the interpolation will be performed in a grayscale mode. - - else - - returns false - !*/ - }; - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - class black_background - { - /*! - WHAT THIS OBJECT REPRESENTS - This is a function object which simply sets a pixel - to have a black value. - !*/ - - public: - template <typename pixel_type> - void operator() ( pixel_type& p) const { assign_pixel(p, 0); } - }; - -// ---------------------------------------------------------------------------------------- - - class white_background - { - /*! 
- WHAT THIS OBJECT REPRESENTS - This is a function object which simply sets a pixel - to have a white value. - !*/ - - public: - template <typename pixel_type> - void operator() ( pixel_type& p) const { assign_pixel(p, 255); } - }; - -// ---------------------------------------------------------------------------------------- - - class no_background - { - /*! - WHAT THIS OBJECT REPRESENTS - This is a function object which does nothing. It is useful - when used with the transform_image() routine defined below - if no modification of uninterpolated output pixels is desired. - !*/ - public: - template <typename pixel_type> - void operator() ( pixel_type& ) const { } - }; - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename interpolation_type, - typename point_mapping_type, - typename background_type - > - void transform_image ( - const image_type1& in_img, - image_type2& out_img, - const interpolation_type& interp, - const point_mapping_type& map_point, - const background_type& set_background, - const rectangle& area - ); - /*! - requires - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear, - interpolate_quadratic, or a type with a compatible interface. - - map_point should be a function which takes dlib::vector<T,2> objects and - returns dlib::vector<T,2> objects. An example is point_transform_affine. - - set_background should be a function which can take a single argument of - type image_traits<image_type2>::pixel_type. Examples are black_background, - white_background, and no_background. 
- - get_rect(out_img).contains(area) == true - - is_same_object(in_img, out_img) == false - ensures - - The map_point function defines a mapping from pixels in out_img to pixels - in in_img. transform_image() uses this mapping, along with the supplied - interpolation routine interp, to fill the region of out_img defined by - area with an interpolated copy of in_img. - - This function does not change the size of out_img. - - Only pixels inside the region defined by area in out_img are modified. - - For all locations r and c such that area.contains(c,r) but have no corresponding - locations in in_img: - - set_background(out_img[r][c]) is invoked - (i.e. some parts of out_img might correspond to areas outside in_img and - therefore can't supply interpolated values. In these cases, these - pixels can be assigned a value by the supplied set_background() routine) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename interpolation_type, - typename point_mapping_type, - typename background_type - > - void transform_image ( - const image_type1& in_img, - image_type2& out_img, - const interpolation_type& interp, - const point_mapping_type& map_point, - const background_type& set_background - ); - /*! - requires - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear, - interpolate_quadratic, or a type with a compatible interface. - - map_point should be a function which takes dlib::vector<T,2> objects and - returns dlib::vector<T,2> objects. An example is point_transform_affine. - - set_background should be a function which can take a single argument of - type image_traits<image_type2>::pixel_type. 
Examples are black_background, white_background, - and no_background. - - is_same_object(in_img, out_img) == false - ensures - - performs: - transform_image(in_img, out_img, interp, map_point, set_background, get_rect(out_img)); - (i.e. runs transform_image() on the entire out_img) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename interpolation_type, - typename point_mapping_type - > - void transform_image ( - const image_type1& in_img, - image_type2& out_img, - const interpolation_type& interp, - const point_mapping_type& map_point - ); - /*! - requires - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear, - interpolate_quadratic, or a type with a compatible interface. - - map_point should be a function which takes dlib::vector<T,2> objects and - returns dlib::vector<T,2> objects. An example is point_transform_affine. - - is_same_object(in_img, out_img) == false - ensures - - performs: - transform_image(in_img, out_img, interp, map_point, black_background(), get_rect(out_img)); - (i.e. runs transform_image() on the entire out_img and sets non-interpolated - pixels to black) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename interpolation_type - > - point_transform_affine rotate_image ( - const image_type1& in_img, - image_type2& out_img, - double angle, - const interpolation_type& interp - ); - /*! 
- requires - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear, - interpolate_quadratic, or a type with a compatible interface. - - is_same_object(in_img, out_img) == false - ensures - - #out_img == a copy of in_img which has been rotated angle radians counter clockwise. - The rotation is performed with respect to the center of the image. - - Parts of #out_img which have no corresponding locations in in_img are set to black. - - uses the supplied interpolation routine interp to perform the necessary - pixel interpolation. - - returns a transformation object that maps points in in_img into their corresponding - location in #out_img. - !*/ - -// ---------------------------------------------------------------------------------------- - - - template < - typename image_type1, - typename image_type2 - > - point_transform_affine rotate_image ( - const image_type1& in_img, - image_type2& out_img, - double angle - ); - /*! - requires - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - pixel_traits<typename image_traits<image_type1>::pixel_type>::has_alpha == false - - is_same_object(in_img, out_img) == false - ensures - - #out_img == a copy of in_img which has been rotated angle radians counter clockwise. - The rotation is performed with respect to the center of the image. - - Parts of #out_img which have no corresponding locations in in_img are set to black. - - uses the interpolate_quadratic object to perform the necessary pixel interpolation. 
- - returns a transformation object that maps points in in_img into their corresponding - location in #out_img. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename interpolation_type - > - void resize_image ( - const image_type1& in_img, - image_type2& out_img, - const interpolation_type& interp - ); - /*! - requires - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear, - interpolate_quadratic, or a type with a compatible interface. - - is_same_object(in_img, out_img) == false - ensures - - #out_img == A copy of in_img which has been stretched so that it - fits exactly into out_img. - - The size of out_img is not modified. I.e. - - #out_img.nr() == out_img.nr() - - #out_img.nc() == out_img.nc() - - uses the supplied interpolation routine interp to perform the necessary - pixel interpolation. - !*/ - -// ---------------------------------------------------------------------------------------- - - - template < - typename image_type1, - typename image_type2 - > - void resize_image ( - const image_type1& in_img, - image_type2& out_img - ); - /*! - requires - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - pixel_traits<typename image_traits<image_type1>::pixel_type>::has_alpha == false - - is_same_object(in_img, out_img) == false - ensures - - #out_img == A copy of in_img which has been stretched so that it - fits exactly into out_img. - - The size of out_img is not modified. I.e. 
- - #out_img.nr() == out_img.nr() - - #out_img.nc() == out_img.nc() - - Uses the bilinear interpolation to perform the necessary pixel interpolation. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - void resize_image ( - double size_scale, - image_type& img - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false - ensures - - Resizes img so that each of it's dimensions are size_scale times larger than img. - In particular, we will have: - - #img.nr() == std::round(size_scale*img.nr()) - - #img.nc() == std::round(size_scale*img.nc()) - - #img == a bilinearly interpolated copy of the input image. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2 - > - point_transform_affine flip_image_left_right ( - const image_type1& in_img, - image_type2& out_img - ); - /*! - requires - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - is_same_object(in_img, out_img) == false - ensures - - #out_img.nr() == in_img.nr() - - #out_img.nc() == in_img.nc() - - #out_img == a copy of in_img which has been flipped from left to right. - (i.e. it is flipped as if viewed though a mirror) - - returns a transformation object that maps points in in_img into their - corresponding location in #out_img. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - point_transform_affine flip_image_left_right ( - image_type& img - ); - /*! 
- requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - ensures - - This function is identical to the above version of flip_image_left_right() - except that it operates in-place. - - #img.nr() == img.nr() - - #img.nc() == img.nc() - - #img == a copy of img which has been flipped from left to right. - (i.e. it is flipped as if viewed though a mirror) - - returns a transformation object that maps points in img into their - corresponding location in #img. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_array_type, - typename T - > - void add_image_left_right_flips ( - image_array_type& images, - std::vector<std::vector<T> >& objects - ); - /*! - requires - - image_array_type == a dlib::array or std::vector of image objects that each - implement the interface defined in dlib/image_processing/generic_image.h - - T == rectangle, full_object_detection, or mmod_rect - - images.size() == objects.size() - ensures - - This function computes all the left/right flips of the contents of images and - then appends them onto the end of the images array. It also finds the - left/right flips of the rectangles in objects and similarly appends them into - objects. That is, we assume objects[i] is the set of bounding boxes in - images[i] and we flip the bounding boxes so that they still bound the same - objects in the new flipped images. - - #images.size() == images.size()*2 - - #objects.size() == objects.size()*2 - - All the original elements of images and objects are left unmodified. That - is, this function only appends new elements to each of these containers. 
- !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_array_type, - typename T, - typename U - > - void add_image_left_right_flips ( - image_array_type& images, - std::vector<std::vector<T> >& objects, - std::vector<std::vector<U> >& objects2 - ); - /*! - requires - - image_array_type == a dlib::array or std::vector of image objects that each - implement the interface defined in dlib/image_processing/generic_image.h - - images.size() == objects.size() - - images.size() == objects2.size() - - T == rectangle, full_object_detection, or mmod_rect - - U == rectangle, full_object_detection, or mmod_rect - ensures - - This function computes all the left/right flips of the contents of images and - then appends them onto the end of the images array. It also finds the - left/right flips of the rectangles in objects and objects2 and similarly - appends them into objects and objects2 respectively. That is, we assume - objects[i] is the set of bounding boxes in images[i] and we flip the bounding - boxes so that they still bound the same objects in the new flipped images. - We similarly flip the boxes in objects2. - - #images.size() == images.size()*2 - - #objects.size() == objects.size()*2 - - #objects2.size() == objects2.size()*2 - - All the original elements of images, objects, and objects2 are left unmodified. - That is, this function only appends new elements to each of these containers. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_array_type, - typename EXP, - typename T, - typename U - > - void add_image_rotations ( - const matrix_exp<EXP>& angles, - image_array_type& images, - std::vector<std::vector<T> >& objects, - std::vector<std::vector<U> >& objects2 - ); - /*! 
- requires - - image_array_type == a dlib::array or std::vector of image objects that each - implement the interface defined in dlib/image_processing/generic_image.h - - is_vector(angles) == true - - angles.size() > 0 - - images.size() == objects.size() - - images.size() == objects2.size() - - T == rectangle, full_object_detection, or mmod_rect - - U == rectangle, full_object_detection, or mmod_rect - ensures - - This function computes angles.size() different rotations of all the given - images and then replaces the contents of images with those rotations of the - input dataset. We will also adjust the rectangles inside objects and - objects2 so that they still bound the same objects in the new rotated images. - That is, we assume objects[i] and objects2[i] are bounding boxes for things - in images[i]. So we will adjust the positions of the boxes in objects and - objects2 accordingly. - - The elements of angles are interpreted as angles in radians and we will - rotate the images around their center using the values in angles. Moreover, - the rotation is done counter clockwise. - - #images.size() == images.size()*angles.size() - - #objects.size() == objects.size()*angles.size() - - #objects2.size() == objects2.size()*angles.size() - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_array_type, - typename EXP, - typename T - > - void add_image_rotations ( - const matrix_exp<EXP>& angles, - image_array_type& images, - std::vector<std::vector<T> >& objects - ); - /*! 
- requires - - image_array_type == a dlib::array or std::vector of image objects that each - implement the interface defined in dlib/image_processing/generic_image.h - - is_vector(angles) == true - - angles.size() > 0 - - images.size() == objects.size() - - T == rectangle, full_object_detection, or mmod_rect - ensures - - This function is identical to the add_image_rotations() define above except - that it doesn't have objects2 as an argument. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_array_type - > - void flip_image_dataset_left_right ( - image_array_type& images, - std::vector<std::vector<rectangle> >& objects - ); - /*! - requires - - image_array_type == a dlib::array or std::vector of image objects that each - implement the interface defined in dlib/image_processing/generic_image.h - - images.size() == objects.size() - ensures - - This function replaces each image in images with the left/right flipped - version of the image. Therefore, #images[i] will contain the left/right - flipped version of images[i]. It also flips all the rectangles in objects so - that they still bound the same visual objects in each image. - - #images.size() == image.size() - - #objects.size() == objects.size() - - for all valid i: - #objects[i].size() == objects[i].size() - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_array_type - > - void flip_image_dataset_left_right ( - image_array_type& images, - std::vector<std::vector<rectangle> >& objects, - std::vector<std::vector<rectangle> >& objects2 - ); - /*! 
- requires - - image_array_type == a dlib::array or std::vector of image objects that each - implement the interface defined in dlib/image_processing/generic_image.h - - images.size() == objects.size() - - images.size() == objects2.size() - ensures - - This function replaces each image in images with the left/right flipped - version of the image. Therefore, #images[i] will contain the left/right - flipped version of images[i]. It also flips all the rectangles in objects - and objects2 so that they still bound the same visual objects in each image. - - #images.size() == image.size() - - #objects.size() == objects.size() - - #objects2.size() == objects2.size() - - for all valid i: - #objects[i].size() == objects[i].size() - - for all valid i: - #objects2[i].size() == objects2[i].size() - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename pyramid_type, - typename image_array_type - > - void upsample_image_dataset ( - image_array_type& images, - std::vector<std::vector<rectangle> >& objects, - unsigned long max_image_size = std::numeric_limits<unsigned long>::max() - ); - /*! - requires - - image_array_type == a dlib::array or std::vector of image objects that each - implement the interface defined in dlib/image_processing/generic_image.h - - images.size() == objects.size() - ensures - - This function replaces each image in images with an upsampled version of that - image. Each image is upsampled using pyramid_up() and the given - pyramid_type. Therefore, #images[i] will contain the larger upsampled - version of images[i]. It also adjusts all the rectangles in objects so that - they still bound the same visual objects in each image. - - Input images already containing more than max_image_size pixels are not upsampled. 
- - #images.size() == image.size() - - #objects.size() == objects.size() - - for all valid i: - #objects[i].size() == objects[i].size() - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename pyramid_type, - typename image_array_type - > - void upsample_image_dataset ( - image_array_type& images, - std::vector<std::vector<mmod_rect>>& objects, - unsigned long max_image_size = std::numeric_limits<unsigned long>::max() - ); - /*! - requires - - image_array_type == a dlib::array or std::vector of image objects that each - implement the interface defined in dlib/image_processing/generic_image.h - - images.size() == objects.size() - ensures - - This function replaces each image in images with an upsampled version of that - image. Each image is upsampled using pyramid_up() and the given - pyramid_type. Therefore, #images[i] will contain the larger upsampled - version of images[i]. It also adjusts all the rectangles in objects so that - they still bound the same visual objects in each image. - - Input images already containing more than max_image_size pixels are not upsampled. - - #images.size() == image.size() - - #objects.size() == objects.size() - - for all valid i: - #objects[i].size() == objects[i].size() - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename pyramid_type, - typename image_array_type, - > - void upsample_image_dataset ( - image_array_type& images, - std::vector<std::vector<rectangle> >& objects, - std::vector<std::vector<rectangle> >& objects2, - unsigned long max_image_size = std::numeric_limits<unsigned long>::max() - ); - /*! 
- requires - - image_array_type == a dlib::array or std::vector of image objects that each - implement the interface defined in dlib/image_processing/generic_image.h - - images.size() == objects.size() - - images.size() == objects2.size() - ensures - - This function replaces each image in images with an upsampled version of that - image. Each image is upsampled using pyramid_up() and the given - pyramid_type. Therefore, #images[i] will contain the larger upsampled - version of images[i]. It also adjusts all the rectangles in objects and - objects2 so that they still bound the same visual objects in each image. - - Input images already containing more than max_image_size pixels are not upsampled. - - #images.size() == image.size() - - #objects.size() == objects.size() - - #objects2.size() == objects2.size() - - for all valid i: - #objects[i].size() == objects[i].size() - - for all valid i: - #objects2[i].size() == objects2[i].size() - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename image_array_type> - void rotate_image_dataset ( - double angle, - image_array_type& images, - std::vector<std::vector<rectangle> >& objects - ); - /*! - requires - - image_array_type == a dlib::array or std::vector of image objects that each - implement the interface defined in dlib/image_processing/generic_image.h - - images.size() == objects.size() - ensures - - This function replaces each image in images with a rotated version of that - image. In particular, each image is rotated using - rotate_image(original,rotated,angle). Therefore, the images are rotated - angle radians counter clockwise around their centers. That is, #images[i] - will contain the rotated version of images[i]. It also adjusts all - the rectangles in objects so that they still bound the same visual objects in - each image. - - All the rectangles will still have the same sizes and aspect ratios after - rotation. 
They will simply have had their positions adjusted so they still - fall on the same objects. - - #images.size() == image.size() - - #objects.size() == objects.size() - - for all valid i: - #objects[i].size() == objects[i].size() - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename image_array_type> - void rotate_image_dataset ( - double angle, - image_array_type& images, - std::vector<std::vector<rectangle> >& objects, - std::vector<std::vector<rectangle> >& objects2 - ); - /*! - requires - - image_array_type == a dlib::array or std::vector of image objects that each - implement the interface defined in dlib/image_processing/generic_image.h - - images.size() == objects.size() - - images.size() == objects2.size() - ensures - - This function replaces each image in images with a rotated version of that - image. In particular, each image is rotated using - rotate_image(original,rotated,angle). Therefore, the images are rotated - angle radians counter clockwise around their centers. That is, #images[i] - will contain the rotated version of images[i]. It also adjusts all - the rectangles in objects and objects2 so that they still bound the same - visual objects in each image. - - All the rectangles will still have the same sizes and aspect ratios after - rotation. They will simply have had their positions adjusted so they still - fall on the same objects. - - #images.size() == image.size() - - #objects.size() == objects.size() - - #objects2.size() == objects2.size() - - for all valid i: - #objects[i].size() == objects[i].size() - - for all valid i: - #objects2[i].size() == objects2[i].size() - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2 - > - void flip_image_up_down ( - const image_type1& in_img, - image_type2& out_img - ); - /*! 
- requires - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - is_same_object(in_img, out_img) == false - ensures - - #out_img.nr() == in_img.nr() - - #out_img.nc() == in_img.nc() - - #out_img == a copy of in_img which has been flipped upside down. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename pyramid_type, - typename interpolation_type - > - void pyramid_up ( - const image_type1& in_img, - image_type2& out_img, - const pyramid_type& pyr, - const interpolation_type& interp - ); - /*! - requires - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - pyramid_type == a type compatible with the image pyramid objects defined - in dlib/image_transforms/image_pyramid_abstract.h - - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear, - interpolate_quadratic, or a type with a compatible interface. - - is_same_object(in_img, out_img) == false - ensures - - This function inverts the downsampling transformation performed by pyr(). - In particular, it attempts to make an image, out_img, which would result - in in_img when downsampled with pyr(). - - #out_img == An upsampled copy of in_img. In particular, downsampling - #out_img 1 time with pyr() should result in a final image which looks like - in_img. - - Uses the supplied interpolation routine interp to perform the necessary - pixel interpolation. 
- - Note that downsampling an image with pyr() and then upsampling it with - pyramid_up() will not necessarily result in a final image which is - the same size as the original. This is because the exact size of the - original image cannot be determined based on the downsampled image. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename pyramid_type - > - void pyramid_up ( - const image_type1& in_img, - image_type2& out_img, - const pyramid_type& pyr - ); - /*! - requires - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - pyramid_type == a type compatible with the image pyramid objects defined - in dlib/image_transforms/image_pyramid_abstract.h - - is_same_object(in_img, out_img) == false - ensures - - performs: pyramid_up(in_img, out_img, pyr, interpolate_bilinear()); - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename pyramid_type - > - void pyramid_up ( - image_type& img, - const pyramid_type& pyr - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - pyramid_type == a type compatible with the image pyramid objects defined - in dlib/image_transforms/image_pyramid_abstract.h - ensures - - Performs an in-place version of pyramid_up() on the given image. In - particular, this function is equivalent to: - pyramid_up(img, temp, pyr); - temp.swap(img); - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - void pyramid_up ( - image_type& img - ); - /*! 
- requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - ensures - - performs: pyramid_up(img, pyramid_down<2>()); - (i.e. it upsamples the given image and doubles it in size.) - !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - struct chip_dims - { - /*! - WHAT THIS OBJECT REPRESENTS - This is a simple tool for passing in a pair of row and column values to the - chip_details constructor. - !*/ - - chip_dims ( - unsigned long rows_, - unsigned long cols_ - ) : rows(rows_), cols(cols_) { } - - unsigned long rows; - unsigned long cols; - }; - -// ---------------------------------------------------------------------------------------- - - struct chip_details - { - /*! - WHAT THIS OBJECT REPRESENTS - This object describes where an image chip is to be extracted from within - another image. In particular, it specifies that the image chip is - contained within the rectangle this->rect and that prior to extraction the - image should be rotated counter-clockwise by this->angle radians. Finally, - the extracted chip should have this->rows rows and this->cols columns in it - regardless of the shape of this->rect. This means that the extracted chip - will be stretched to fit via bilinear interpolation when necessary. - !*/ - - chip_details( - ); - /*! - ensures - - #rect.is_empty() == true - - #size() == 0 - - #angle == 0 - - #rows == 0 - - #cols == 0 - !*/ - - chip_details( - const drectangle& rect_ - ); - /*! - ensures - - #rect == rect_ - - #size() == rect_.area() - - #angle == 0 - - #rows == rect_.height() - - #cols == rect_.width() - !*/ - - chip_details( - const rectangle& rect_ - ); - /*! 
- ensures - - #rect == rect_ - - #size() == rect_.area() - - #angle == 0 - - #rows == rect_.height() - - #cols == rect_.width() - !*/ - - chip_details( - const drectangle& rect_, - unsigned long size_ - ); - /*! - ensures - - #rect == rect_ - - #size() == size_ - - #angle == 0 - - #rows and #cols is set such that the total size of the chip is as close - to size_ as possible but still matches the aspect ratio of rect_. - - As long as size_ and the aspect ratio of of rect_ stays constant then - #rows and #cols will always have the same values. This means that, for - example, if you want all your chips to have the same dimensions then - ensure that size_ is always the same and also that rect_ always has the - same aspect ratio. Otherwise the calculated values of #rows and #cols - may be different for different chips. Alternatively, you can use the - chip_details constructor below that lets you specify the exact values for - rows and cols. - !*/ - - chip_details( - const drectangle& rect_, - unsigned long size_, - double angle_ - ); - /*! - ensures - - #rect == rect_ - - #size() == size_ - - #angle == angle_ - - #rows and #cols is set such that the total size of the chip is as close - to size_ as possible but still matches the aspect ratio of rect_. - - As long as size_ and the aspect ratio of of rect_ stays constant then - #rows and #cols will always have the same values. This means that, for - example, if you want all your chips to have the same dimensions then - ensure that size_ is always the same and also that rect_ always has the - same aspect ratio. Otherwise the calculated values of #rows and #cols - may be different for different chips. Alternatively, you can use the - chip_details constructor below that lets you specify the exact values for - rows and cols. - !*/ - - chip_details( - const drectangle& rect_, - const chip_dims& dims - ); - /*! 
- ensures - - #rect == rect_ - - #size() == dims.rows*dims.cols - - #angle == 0 - - #rows == dims.rows - - #cols == dims.cols - !*/ - - chip_details( - const drectangle& rect_, - const chip_dims& dims, - double angle_ - ); - /*! - ensures - - #rect == rect_ - - #size() == dims.rows*dims.cols - - #angle == angle_ - - #rows == dims.rows - - #cols == dims.cols - !*/ - - template <typename T> - chip_details( - const std::vector<dlib::vector<T,2> >& chip_points, - const std::vector<dlib::vector<T,2> >& img_points, - const chip_dims& dims - ); - /*! - requires - - chip_points.size() == img_points.size() - - chip_points.size() >= 2 - ensures - - The chip will be extracted such that the pixel locations chip_points[i] - in the chip are mapped to img_points[i] in the original image by a - similarity transform. That is, if you know the pixelwize mapping you - want between the chip and the original image then you use this function - of chip_details constructor to define the mapping. - - #rows == dims.rows - - #cols == dims.cols - - #size() == dims.rows*dims.cols - - #rect and #angle are computed based on the given size of the output chip - (specified by dims) and the similarity transform between the chip and - image (specified by chip_points and img_points). - !*/ - - inline unsigned long size() const { return rows*cols; } - /*! - ensures - - returns the number of pixels in this chip. This is just rows*cols. - !*/ - - drectangle rect; - double angle; - unsigned long rows; - unsigned long cols; - }; - -// ---------------------------------------------------------------------------------------- - - point_transform_affine get_mapping_to_chip ( - const chip_details& details - ); - /*! - ensures - - returns a transformation that maps from the pixels in the original image - to the pixels in the cropped image defined by the given details object. 
- !*/ - -// ---------------------------------------------------------------------------------------- - - full_object_detection map_det_to_chip ( - const full_object_detection& det, - const chip_details& details - ); - /*! - ensures - - Maps the given detection into the pixel space of the image chip defined by - the given details object. That is, this function returns an object D such - that: - - D.get_rect() == a box that bounds the same thing in the image chip as - det.get_rect() bounds in the original image the chip is extracted from. - - for all valid i: - - D.part(i) == the location in the image chip corresponding to - det.part(i) in the original image. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename interpolation_type - > - void extract_image_chips ( - const image_type1& img, - const std::vector<chip_details>& chip_locations, - dlib::array<image_type2>& chips, - const interpolation_type& interp - ); - /*! - requires - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - pixel_traits<typename image_traits<image_type1>::pixel_type>::has_alpha == false - - for all valid i: - - chip_locations[i].rect.is_empty() == false - - chip_locations[i].size() != 0 - - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear, - interpolate_quadratic, or a type with a compatible interface. - ensures - - This function extracts "chips" from an image. That is, it takes a list of - rectangular sub-windows (i.e. chips) within an image and extracts those - sub-windows, storing each into its own image. It also scales and rotates the - image chips according to the instructions inside each chip_details object. 
- It uses the interpolation method supplied as a parameter. - - #chips == the extracted image chips - - #chips.size() == chip_locations.size() - - for all valid i: - - #chips[i] == The image chip extracted from the position - chip_locations[i].rect in img. - - #chips[i].nr() == chip_locations[i].rows - - #chips[i].nc() == chip_locations[i].cols - - The image will have been rotated counter-clockwise by - chip_locations[i].angle radians, around the center of - chip_locations[i].rect, before the chip was extracted. - - Any pixels in an image chip that go outside img are set to 0 (i.e. black). - !*/ - - template < - typename image_type1, - typename image_type2 - > - void extract_image_chips ( - const image_type1& img, - const std::vector<chip_details>& chip_locations, - dlib::array<image_type2>& chips - ); - /*! - ensures - - This function is a simple convenience / compatibility wrapper that calls the - above-defined extract_image_chips() function using bilinear interpolation. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2, - typename interpolation_type - > - void extract_image_chip ( - const image_type1& img, - const chip_details& chip_location, - image_type2& chip, - const interpolation_type& interp - ); - /*! - ensures - - This function simply calls extract_image_chips() with a single chip location - and stores the single output chip into #chip. It uses the provided - interpolation method. - !*/ - - template < - typename image_type1, - typename image_type2 - > - void extract_image_chip ( - const image_type1& img, - const chip_details& chip_location, - image_type2& chip - ); - /*! - ensures - - This function is a simple convenience / compatibility wrapper that calls the - above-defined extract_image_chip() function using bilinear interpolation. 
- !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - struct sub_image_proxy - { - /*! - REQUIREMENTS ON image_type - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - WHAT THIS OBJECT REPRESENTS - This is a lightweight image object for referencing a subwindow of an image. - It implements the generic image interface and can therefore be used with - any function that expects a generic image, excepting that you cannot change - the size of a sub_image_proxy. - - Note that it only stores a pointer to the image data given to its - constructor and therefore does not perform a copy. Moreover, this means - that an instance of this object becomes invalid after the underlying image - data it references is destroyed. - !*/ - sub_image_proxy ( - T& img, - const rectangle& rect - ); - /*! - ensures - - This object is an image that represents the part of img contained within - rect. If rect is larger than img then rect is cropped so that it does - not go outside img. - !*/ - }; - - template < - typename image_type - > - sub_image_proxy<image_type> sub_image ( - image_type& img, - const rectangle& rect - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - ensures - - returns sub_image_proxy<image_type>(img,rect) - !*/ - - template <typename T> - sub_image_proxy<some_appropriate_type> sub_image ( - T* img, - long nr, - long nc, - long row_stride - ); - /*! - requires - - img == a pointer to at least nr*row_stride T objects - - nr >= 0 - - nc >= 0 - - row_stride >= 0 - ensures - - This function returns an image that is just a thin wrapper around the given - pointer. It will have the dimensions defined by the supplied longs. 
To be - precise, this function returns an image object IMG such that: - - image_data(IMG) == img - - num_rows(IMG) == nr - - num_columns(IMG) == nc - - width_step(IMG) == row_stride*sizeof(T) - - IMG contains pixels of type T. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - struct const_sub_image_proxy - { - /*! - REQUIREMENTS ON image_type - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - WHAT THIS OBJECT REPRESENTS - This object is just like sub_image_proxy except that it does not allow the - pixel data to be modified. - !*/ - const_sub_image_proxy ( - const T& img, - const rectangle& rect - ); - /*! - ensures - - This object is an image that represents the part of img contained within - rect. If rect is larger than img then rect is cropped so that it does - not go outside img. - !*/ - }; - - template < - typename image_type - > - const const_sub_image_proxy<image_type> sub_image ( - const image_type& img, - const rectangle& rect - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - ensures - - returns const_sub_image_proxy<image_type>(img,rect) - !*/ - - template <typename T> - const const_sub_image_proxy<some_appropriate_type> sub_image ( - const T* img, - long nr, - long nc, - long row_stride - ); - /*! - requires - - img == a pointer to at least nr*row_stride T objects - - nr >= 0 - - nc >= 0 - - row_stride >= 0 - ensures - - This function returns an image that is just a thin wrapper around the given - pointer. It will have the dimensions defined by the supplied longs. To be - precise, this function returns an image object IMG such that: - - image_data(IMG) == img - - num_rows(IMG) == nr - - num_columns(IMG) == nc - - width_step(IMG) == row_stride*sizeof(T) - - IMG contains pixels of type T. 
- !*/ - -// ---------------------------------------------------------------------------------------- - - chip_details get_face_chip_details ( - const full_object_detection& det, - const unsigned long size = 200, - const double padding = 0.2 - ); - /*! - requires - - det.num_parts() == 68 || det.num_parts() == 5 - - size > 0 - - padding >= 0 - ensures - - This function assumes det contains a human face detection with face parts - annotated using the annotation scheme from the iBUG 300-W face landmark - dataset or a 5 point face annotation. Given these assumptions, it creates a - chip_details object that will extract a copy of the face that has been - rotated upright, centered, and scaled to a standard size when given to - extract_image_chip(). - - This function is specifically calibrated to work with one of these models: - - http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2 - - http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 - - The extracted chips will have size rows and columns in them. - - if padding == 0 then the chip will be closely cropped around the face. - Setting larger padding values will result a looser cropping. In particular, - a padding of 0.5 would double the width of the cropped area, a value of 1 - would triple it, and so forth. - - The 5 point face annotation scheme is assumed to be: - - det part 0 == left eye corner, outside part of eye. - - det part 1 == left eye corner, inside part of eye. - - det part 2 == right eye corner, outside part of eye. - - det part 3 == right eye corner, inside part of eye. - - det part 4 == immediately under the nose, right at the top of the philtrum. - !*/ - -// ---------------------------------------------------------------------------------------- - - std::vector<chip_details> get_face_chip_details ( - const std::vector<full_object_detection>& dets, - const unsigned long size = 200, - const double padding = 0.2 - ); - /*! 
- requires - - for all valid i: - - det[i].num_parts() == 68 - - size > 0 - - padding >= 0 - ensures - - This function is identical to the version of get_face_chip_details() defined - above except that it creates and returns an array of chip_details objects, - one for each input full_object_detection. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - image_type jitter_image( - const image_type& img, - dlib::rand& rnd - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false - - img.size() > 0 - - img.nr() == img.nc() - ensures - - Randomly jitters the image a little bit and returns this new jittered image. - To be specific, the returned image has the same size as img and will look - generally similar. The difference is that the returned image will have been - slightly rotated, zoomed, and translated. There is also a 50% chance it will - be mirrored left to right. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_INTERPOlATION_ABSTRACT_ - diff --git a/ml/dlib/dlib/image_transforms/label_connected_blobs.h b/ml/dlib/dlib/image_transforms/label_connected_blobs.h deleted file mode 100644 index c25346c76..000000000 --- a/ml/dlib/dlib/image_transforms/label_connected_blobs.h +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#ifndef DLIB_LABEL_CONNeCTED_BLOBS_H_ -#define DLIB_LABEL_CONNeCTED_BLOBS_H_ - -#include "label_connected_blobs_abstract.h" -#include "../geometry.h" -#include <stack> -#include <vector> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - struct neighbors_8 - { - void operator() ( - const point& p, - std::vector<point>& neighbors - ) const - { - neighbors.push_back(point(p.x()+1,p.y()+1)); - neighbors.push_back(point(p.x()+1,p.y() )); - neighbors.push_back(point(p.x()+1,p.y()-1)); - - neighbors.push_back(point(p.x(),p.y()+1)); - neighbors.push_back(point(p.x(),p.y()-1)); - - neighbors.push_back(point(p.x()-1,p.y()+1)); - neighbors.push_back(point(p.x()-1,p.y() )); - neighbors.push_back(point(p.x()-1,p.y()-1)); - } - }; - - struct neighbors_4 - { - void operator() ( - const point& p, - std::vector<point>& neighbors - ) const - { - neighbors.push_back(point(p.x()+1,p.y())); - neighbors.push_back(point(p.x()-1,p.y())); - neighbors.push_back(point(p.x(),p.y()+1)); - neighbors.push_back(point(p.x(),p.y()-1)); - } - }; - -// ---------------------------------------------------------------------------------------- - - struct connected_if_both_not_zero - { - template <typename image_type> - bool operator() ( - const image_type& img, - const point& a, - const point& b - ) const - { - return (img[a.y()][a.x()] != 0 && img[b.y()][b.x()] != 0); - } - }; - - struct connected_if_equal - { - template <typename image_type> - bool operator() ( - const image_type& img, - const point& a, - const point& b - ) const - { - return (img[a.y()][a.x()] == img[b.y()][b.x()]); - } - }; - -// ---------------------------------------------------------------------------------------- - - struct zero_pixels_are_background - { - template <typename image_type> - bool operator() ( - const image_type& img, - const point& p - ) const - { - return img[p.y()][p.x()] == 0; - } - - }; - - struct nothing_is_background - { - template 
<typename image_type> - bool operator() ( - const image_type&, - const point& - ) const - { - return false; - } - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename label_image_type, - typename background_functor_type, - typename neighbors_functor_type, - typename connected_functor_type - > - unsigned long label_connected_blobs ( - const image_type& img_, - const background_functor_type& is_background, - const neighbors_functor_type& get_neighbors, - const connected_functor_type& is_connected, - label_image_type& label_img_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_same_object(img_, label_img_) == false, - "\t unsigned long label_connected_blobs()" - << "\n\t The input image and output label image can't be the same object." - ); - - const_image_view<image_type> img(img_); - image_view<label_image_type> label_img(label_img_); - - std::stack<point> neighbors; - label_img.set_size(img.nr(), img.nc()); - assign_all_pixels(label_img, 0); - unsigned long next = 1; - - if (img.size() == 0) - return 0; - - const rectangle area = get_rect(img); - - std::vector<point> window; - - for (long r = 0; r < img.nr(); ++r) - { - for (long c = 0; c < img.nc(); ++c) - { - // skip already labeled pixels or background pixels - if (label_img[r][c] != 0 || is_background(img,point(c,r))) - continue; - - label_img[r][c] = next; - - // label all the neighbors of this point - neighbors.push(point(c,r)); - while (neighbors.size() > 0) - { - const point p = neighbors.top(); - neighbors.pop(); - - window.clear(); - get_neighbors(p, window); - - for (unsigned long i = 0; i < window.size(); ++i) - { - if (area.contains(window[i]) && // point in image. - !is_background(img,window[i]) && // isn't background. - label_img[window[i].y()][window[i].x()] == 0 && // haven't already labeled it. - is_connected(img, p, window[i])) // it's connected. 
- { - label_img[window[i].y()][window[i].x()] = next; - neighbors.push(window[i]); - } - } - } - - ++next; - } - } - - return next; - } -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_LABEL_CONNeCTED_BLOBS_H_ - diff --git a/ml/dlib/dlib/image_transforms/label_connected_blobs_abstract.h b/ml/dlib/dlib/image_transforms/label_connected_blobs_abstract.h deleted file mode 100644 index 5dc984000..000000000 --- a/ml/dlib/dlib/image_transforms/label_connected_blobs_abstract.h +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_LABEL_CONNeCTED_BLOBS_ABSTRACT_H_ -#ifdef DLIB_LABEL_CONNeCTED_BLOBS_ABSTRACT_H_ - -#include "../geometry.h" -#include <vector> -#include "../image_processing/generic_image.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - struct neighbors_8 - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is a pixel neighborhood generating functor for - use with the label_connected_blobs() routine defined below. - !*/ - - void operator() ( - const point& p, - std::vector<point>& neighbors - ) const; - /*! - ensures - - adds the 8 neighboring pixels surrounding p into neighbors - !*/ - }; - - struct neighbors_4 - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is a pixel neighborhood generating functor for - use with the label_connected_blobs() routine defined below. - !*/ - - void operator() ( - const point& p, - std::vector<point>& neighbors - ) const; - /*! - ensures - - adds the 4 neighboring pixels of p into neighbors. These - are the ones immediately to the left, top, right, and bottom. - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - struct connected_if_both_not_zero - { - /*! 
- WHAT THIS OBJECT REPRESENTS - This object is a pixel connection testing functor for use - with the label_connected_blobs() routine defined below. - !*/ - - template <typename image_view_type> - bool operator() ( - const image_view_type& img, - const point& a, - const point& b - ) const - { - return (img[a.y()][a.x()] != 0 && img[b.y()][b.x()] != 0); - } - }; - - struct connected_if_equal - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is a pixel connection testing functor for use - with the label_connected_blobs() routine defined below. - !*/ - - template <typename image_view_type> - bool operator() ( - const image_view_type& img, - const point& a, - const point& b - ) const - { - return (img[a.y()][a.x()] == img[b.y()][b.x()]); - } - }; - -// ---------------------------------------------------------------------------------------- - - struct zero_pixels_are_background - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is a background testing functor for use - with the label_connected_blobs() routine defined below. - !*/ - - template <typename image_view_type> - bool operator() ( - const image_view_type& img, - const point& p - ) const - { - return img[p.y()][p.x()] == 0; - } - - }; - - struct nothing_is_background - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is a background testing functor for use - with the label_connected_blobs() routine defined below. 
- !*/ - - template <typename image_view_type> - bool operator() ( - const image_view_type&, - const point& - ) const - { - return false; - } - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename label_image_type, - typename background_functor_type, - typename neighbors_functor_type, - typename connected_functor_type - > - unsigned long label_connected_blobs ( - const image_type& img, - const background_functor_type& is_background, - const neighbors_functor_type& get_neighbors, - const connected_functor_type& is_connected, - label_image_type& label_img - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - label_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h and it must contain integer pixels. - - is_background(img, point(c,r)) is a legal expression that evaluates to a bool. - - is_connected(img, point(c,r), point(c2,r2)) is a legal expression that - evaluates to a bool. - - get_neighbors(point(c,r), neighbors) is a legal expression where neighbors - is of type std::vector<point>. - - is_same_object(img, label_img) == false - ensures - - This function labels each of the connected blobs in img with a unique integer - label. - - An image can be thought of as a graph where pixels A and B are connected if - and only if the following two statements are satisfied: - - is_connected(img,A,B) == true - - get_neighbors(A, neighbors) results in neighbors containing B or - get_neighbors(B, neighbors) results in neighbors containing A. - Then this function can be understood as labeling all the connected components - of this pixel graph such that all pixels in a component get the same label while - pixels in different components get different labels. Note that there is a - special "background" component determined by is_background(). 
Any pixels which - are "background" always get a blob id of 0 regardless of any other considerations. - - #label_img.nr() == img.nr() - - #label_img.nc() == img.nc() - - for all valid r and c: - - #label_img[r][c] == the blob label number for pixel img[r][c]. - - #label_img[r][c] >= 0 - - if (is_background(img, point(c,r))) then - - #label_img[r][c] == 0 - - else - - #label_img[r][c] != 0 - - if (img.size() != 0) then - - returns max(mat(#label_img))+1 - (i.e. returns a number one greater than the maximum blob id number, - this is the number of blobs found.) - - else - - returns 0 - - blob labels are contiguous, therefore, the number returned by this function is - the number of blobs in the image (including the background blob). - - It is guaranteed that is_connected() and is_background() will never be - called with points outside the image. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_LABEL_CONNeCTED_BLOBS_ABSTRACT_H_ - diff --git a/ml/dlib/dlib/image_transforms/lbp.h b/ml/dlib/dlib/image_transforms/lbp.h deleted file mode 100644 index b6bbac9cf..000000000 --- a/ml/dlib/dlib/image_transforms/lbp.h +++ /dev/null @@ -1,307 +0,0 @@ -// Copyright (C) 2014 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#ifndef DLIB_LBP_Hh_ -#define DLIB_LBP_Hh_ - -#include "lbp_abstract.h" -#include "../image_processing/generic_image.h" -#include "assign_image.h" -#include "../pixel.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename image_type2 - > - void make_uniform_lbp_image ( - const image_type& img_, - image_type2& lbp_ - ) - { - const static unsigned char uniform_lbps[] = { - 0, 1, 2, 3, 4, 58, 5, 6, 7, 58, 58, 58, 8, 58, 9, 10, 11, 58, 58, 58, 58, 58, - 58, 58, 12, 58, 58, 58, 13, 58, 14, 15, 16, 58, 58, 58, 58, 58, 58, 58, 58, 58, - 58, 58, 58, 58, 58, 58, 17, 58, 58, 58, 58, 58, 58, 58, 18, 58, 58, 58, 19, 58, - 20, 21, 22, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, - 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 23, 58, 58, 58, 58, 58, - 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 24, 58, 58, 58, 58, 58, 58, 58, 25, 58, - 58, 58, 26, 58, 27, 28, 29, 30, 58, 31, 58, 58, 58, 32, 58, 58, 58, 58, 58, 58, - 58, 33, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 34, 58, 58, - 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, - 58, 58, 58, 58, 58, 58, 58, 58, 58, 35, 36, 37, 58, 38, 58, 58, 58, 39, 58, 58, - 58, 58, 58, 58, 58, 40, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, - 58, 41, 42, 43, 58, 44, 58, 58, 58, 45, 58, 58, 58, 58, 58, 58, 58, 46, 47, 48, - 58, 49, 58, 58, 58, 50, 51, 52, 58, 53, 54, 55, 56, 57 - }; - - COMPILE_TIME_ASSERT(sizeof(uniform_lbps) == 256); - - const_image_view<image_type> img(img_); - image_view<image_type2> lbp(lbp_); - - lbp.set_size(img.nr(), img.nc()); - - // set all the border pixels to the "non-uniform LBP value". 
- assign_border_pixels(lbp, 1, 1, 58); - - typedef typename image_traits<image_type>::pixel_type pixel_type; - typedef typename pixel_traits<pixel_type>::basic_pixel_type basic_pixel_type; - - for (long r = 1; r+1 < img.nr(); ++r) - { - for (long c = 1; c+1 < img.nc(); ++c) - { - const basic_pixel_type pix = get_pixel_intensity(img[r][c]); - unsigned char b1 = 0; - unsigned char b2 = 0; - unsigned char b3 = 0; - unsigned char b4 = 0; - unsigned char b5 = 0; - unsigned char b6 = 0; - unsigned char b7 = 0; - unsigned char b8 = 0; - - unsigned char x = 0; - if (get_pixel_intensity(img[r-1][c-1]) > pix) b1 = 0x80; - if (get_pixel_intensity(img[r-1][c ]) > pix) b2 = 0x40; - if (get_pixel_intensity(img[r-1][c+1]) > pix) b3 = 0x20; - x |= b1; - if (get_pixel_intensity(img[r ][c-1]) > pix) b4 = 0x10; - x |= b2; - if (get_pixel_intensity(img[r ][c+1]) > pix) b5 = 0x08; - x |= b3; - if (get_pixel_intensity(img[r+1][c-1]) > pix) b6 = 0x04; - x |= b4; - if (get_pixel_intensity(img[r+1][c ]) > pix) b7 = 0x02; - x |= b5; - if (get_pixel_intensity(img[r+1][c+1]) > pix) b8 = 0x01; - - x |= b6; - x |= b7; - x |= b8; - - lbp[r][c] = uniform_lbps[x]; - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename T - > - void extract_histogram_descriptors ( - const image_type& img_, - const point& loc, - std::vector<T>& histograms, - const unsigned int cell_size = 10, - const unsigned int block_size = 4, - const unsigned int max_val = 58 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(cell_size >= 1 && block_size >= 1 && max_val < 256 && - (unsigned int)max(mat(img_)) <= max_val, - "\t void extract_histogram_descriptors()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t cell_size: " << cell_size - << "\n\t block_size: " << block_size - << "\n\t max_val: " << max_val - << "\n\t max(mat(img_)): " << max(mat(img_)) - ); - - typedef typename image_traits<image_type>::pixel_type pixel_type; - COMPILE_TIME_ASSERT((is_same_type<pixel_type, unsigned char>::value)); - - const_image_view<image_type> img(img_); - - const rectangle area = get_rect(img); - const rectangle window = centered_rect(loc, block_size*cell_size, block_size*cell_size); - unsigned int cell_top = window.top(); - for (unsigned int br = 0; br < block_size; ++br) - { - unsigned int cell_left = window.left(); - for (unsigned int bc = 0; bc < block_size; ++bc) - { - // figure out the cell boundaries - rectangle cell(cell_left, cell_top, cell_left+cell_size-1, cell_top+cell_size-1); - cell = cell.intersect(area); - - // make the actual histogram for this cell - unsigned int hist[256] = {0}; - for (long r = cell.top(); r <= cell.bottom(); ++r) - { - for (long c = cell.left(); c <= cell.right(); ++c) - { - hist[img[r][c]]++; - } - } - - // copy histogram into the output. - histograms.insert(histograms.end(), hist, hist + max_val+1); - - cell_left += cell_size; - } - cell_top += cell_size; - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename T - > - void extract_uniform_lbp_descriptors ( - const image_type& img, - std::vector<T>& feats, - const unsigned int cell_size = 10 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(cell_size >= 1, - "\t void extract_uniform_lbp_descriptors()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t cell_size: " << cell_size - ); - - feats.clear(); - array2d<unsigned char> lbp; - make_uniform_lbp_image(img, lbp); - for (long r = 0; r < lbp.nr(); r+=cell_size) - { - for (long c = 0; c < lbp.nc(); c+=cell_size) - { - const rectangle cell = rectangle(c,r,c+cell_size-1,r+cell_size-1).intersect(get_rect(lbp)); - // make the actual histogram for this cell - unsigned int hist[59] = {0}; - for (long r = cell.top(); r <= cell.bottom(); ++r) - { - for (long c = cell.left(); c <= cell.right(); ++c) - { - hist[lbp[r][c]]++; - } - } - - // copy histogram into the output. - feats.insert(feats.end(), hist, hist + 59); - } - } - - for (unsigned long i = 0; i < feats.size(); ++i) - feats[i] = std::sqrt(feats[i]); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename T - > - void extract_highdim_face_lbp_descriptors ( - const image_type& img, - const full_object_detection& det, - std::vector<T>& feats - ) - { - // make sure requires clause is not broken - DLIB_CASSERT(det.num_parts() == 68, - "\t void extract_highdim_face_lbp_descriptors()" - << "\n\t Invalid inputs were given to this function." - << "\n\t det.num_parts(): " << det.num_parts() - ); - - const unsigned long num_scales = 5; - feats.clear(); - dlib::vector<double,2> l, r; - double cnt = 0; - // Find the center of the left eye by averaging the points around - // the eye. - for (unsigned long i = 36; i <= 41; ++i) - { - l += det.part(i); - ++cnt; - } - l /= cnt; - - // Find the center of the right eye by averaging the points around - // the eye. - cnt = 0; - for (unsigned long i = 42; i <= 47; ++i) - { - r += det.part(i); - ++cnt; - } - r /= cnt; - - // We only do feature extraction from these face parts. These are things like the - // corners of the eyes and mouth and stuff like that. 
- std::vector<point> parts; - parts.reserve(30); - parts.push_back(l); - parts.push_back(r); - parts.push_back(det.part(17)); - parts.push_back(det.part(21)); - parts.push_back(det.part(22)); - parts.push_back(det.part(26)); - parts.push_back(det.part(36)); - parts.push_back(det.part(39)); - parts.push_back(det.part(42)); - parts.push_back(det.part(45)); - parts.push_back(det.part(27)); - parts.push_back(det.part(28)); - parts.push_back(det.part(29)); - parts.push_back(det.part(30)); - parts.push_back(det.part(31)); - parts.push_back(det.part(35)); - parts.push_back(det.part(33)); - parts.push_back(det.part(48)); - parts.push_back(det.part(54)); - parts.push_back(det.part(51)); - parts.push_back(det.part(57)); - - array2d<unsigned char> lbp; - make_uniform_lbp_image(img, lbp); - for (unsigned long i = 0; i < parts.size(); ++i) - extract_histogram_descriptors(lbp, parts[i], feats); - - if (num_scales > 1) - { - pyramid_down<4> pyr; - image_type img_temp; - pyr(img, img_temp); - unsigned long num_pyr_calls = 1; - - // now pull the features out at coarser scales - for (unsigned long iter = 1; iter < num_scales; ++iter) - { - // now do the feature extraction - make_uniform_lbp_image(img_temp, lbp); - for (unsigned long i = 0; i < parts.size(); ++i) - extract_histogram_descriptors(lbp, pyr.point_down(parts[i],num_pyr_calls), feats); - - if (iter+1 < num_scales) - { - pyr(img_temp); - ++num_pyr_calls; - } - } - } - - for (unsigned long i = 0; i < feats.size(); ++i) - feats[i] = std::sqrt(feats[i]); - - DLIB_ASSERT(feats.size() == 99120, feats.size()); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_LBP_Hh_ - diff --git a/ml/dlib/dlib/image_transforms/lbp_abstract.h b/ml/dlib/dlib/image_transforms/lbp_abstract.h deleted file mode 100644 index 1a20082a2..000000000 --- a/ml/dlib/dlib/image_transforms/lbp_abstract.h +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (C) 2014 Davis E. 
King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_LBP_ABSTRACT_Hh_ -#ifdef DLIB_LBP_ABSTRACT_Hh_ - -#include "../image_processing/generic_image.h" -#include "../pixel.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename image_type2 - > - void make_uniform_lbp_image ( - const image_type& img, - image_type2& lbp - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 should contain a grayscale pixel type such as unsigned char. - ensures - - #lbp.nr() == img.nr() - - #lbp.nc() == img.nc() - - This function extracts the uniform local-binary-pattern feature at every pixel - and stores it into #lbp. In particular, we have the following for all valid - r and c: - - #lbp[r][c] == the uniform LBP for the 3x3 pixel window centered on img[r][c]. - In particular, this is a value in the range 0 to 58 inclusive. - - We use the idea of uniform LBPs from the paper: - Face Description with Local Binary Patterns: Application to Face Recognition - by Ahonen, Hadid, and Pietikainen. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename T - > - void extract_histogram_descriptors ( - const image_type& img, - const point& loc, - std::vector<T>& histograms, - const unsigned int cell_size = 10, - const unsigned int block_size = 4, - const unsigned int max_val = 58 - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type contains unsigned char valued pixels. 
- - T is some scalar type like int or double - - All pixel values in img are <= max_val - - cell_size >= 1 - - block_size >= 1 - - max_val < 256 - ensures - - This function extracts histograms of pixel values from block_size*block_size - windows in the area in img immediately around img[loc.y()][loc.x()]. The - histograms are appended onto the end of #histograms. Each window is - cell_size pixels wide and tall. Moreover, the windows do not overlap. - - #histograms.size() == histograms.size() + block_size*block_size*(max_val+1) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename T - > - void extract_uniform_lbp_descriptors ( - const image_type& img, - std::vector<T>& feats, - const unsigned int cell_size = 10 - ); - /*! - requires - - cell_size >= 1 - - T is some scalar type like int or double - ensures - - Extracts histograms of uniform local-binary-patterns from img. The - histograms are from densely tiled windows that are cell_size pixels wide and - tall. The windows do not overlap and cover all of img. - - #feats.size() == 59*(number of windows that fit into img) - (i.e. #feats contains the LBP histograms) - - We will have taken the square root of all the histogram elements. That is, - #feats[i] is the square root of the number of LBPs that appeared in its - corresponding window. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type, - typename T - > - void extract_highdim_face_lbp_descriptors ( - const image_type& img, - const full_object_detection& det, - std::vector<T>& feats - ); - /*! 
- requires - - T is some scalar type like int or double - - det.num_parts() == 68 - ensures - - This function extracts the high-dimensional LBP feature described in the - paper: - Blessing of Dimensionality: High-dimensional Feature and Its Efficient - Compression for Face Verification by Dong Chen, Xudong Cao, Fang Wen, and - Jian Sun - - #feats == the high-dimensional LBP descriptor. It is the concatenation of - many LBP histograms, each extracted from different scales and from different - windows around different face landmarks. We also take the square root of - each histogram element before storing it into #feats. - - #feats.size() == 99120 - - This function assumes img has already been aligned and normalized to a - standard size. - - This function assumes det contains a human face detection with face parts - annotated using the annotation scheme from the iBUG 300-W face landmark - dataset. This means that det.part(i) gives the locations of different face - landmarks according to the iBUG 300-W annotation scheme. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_LBP_ABSTRACT_Hh_ - diff --git a/ml/dlib/dlib/image_transforms/morphological_operations.h b/ml/dlib/dlib/image_transforms/morphological_operations.h deleted file mode 100644 index a659e4bdc..000000000 --- a/ml/dlib/dlib/image_transforms/morphological_operations.h +++ /dev/null @@ -1,846 +0,0 @@ -// Copyright (C) 2006 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#ifndef DLIB_MORPHOLOGICAL_OPERATIONs_ -#define DLIB_MORPHOLOGICAL_OPERATIONs_ - -#include "../pixel.h" -#include "thresholding.h" -#include "morphological_operations_abstract.h" -#include "assign_image.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - namespace morphological_operations_helpers - { - template <typename image_type> - bool is_binary_image ( - const image_type& img_ - ) - /*! - ensures - - returns true if img_ contains only on_pixel and off_pixel values. - - returns false otherwise - !*/ - { - const_image_view<image_type> img(img_); - for (long r = 0; r < img.nr(); ++r) - { - for (long c = 0; c < img.nc(); ++c) - { - if (img[r][c] != on_pixel && img[r][c] != off_pixel) - { - return false; - } - } - } - return true; - } - - template < - long M, - long N - > - bool is_binary_image ( - const unsigned char (&structuring_element)[M][N] - ) - /*! - ensures - - returns true if structuring_element contains only on_pixel and off_pixel values. 
- - returns false otherwise - !*/ - { - for (long m = 0; m < M; ++m) - { - for (long n = 0; n < N; ++n) - { - if (structuring_element[m][n] != on_pixel && - structuring_element[m][n] != off_pixel) - { - return false; - } - } - } - return true; - } - - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - long M, - long N - > - void binary_dilation ( - const in_image_type& in_img_, - out_image_type& out_img_, - const unsigned char (&structuring_element)[M][N] - ) - { - typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); - - using namespace morphological_operations_helpers; - COMPILE_TIME_ASSERT(M%2 == 1); - COMPILE_TIME_ASSERT(N%2 == 1); - DLIB_ASSERT(is_same_object(in_img_,out_img_) == false, - "\tvoid binary_dilation()" - << "\n\tYou must give two different image objects" - ); - COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale); - DLIB_ASSERT(is_binary_image(in_img_) , - "\tvoid binary_dilation()" - << "\n\tin_img must be a binary image" - ); - DLIB_ASSERT(is_binary_image(structuring_element) , - "\tvoid binary_dilation()" - << "\n\tthe structuring_element must be a binary image" - ); - - - const_image_view<in_image_type> in_img(in_img_); - image_view<out_image_type> out_img(out_img_); - - // if there isn't any input image then don't do anything - if (in_img.size() == 0) - { - out_img.clear(); - return; - } - - out_img.set_size(in_img.nr(),in_img.nc()); - - // apply the filter to the image - for (long r = 0; r < in_img.nr(); ++r) - { - for (long c = 0; c < in_img.nc(); ++c) - { - unsigned char out_pixel = off_pixel; - for (long m = 0; m < M && out_pixel == off_pixel; ++m) - { - for (long n = 0; n < N && 
out_pixel == off_pixel; ++n) - { - if (structuring_element[m][n] == on_pixel) - { - // if this pixel is inside the image then get it from the image - // but if it isn't just pretend it was an off_pixel value - if (r+m >= M/2 && c+n >= N/2 && - r+m-M/2 < in_img.nr() && c+n-N/2 < in_img.nc()) - { - out_pixel = in_img[r+m-M/2][c+n-N/2]; - } - } - } - } - assign_pixel(out_img[r][c], out_pixel); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - long M, - long N - > - void binary_erosion ( - const in_image_type& in_img_, - out_image_type& out_img_, - const unsigned char (&structuring_element)[M][N] - ) - { - typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); - - using namespace morphological_operations_helpers; - COMPILE_TIME_ASSERT(M%2 == 1); - COMPILE_TIME_ASSERT(N%2 == 1); - DLIB_ASSERT(is_same_object(in_img_,out_img_) == false, - "\tvoid binary_erosion()" - << "\n\tYou must give two different image objects" - ); - COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale); - DLIB_ASSERT(is_binary_image(in_img_) , - "\tvoid binary_erosion()" - << "\n\tin_img must be a binary image" - ); - DLIB_ASSERT(is_binary_image(structuring_element) , - "\tvoid binary_erosion()" - << "\n\tthe structuring_element must be a binary image" - ); - - const_image_view<in_image_type> in_img(in_img_); - image_view<out_image_type> out_img(out_img_); - - - // if there isn't any input image then don't do anything - if (in_img.size() == 0) - { - out_img.clear(); - return; - } - - out_img.set_size(in_img.nr(),in_img.nc()); - - // apply the filter to the image - for (long r = 0; r < in_img.nr(); ++r) - { - for (long c = 0; c < 
in_img.nc(); ++c) - { - unsigned char out_pixel = on_pixel; - for (long m = 0; m < M && out_pixel == on_pixel; ++m) - { - for (long n = 0; n < N && out_pixel == on_pixel; ++n) - { - if (structuring_element[m][n] == on_pixel) - { - // if this pixel is inside the image then get it from the image - // but if it isn't just pretend it was an off_pixel value - if (r+m >= M/2 && c+n >= N/2 && - r+m-M/2 < in_img.nr() && c+n-N/2 < in_img.nc()) - { - out_pixel = in_img[r+m-M/2][c+n-N/2]; - } - else - { - out_pixel = off_pixel; - } - } - } - } - assign_pixel(out_img[r][c], out_pixel); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - long M, - long N - > - void binary_open ( - const in_image_type& in_img, - out_image_type& out_img, - const unsigned char (&structuring_element)[M][N], - const unsigned long iter = 1 - ) - { - typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); - - using namespace morphological_operations_helpers; - COMPILE_TIME_ASSERT(M%2 == 1); - COMPILE_TIME_ASSERT(N%2 == 1); - DLIB_ASSERT(is_same_object(in_img,out_img) == false, - "\tvoid binary_open()" - << "\n\tYou must give two different image objects" - ); - COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale); - DLIB_ASSERT(is_binary_image(in_img) , - "\tvoid binary_open()" - << "\n\tin_img must be a binary image" - ); - DLIB_ASSERT(is_binary_image(structuring_element) , - "\tvoid binary_open()" - << "\n\tthe structuring_element must be a binary image" - ); - - - // if there isn't any input image then don't do anything - if (num_rows(in_img)*num_columns(in_img) == 0) - { - set_image_size(out_img, 0,0); - return; - } - - 
set_image_size(out_img, num_rows(in_img), num_columns(in_img)); - - if (iter == 0) - { - // just copy the image over - assign_image(out_img, in_img); - } - else if (iter == 1) - { - in_image_type temp; - binary_erosion(in_img,temp,structuring_element); - binary_dilation(temp,out_img,structuring_element); - } - else - { - in_image_type temp1, temp2; - binary_erosion(in_img,temp1,structuring_element); - - // do the extra erosions - for (unsigned long i = 1; i < iter; ++i) - { - swap(temp1, temp2); - binary_erosion(temp2,temp1,structuring_element); - } - - // do the extra dilations - for (unsigned long i = 1; i < iter; ++i) - { - swap(temp1, temp2); - binary_dilation(temp2,temp1,structuring_element); - } - - binary_dilation(temp1,out_img,structuring_element); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - long M, - long N - > - void binary_close ( - const in_image_type& in_img, - out_image_type& out_img, - const unsigned char (&structuring_element)[M][N], - const unsigned long iter = 1 - ) - { - typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); - - - using namespace morphological_operations_helpers; - COMPILE_TIME_ASSERT(M%2 == 1); - COMPILE_TIME_ASSERT(N%2 == 1); - DLIB_ASSERT(is_same_object(in_img,out_img) == false, - "\tvoid binary_close()" - << "\n\tYou must give two different image objects" - ); - COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale); - DLIB_ASSERT(is_binary_image(in_img) , - "\tvoid binary_close()" - << "\n\tin_img must be a binary image" - ); - DLIB_ASSERT(is_binary_image(structuring_element) , - "\tvoid binary_close()" - << "\n\tthe structuring_element must be a binary 
image" - ); - - - // if there isn't any input image then don't do anything - if (num_rows(in_img)*num_columns(in_img) == 0) - { - set_image_size(out_img, 0,0); - return; - } - - set_image_size(out_img, num_rows(in_img), num_columns(in_img)); - - if (iter == 0) - { - // just copy the image over - assign_image(out_img, in_img); - } - else if (iter == 1) - { - in_image_type temp; - binary_dilation(in_img,temp,structuring_element); - binary_erosion(temp,out_img,structuring_element); - } - else - { - in_image_type temp1, temp2; - binary_dilation(in_img,temp1,structuring_element); - - // do the extra dilations - for (unsigned long i = 1; i < iter; ++i) - { - swap(temp1, temp2); - binary_dilation(temp2,temp1,structuring_element); - } - - // do the extra erosions - for (unsigned long i = 1; i < iter; ++i) - { - swap(temp1, temp2); - binary_erosion(temp2,temp1,structuring_element); - } - - binary_erosion(temp1,out_img,structuring_element); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type1, - typename in_image_type2, - typename out_image_type - > - void binary_intersection ( - const in_image_type1& in_img1_, - const in_image_type2& in_img2_, - out_image_type& out_img_ - ) - { - typedef typename image_traits<in_image_type1>::pixel_type in_pixel_type1; - typedef typename image_traits<in_image_type2>::pixel_type in_pixel_type2; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type1>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type2>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); - - using namespace morphological_operations_helpers; - COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type1>::grayscale); - COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type2>::grayscale); - DLIB_ASSERT(is_binary_image(in_img1_) , - "\tvoid binary_intersection()" - << 
"\n\tin_img1 must be a binary image" - ); - DLIB_ASSERT(is_binary_image(in_img2_) , - "\tvoid binary_intersection()" - << "\n\tin_img2 must be a binary image" - ); - - const_image_view<in_image_type1> in_img1(in_img1_); - const_image_view<in_image_type2> in_img2(in_img2_); - image_view<out_image_type> out_img(out_img_); - - DLIB_ASSERT(in_img1.nc() == in_img2.nc(), - "\tvoid binary_intersection()" - << "\n\tin_img1 and in_img2 must have the same ncs." - << "\n\tin_img1.nc(): " << in_img1.nc() - << "\n\tin_img2.nc(): " << in_img2.nc() - ); - DLIB_ASSERT(in_img1.nr() == in_img2.nr(), - "\tvoid binary_intersection()" - << "\n\tin_img1 and in_img2 must have the same nrs." - << "\n\tin_img1.nr(): " << in_img1.nr() - << "\n\tin_img2.nr(): " << in_img2.nr() - ); - - - - // if there isn't any input image then don't do anything - if (in_img1.size() == 0) - { - out_img.clear(); - return; - } - - out_img.set_size(in_img1.nr(),in_img1.nc()); - - for (long r = 0; r < in_img1.nr(); ++r) - { - for (long c = 0; c < in_img1.nc(); ++c) - { - if (in_img1[r][c] == on_pixel && in_img2[r][c] == on_pixel) - assign_pixel(out_img[r][c], on_pixel); - else - assign_pixel(out_img[r][c], off_pixel); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type1, - typename in_image_type2, - typename out_image_type - > - void binary_union ( - const in_image_type1& in_img1_, - const in_image_type2& in_img2_, - out_image_type& out_img_ - ) - { - typedef typename image_traits<in_image_type1>::pixel_type in_pixel_type1; - typedef typename image_traits<in_image_type2>::pixel_type in_pixel_type2; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type1>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type2>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); - - - using namespace 
morphological_operations_helpers; - COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type1>::grayscale); - COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type2>::grayscale); - DLIB_ASSERT(is_binary_image(in_img1_) , - "\tvoid binary_intersection()" - << "\n\tin_img1 must be a binary image" - ); - DLIB_ASSERT(is_binary_image(in_img2_) , - "\tvoid binary_intersection()" - << "\n\tin_img2 must be a binary image" - ); - - const_image_view<in_image_type1> in_img1(in_img1_); - const_image_view<in_image_type2> in_img2(in_img2_); - image_view<out_image_type> out_img(out_img_); - - DLIB_ASSERT(in_img1.nc() == in_img2.nc(), - "\tvoid binary_intersection()" - << "\n\tin_img1 and in_img2 must have the same ncs." - << "\n\tin_img1.nc(): " << in_img1.nc() - << "\n\tin_img2.nc(): " << in_img2.nc() - ); - DLIB_ASSERT(in_img1.nr() == in_img2.nr(), - "\tvoid binary_intersection()" - << "\n\tin_img1 and in_img2 must have the same nrs." - << "\n\tin_img1.nr(): " << in_img1.nr() - << "\n\tin_img2.nr(): " << in_img2.nr() - ); - - - - // if there isn't any input image then don't do anything - if (in_img1.size() == 0) - { - out_img.clear(); - return; - } - - out_img.set_size(in_img1.nr(),in_img1.nc()); - - for (long r = 0; r < in_img1.nr(); ++r) - { - for (long c = 0; c < in_img1.nc(); ++c) - { - if (in_img1[r][c] == on_pixel || in_img2[r][c] == on_pixel) - assign_pixel(out_img[r][c], on_pixel); - else - assign_pixel(out_img[r][c], off_pixel); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type1, - typename in_image_type2, - typename out_image_type - > - void binary_difference ( - const in_image_type1& in_img1_, - const in_image_type2& in_img2_, - out_image_type& out_img_ - ) - { - typedef typename image_traits<in_image_type1>::pixel_type in_pixel_type1; - typedef typename image_traits<in_image_type2>::pixel_type in_pixel_type2; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - 
COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type1>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type2>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); - - using namespace morphological_operations_helpers; - COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type1>::grayscale); - COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type2>::grayscale); - DLIB_ASSERT(is_binary_image(in_img1_) , - "\tvoid binary_difference()" - << "\n\tin_img1 must be a binary image" - ); - DLIB_ASSERT(is_binary_image(in_img2_) , - "\tvoid binary_difference()" - << "\n\tin_img2 must be a binary image" - ); - - const_image_view<in_image_type1> in_img1(in_img1_); - const_image_view<in_image_type2> in_img2(in_img2_); - image_view<out_image_type> out_img(out_img_); - - DLIB_ASSERT(in_img1.nc() == in_img2.nc(), - "\tvoid binary_difference()" - << "\n\tin_img1 and in_img2 must have the same ncs." - << "\n\tin_img1.nc(): " << in_img1.nc() - << "\n\tin_img2.nc(): " << in_img2.nc() - ); - DLIB_ASSERT(in_img1.nr() == in_img2.nr(), - "\tvoid binary_difference()" - << "\n\tin_img1 and in_img2 must have the same nrs." 
- << "\n\tin_img1.nr(): " << in_img1.nr() - << "\n\tin_img2.nr(): " << in_img2.nr() - ); - - - - // if there isn't any input image then don't do anything - if (in_img1.size() == 0) - { - out_img.clear(); - return; - } - - out_img.set_size(in_img1.nr(),in_img1.nc()); - - for (long r = 0; r < in_img1.nr(); ++r) - { - for (long c = 0; c < in_img1.nc(); ++c) - { - if (in_img1[r][c] == on_pixel && in_img2[r][c] == off_pixel) - assign_pixel(out_img[r][c], on_pixel); - else - assign_pixel(out_img[r][c], off_pixel); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type - > - void binary_complement ( - const in_image_type& in_img_, - out_image_type& out_img_ - ) - { - typedef typename image_traits<in_image_type>::pixel_type in_pixel_type; - typedef typename image_traits<out_image_type>::pixel_type out_pixel_type; - COMPILE_TIME_ASSERT( pixel_traits<in_pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<out_pixel_type>::has_alpha == false ); - - - using namespace morphological_operations_helpers; - COMPILE_TIME_ASSERT(pixel_traits<in_pixel_type>::grayscale); - DLIB_ASSERT(is_binary_image(in_img_) , - "\tvoid binary_complement()" - << "\n\tin_img must be a binary image" - ); - - const_image_view<in_image_type> in_img(in_img_); - image_view<out_image_type> out_img(out_img_); - - // if there isn't any input image then don't do anything - if (in_img.size() == 0) - { - out_img.clear(); - return; - } - - out_img.set_size(in_img.nr(),in_img.nc()); - - for (long r = 0; r < in_img.nr(); ++r) - { - for (long c = 0; c < in_img.nc(); ++c) - { - if (in_img[r][c] == on_pixel) - assign_pixel(out_img[r][c], off_pixel); - else - assign_pixel(out_img[r][c], on_pixel); - } - } - } - - template < - typename image_type - > - void binary_complement ( - image_type& img - ) - { - binary_complement(img,img); - } - -// 
---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template <typename image_type> - inline bool should_remove_pixel ( - const image_type& img, - long r, - long c, - int iter - ) - { - unsigned int p2 = img[r-1][c]; - unsigned int p3 = img[r-1][c+1]; - unsigned int p4 = img[r][c+1]; - unsigned int p5 = img[r+1][c+1]; - unsigned int p6 = img[r+1][c]; - unsigned int p7 = img[r+1][c-1]; - unsigned int p8 = img[r][c-1]; - unsigned int p9 = img[r-1][c-1]; - - int A = (p2 == 0 && p3 == 255) + (p3 == 0 && p4 == 255) + - (p4 == 0 && p5 == 255) + (p5 == 0 && p6 == 255) + - (p6 == 0 && p7 == 255) + (p7 == 0 && p8 == 255) + - (p8 == 0 && p9 == 255) + (p9 == 0 && p2 == 255); - int B = p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9; - int m1 = iter == 0 ? (p2 * p4 * p6) : (p2 * p4 * p8); - int m2 = iter == 0 ? (p4 * p6 * p8) : (p2 * p6 * p8); - // Decide if we should remove the pixel img[r][c]. - return (A == 1 && (B >= 2*255 && B <= 6*255) && m1 == 0 && m2 == 0); - } - - template <typename image_type> - inline void add_to_remove ( - std::vector<point>& to_remove, - array2d<unsigned char>& marker, - const image_type& img, - long r, - long c, - int iter - ) - { - if (marker[r][c]&&should_remove_pixel(img,r,c,iter)) - { - to_remove.push_back(point(c,r)); - marker[r][c] = 0; - } - } - - template <typename image_type> - inline bool is_bw_border_pixel( - const image_type& img, - long r, - long c - ) - { - unsigned int p2 = img[r-1][c]; - unsigned int p3 = img[r-1][c+1]; - unsigned int p4 = img[r][c+1]; - unsigned int p5 = img[r+1][c+1]; - unsigned int p6 = img[r+1][c]; - unsigned int p7 = img[r+1][c-1]; - unsigned int p8 = img[r][c-1]; - unsigned int p9 = img[r-1][c-1]; - - int B = p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9; - // If you are on but at least one of your neighbors isn't. 
- return B<8*255 && img[r][c]; - - } - - inline void add_if( - std::vector<point>& to_check2, - const array2d<unsigned char>& marker, - long c, - long r - ) - { - if (marker[r][c]) - to_check2.push_back(point(c,r)); - } - - } // end namespace impl - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - void skeleton( - image_type& img_ - ) - { - /* - The implementation of this function is based on the paper - "A fast parallel algorithm for thinning digital patterns” by T.Y. Zhang and C.Y. Suen. - and also the excellent discussion of it at: - http://opencv-code.com/quick-tips/implementation-of-thinning-algorithm-in-opencv/ - */ - - typedef typename image_traits<image_type>::pixel_type pixel_type; - - // This function only works on grayscale images - COMPILE_TIME_ASSERT(pixel_traits<pixel_type>::grayscale); - - using namespace impl; - // Note that it's important to zero the border for 2 reasons. First, it allows - // thinning to being at the border of the image. But more importantly, it causes - // the mask to have a border of 0 pixels as well which we use later to avoid - // indexing outside the image inside add_to_remove(). - zero_border_pixels(img_,1,1); - image_view<image_type> img(img_); - - // We use the marker to keep track of pixels we have committed to removing but - // haven't yet removed from img. - array2d<unsigned char> marker(img.nr(), img.nc()); - assign_image(marker, img); - - - // Begin by making a list of the pixels on the borders of binary blobs. - std::vector<point> to_remove, to_check, to_check2; - for (int r = 1; r < img.nr()-1; r++) - { - for (int c = 1; c < img.nc()-1; c++) - { - if (is_bw_border_pixel(img, r, c)) - { - to_check.push_back(point(c,r)); - } - } - } - - // Now start iteratively looking at the border pixels and removing them. 
- while(to_check.size() != 0) - { - for (int iter = 0; iter <= 1; ++iter) - { - // Check which pixels we should remove - to_remove.clear(); - for (unsigned long i = 0; i < to_check.size(); ++i) - { - long r = to_check[i].y(); - long c = to_check[i].x(); - add_to_remove(to_remove, marker, img, r, c, iter); - } - for (unsigned long i = 0; i < to_check2.size(); ++i) - { - long r = to_check2[i].y(); - long c = to_check2[i].x(); - add_to_remove(to_remove, marker, img, r, c, iter); - } - // Now remove those pixels. Also add their neighbors into the "to check" - // pixel list for the next iteration. - for (unsigned long i = 0; i < to_remove.size(); ++i) - { - long r = to_remove[i].y(); - long c = to_remove[i].x(); - // remove the pixel - img[r][c] = 0; - add_if(to_check2, marker, c-1, r-1); - add_if(to_check2, marker, c, r-1); - add_if(to_check2, marker, c+1, r-1); - add_if(to_check2, marker, c-1, r); - add_if(to_check2, marker, c+1, r); - add_if(to_check2, marker, c-1, r+1); - add_if(to_check2, marker, c, r+1); - add_if(to_check2, marker, c+1, r+1); - } - } - to_check.clear(); - to_check.swap(to_check2); - } - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_MORPHOLOGICAL_OPERATIONs_ - diff --git a/ml/dlib/dlib/image_transforms/morphological_operations_abstract.h b/ml/dlib/dlib/image_transforms/morphological_operations_abstract.h deleted file mode 100644 index c69bdd1ca..000000000 --- a/ml/dlib/dlib/image_transforms/morphological_operations_abstract.h +++ /dev/null @@ -1,316 +0,0 @@ -// Copyright (C) 2006 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#undef DLIB_MORPHOLOGICAL_OPERATIONs_ABSTRACT_ -#ifdef DLIB_MORPHOLOGICAL_OPERATIONs_ABSTRACT_ - -#include "../pixel.h" -#include "thresholding_abstract.h" -#include "../image_processing/generic_image.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - long M, - long N - > - void binary_dilation ( - const in_image_type& in_img, - out_image_type& out_img, - const unsigned char (&structuring_element)[M][N] - ); - /*! - requires - - in_image_type and out_image_type are image objects that implement the - interface defined in dlib/image_processing/generic_image.h - - in_img must contain a grayscale pixel type. - - both in_img and out_img must contain pixels with no alpha channel. - (i.e. pixel_traits::has_alpha==false for their pixels) - - is_same_object(in_img,out_img) == false - - M % 2 == 1 (i.e. M must be odd) - - N % 2 == 1 (i.e. N must be odd) - - all pixels in in_img are set to either on_pixel or off_pixel - (i.e. it must be a binary image) - - all pixels in structuring_element are set to either on_pixel or off_pixel - (i.e. it must be a binary image) - ensures - - Does a binary dilation of in_img using the given structuring element and - stores the result in out_img. - - #out_img.nc() == in_img.nc() - - #out_img.nr() == in_img.nr() - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - long M, - long N - > - void binary_erosion ( - const in_image_type& in_img, - out_image_type& out_img, - const unsigned char (&structuring_element)[M][N] - ); - /*! - requires - - in_image_type and out_image_type are image objects that implement the - interface defined in dlib/image_processing/generic_image.h - - in_img must contain a grayscale pixel type. - - both in_img and out_img must contain pixels with no alpha channel. 
- (i.e. pixel_traits::has_alpha==false for their pixels) - - is_same_object(in_img,out_img) == false - - M % 2 == 1 (i.e. M must be odd) - - N % 2 == 1 (i.e. N must be odd) - - all pixels in in_img are set to either on_pixel or off_pixel - (i.e. it must be a binary image) - - all pixels in structuring_element are set to either on_pixel or off_pixel - (i.e. it must be a binary image) - ensures - - Does a binary erosion of in_img using the given structuring element and - stores the result in out_img. - - #out_img.nc() == in_img.nc() - - #out_img.nr() == in_img.nr() - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - long M, - long N - > - void binary_open ( - const in_image_type& in_img, - out_image_type& out_img, - const unsigned char (&structuring_element)[M][N], - const unsigned long iter = 1 - ); - /*! - requires - - in_image_type and out_image_type are image objects that implement the - interface defined in dlib/image_processing/generic_image.h - - in_img must contain a grayscale pixel type. - - both in_img and out_img must contain pixels with no alpha channel. - (i.e. pixel_traits::has_alpha==false for their pixels) - - is_same_object(in_img,out_img) == false - - M % 2 == 1 (i.e. M must be odd) - - N % 2 == 1 (i.e. N must be odd) - - all pixels in in_img are set to either on_pixel or off_pixel - (i.e. it must be a binary image) - - all pixels in structuring_element are set to either on_pixel or off_pixel - (i.e. it must be a binary image) - ensures - - Does a binary open of in_img using the given structuring element and - stores the result in out_img. Specifically, iter iterations of binary - erosion are applied and then iter iterations of binary dilation. 
- - #out_img.nc() == in_img.nc() - - #out_img.nr() == in_img.nr() - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - long M, - long N - > - void binary_close ( - const in_image_type& in_img, - out_image_type& out_img, - const unsigned char (&structuring_element)[M][N], - const unsigned long iter = 1 - ); - /*! - requires - - in_image_type and out_image_type are image objects that implement the - interface defined in dlib/image_processing/generic_image.h - - in_img must contain a grayscale pixel type. - - both in_img and out_img must contain pixels with no alpha channel. - (i.e. pixel_traits::has_alpha==false for their pixels) - - is_same_object(in_img,out_img) == false - - M % 2 == 1 (i.e. M must be odd) - - N % 2 == 1 (i.e. N must be odd) - - all pixels in in_img are set to either on_pixel or off_pixel - (i.e. it must be a binary image) - - all pixels in structuring_element are set to either on_pixel or off_pixel - (i.e. it must be a binary image) - ensures - - Does a binary close of in_img using the given structuring element and - stores the result in out_img. Specifically, iter iterations of binary - dilation are applied and then iter iterations of binary erosion. - - #out_img.nc() == in_img.nc() - - #out_img.nr() == in_img.nr() - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type1, - typename in_image_type2, - typename out_image_type - > - void binary_intersection ( - const in_image_type1& in_img1, - const in_image_type2& in_img2, - out_image_type& out_img - ); - /*! - requires - - in_image_type1, in_image_type2, and out_image_type are image objects that - implement the interface defined in dlib/image_processing/generic_image.h - - in_img1 and in_img2 must contain grayscale pixel types. 
- - in_img1, in_img2, and out_img must contain pixels with no alpha channel. - (i.e. pixel_traits::has_alpha==false for their pixels) - - all pixels in in_img1 and in_img2 are set to either on_pixel or off_pixel - (i.e. they must be binary images) - - in_img1.nc() == in_img2.nc() - - in_img1.nr() == in_img2.nr() - ensures - - #out_img == the binary intersection of in_img1 and in_img2. (i.e. All - the pixels that are set to on_pixel in both in_img1 and in_img2 will be set - to on_pixel in #out_img. All other pixels will be set to off_pixel) - - #out_img.nc() == in_img.nc() - - #out_img.nr() == in_img.nr() - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type1, - typename in_image_type2, - typename out_image_type - > - void binary_union ( - const in_image_type1& in_img1, - const in_image_type2& in_img2, - out_image_type& out_img - ); - /*! - requires - - in_image_type1, in_image_type2, and out_image_type are image objects that - implement the interface defined in dlib/image_processing/generic_image.h - - in_img1 and in_img2 must contain grayscale pixel types. - - in_img1, in_img2, and out_img must contain pixels with no alpha channel. - (i.e. pixel_traits::has_alpha==false for their pixels) - - all pixels in in_img1 and in_img2 are set to either on_pixel or off_pixel - (i.e. they must be binary images) - - in_img1.nc() == in_img2.nc() - - in_img1.nr() == in_img2.nr() - ensures - - #out_img == the binary union of in_img1 and in_img2. (i.e. All - the pixels that are set to on_pixel in in_img1 and/or in_img2 will be set - to on_pixel in #out_img. 
All other pixels will be set to off_pixel) - - #out_img.nc() == in_img.nc() - - #out_img.nr() == in_img.nr() - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type1, - typename in_image_type2, - typename out_image_type - > - void binary_difference ( - const in_image_type1& in_img1, - const in_image_type2& in_img2, - out_image_type& out_img - ); - /*! - requires - - in_image_type1, in_image_type2, and out_image_type are image objects that - implement the interface defined in dlib/image_processing/generic_image.h - - in_img1 and in_img2 must contain grayscale pixel types. - - in_img1, in_img2, and out_img must contain pixels with no alpha channel. - (i.e. pixel_traits::has_alpha==false for their pixels) - - all pixels in in_img1 and in_img2 are set to either on_pixel or off_pixel - (i.e. they must be binary images) - - in_img1.nc() == in_img2.nc() - - in_img1.nr() == in_img2.nr() - ensures - - #out_img == the binary difference of in_img1 and in_img2. (i.e. #out_img - will be a copy of in_img1 except that any pixels in in_img2 that are set to - on_pixel will be set to off_pixel) - - #out_img.nc() == in_img.nc() - - #out_img.nr() == in_img.nr() - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type - > - void binary_complement ( - const in_image_type& in_img, - out_image_type& out_img - ); - /*! - requires - - in_image_type and out_image_type are image objects that implement the - interface defined in dlib/image_processing/generic_image.h - - in_img must contain a grayscale pixel type. - - both in_img and out_img must contain pixels with no alpha channel. - (i.e. pixel_traits::has_alpha==false for their pixels) - - all pixels in in_img are set to either on_pixel or off_pixel - (i.e. it must be a binary image) - ensures - - #out_img == the binary complement of in_img. (i.e. 
For each pixel in - in_img, if it is on_pixel then it will be set to off_pixel in #out_img and - if it was off_pixel in in_img then it will be on_pixel in #out_img) - - #out_img.nc() == in_img.nc() - - #out_img.nr() == in_img.nr() - !*/ - - template < - typename image_type - > - void binary_complement ( - image_type& img - ); - /*! - requires - - it must be valid to call binary_complement(img,img); - ensures - - calls binary_complement(img,img); - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - void skeleton( - image_type& img - ); - /*! - requires - - image_type is an object that implement the interface defined in - dlib/image_processing/generic_image.h - - img must contain a grayscale pixel type. - - all pixels in img are set to either on_pixel or off_pixel. - (i.e. it must be a binary image) - ensures - - This function computes the skeletonization of img and stores the result in - #img. That is, given a binary image, we progressively thin the binary blobs - (composed of on_pixel values) until only a single pixel wide skeleton of the - original blobs remains. - - #img.nc() == img.nc() - - #img.nr() == img.nr() - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_MORPHOLOGICAL_OPERATIONs_ABSTRACT_ - - diff --git a/ml/dlib/dlib/image_transforms/random_color_transform.h b/ml/dlib/dlib/image_transforms/random_color_transform.h deleted file mode 100644 index 7433da1f7..000000000 --- a/ml/dlib/dlib/image_transforms/random_color_transform.h +++ /dev/null @@ -1,157 +0,0 @@ -// Copyright (C) 2016 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#ifndef DLIB_RANDOM_cOLOR_TRANSFORM_Hh_ -#define DLIB_RANDOM_cOLOR_TRANSFORM_Hh_ - -#include "random_color_transform_abstract.h" -#include "../image_processing/generic_image.h" -#include "../pixel.h" -#include "../rand.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class random_color_transform - { - public: - - random_color_transform ( - dlib::rand& rnd, - const double gamma_magnitude = 0.5, - const double color_magnitude = 0.2 - ) - { - // pick a random gamma correction factor. - double gamma = std::max(0.0, 1 + gamma_magnitude*(rnd.get_random_double()-0.5)); - - // pick a random color balancing scheme. - double red_scale = 1-rnd.get_random_double()*color_magnitude; - double green_scale = 1-rnd.get_random_double()*color_magnitude; - double blue_scale = 1-rnd.get_random_double()*color_magnitude; - const double m = 255*std::max(std::max(red_scale,green_scale),blue_scale); - red_scale /= m; - green_scale /= m; - blue_scale /= m; - - // Now compute a lookup table for all the color channels. The table tells us - // what the transform does. 
- table.resize(256*3); - unsigned long i = 0; - for (int k = 0; k < 256; ++k) - { - double v = 255*std::pow(k*red_scale, gamma); - table[i++] = (unsigned char)(v + 0.5); - } - for (int k = 0; k < 256; ++k) - { - double v = 255*std::pow(k*green_scale, gamma); - table[i++] = (unsigned char)(v + 0.5); - } - for (int k = 0; k < 256; ++k) - { - double v = 255*std::pow(k*blue_scale, gamma); - table[i++] = (unsigned char)(v + 0.5); - } - } - - rgb_pixel operator()(rgb_pixel p) const - { - p.red = table[(unsigned int)p.red]; - p.green = table[(unsigned int)p.green+256]; - p.blue = table[(unsigned int)p.blue+512]; - return p; - } - - private: - std::vector<unsigned char> table; - }; - -// ---------------------------------------------------------------------------------------- - - template <typename image_type> - void disturb_colors ( - image_type& img_, - dlib::rand& rnd, - const double gamma_magnitude = 0.5, - const double color_magnitude = 0.2 - ) - { - image_view<image_type> img(img_); - random_color_transform tform(rnd, gamma_magnitude, color_magnitude); - for (long r = 0; r < img.nr(); ++r) - { - for (long c = 0; c < img.nc(); ++c) - { - rgb_pixel temp; - assign_pixel(temp, img[r][c]); - temp = tform(temp); - assign_pixel(img[r][c], temp); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template <typename image_type> - void apply_random_color_offset ( - image_type& img_, - dlib::rand& rnd - ) - { - // Make a random color offset. This tform matrix came from looking at the - // covariance matrix of RGB values in a bunch of images. In particular, if you - // multiply Gaussian random vectors by tform it will result in vectors with the - // same covariance matrix as the original RGB data. Also, this color transform is - // what is suggested by the paper: - // Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet - // classification with deep convolutional neural networks." 
Advances in neural - // information processing systems. 2012. - // Except that we used the square root of the eigenvalues (which I'm pretty sure is - // what the authors intended). - matrix<double,3,3> tform; - tform = -66.379, 25.094, 6.79698, - -68.0492, -0.302309, -13.9539, - -68.4907, -24.0199, 7.27653; - matrix<double,3,1> v; - v = rnd.get_random_gaussian(),rnd.get_random_gaussian(),rnd.get_random_gaussian(); - v = round(tform*0.1*v); - const int roffset = v(0); - const int goffset = v(1); - const int boffset = v(2); - - // Make up lookup tables that apply the color mapping so we don't have to put a - // bunch of complicated conditional branches in the loop below. - unsigned char rtable[256]; - unsigned char gtable[256]; - unsigned char btable[256]; - for (int i = 0; i < 256; ++i) - { - rtable[i] = put_in_range(0, 255, i+roffset); - gtable[i] = put_in_range(0, 255, i+goffset); - btable[i] = put_in_range(0, 255, i+boffset); - } - - // now transform the image. - image_view<image_type> img(img_); - for (long r = 0; r < img.nr(); ++r) - { - for (long c = 0; c < img.nc(); ++c) - { - rgb_pixel temp; - assign_pixel(temp, img[r][c]); - temp.red = rtable[temp.red]; - temp.green = gtable[temp.green]; - temp.blue = btable[temp.blue]; - assign_pixel(img[r][c], temp); - } - } - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RANDOM_cOLOR_TRANSFORM_Hh_ - diff --git a/ml/dlib/dlib/image_transforms/random_color_transform_abstract.h b/ml/dlib/dlib/image_transforms/random_color_transform_abstract.h deleted file mode 100644 index 5826e16a6..000000000 --- a/ml/dlib/dlib/image_transforms/random_color_transform_abstract.h +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright (C) 2016 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#undef DLIB_RANDOM_cOLOR_TRANSFORM_ABSTRACT_Hh_ -#ifdef DLIB_RANDOM_cOLOR_TRANSFORM_ABSTRACT_Hh_ - -#include "../image_processing/generic_image.h" -#include "../pixel.h" -#include "../rand.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class random_color_transform - { - /*! - WHAT THIS OBJECT REPRESENTS - This object generates a random color balancing and gamma correction - transform. It then allows you to apply that specific transform to as many - rgb_pixel objects as you like. - !*/ - - public: - - random_color_transform ( - dlib::rand& rnd, - const double gamma_magnitude = 0.5, - const double color_magnitude = 0.2 - ); - /*! - requires - - 0 <= gamma_magnitude - - 0 <= color_magnitude <= 1 - ensures - - This constructor generates a random color transform which can be applied - by calling this object's operator() method. - - The color transform is a gamma correction and color rebalancing. If - gamma_magnitude == 0 and color_magnitude == 0 then the transform doesn't - change any colors at all. However, the larger these parameters the more - noticeable the resulting transform. - !*/ - - rgb_pixel operator()( - rgb_pixel p - ) const; - /*! - ensures - - returns the color transformed version of p. - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - template <typename image_type> - void disturb_colors ( - image_type& img, - dlib::rand& rnd, - const double gamma_magnitude = 0.5, - const double color_magnitude = 0.2 - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - ensures - - Applies a random color transform to the given image. This is done by - creating a random_color_transform with the given parameters and then - transforming each pixel in the image with the resulting transform. 
    !*/

// ----------------------------------------------------------------------------------------

    template <typename image_type>
    void apply_random_color_offset (
        image_type& img,
        dlib::rand& rnd
    );
    /*!
        ensures
            - Picks a random color offset vector and adds it to the given image. The offset
              vector is selected using the method described in the paper:
                Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet
                classification with deep convolutional neural networks." Advances in neural
                information processing systems. 2012.
              In particular, we sample an RGB value from the typical distribution of RGB
              values, assuming it has a Gaussian distribution, and then divide it by 10.
              This sampled RGB vector is added to each pixel of img.
    !*/

// ----------------------------------------------------------------------------------------

#endif // DLIB_RANDOM_cOLOR_TRANSFORM_ABSTRACT_Hh_

diff --git a/ml/dlib/dlib/image_transforms/random_cropper.h b/ml/dlib/dlib/image_transforms/random_cropper.h deleted file mode 100644 index 2c754b608..000000000 --- a/ml/dlib/dlib/image_transforms/random_cropper.h +++ /dev/null @@ -1,361 +0,0 @@
// Copyright (C) 2016 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_RaNDOM_CROPPER_H_
#define DLIB_RaNDOM_CROPPER_H_

#include "random_cropper_abstract.h"
#include "../threads.h"
#include <mutex>
#include <vector>
#include "interpolation.h"
#include "../image_processing/full_object_detection.h"
#include "../rand.h"

namespace dlib
{
    // Cuts fixed size chips out of images at a random position, scale and rotation,
    // optionally mirrored, and maps the mmod_rect annotations of the source image
    // into each chip.  Every use of rnd made by the cropping code below happens
    // while rnd_mutex is held; set_seed() is the one member that touches rnd
    // without taking the mutex.
    class random_cropper
    {
        chip_dims dims = chip_dims(300,300);
        bool randomly_flip = true;
        double max_rotation_degrees = 30;
        long min_object_length_long_dim = 75; // cropped object will be at least this many pixels along its longest edge.
        long min_object_length_short_dim = 30; // cropped object will be at least this many pixels along its shortest edge.
        double max_object_size = 0.7; // cropped object will be at most this fraction of the size of the image.
        double background_crops_fraction = 0.5;
        double translate_amount = 0.10;

        std::mutex rnd_mutex;
        dlib::rand rnd;
    public:

        // Replaces the internal generator with one constructed from seed.
        // NOTE(review): rnd_mutex is not taken here, so this presumably must not
        // run concurrently with the cropping calls -- confirm with callers.
        void set_seed (
            time_t seed
        ) { rnd = dlib::rand(seed); }

        double get_translate_amount (
        ) const { return translate_amount; }

        // Throws (DLIB_CASSERT) unless value >= 0.
        void set_translate_amount (
            double value
        )
        {
            DLIB_CASSERT(0 <= value);
            translate_amount = value;
        }

        double get_background_crops_fraction (
        ) const { return background_crops_fraction; }

        // Throws (DLIB_CASSERT) unless value is in [0,1].
        void set_background_crops_fraction (
            double value
        )
        {
            DLIB_CASSERT(0 <= value && value <= 1);
            background_crops_fraction = value;
        }

        const chip_dims& get_chip_dims(
        ) const { return dims; }

        void set_chip_dims (
            const chip_dims& dims_
        ) { dims = dims_; }

        // Convenience overload: forwards to the chip_dims version.
        void set_chip_dims (
            unsigned long rows,
            unsigned long cols
        ) { set_chip_dims(chip_dims(rows,cols)); }

        bool get_randomly_flip (
        ) const { return randomly_flip; }

        void set_randomly_flip (
            bool value
        ) { randomly_flip = value; }

        double get_max_rotation_degrees (
        ) const { return max_rotation_degrees; }
        // The sign of value is discarded; only its magnitude is stored.
        void set_max_rotation_degrees (
            double value
        ) { max_rotation_degrees = std::abs(value); }

        long get_min_object_length_long_dim (
        ) const { return min_object_length_long_dim; }
        long get_min_object_length_short_dim (
        ) const { return min_object_length_short_dim; }

        // Sets both minimum lengths at once.  Throws (DLIB_CASSERT) unless
        // 0 < short_dim <= long_dim.
        void set_min_object_size (
            long long_dim,
            long short_dim
        )
        {
            DLIB_CASSERT(0 < short_dim && short_dim <= long_dim);
            min_object_length_long_dim = long_dim;
            min_object_length_short_dim = short_dim;
        }

        double get_max_object_size (
        ) const { return max_object_size; }
        // Throws (DLIB_CASSERT) unless value > 0.
        void set_max_object_size (
            double value
        )
        {
            DLIB_CASSERT(0 < value);
            max_object_size = value;
        }

        // Batch version: discards whatever crops and crop_rects held and then
        // produces num_crops new crops through append().
        template <
            typename array_type
            >
        void operator() (
            size_t num_crops,
            const array_type& images,
            const std::vector<std::vector<mmod_rect>>& rects,
            array_type& crops,
            std::vector<std::vector<mmod_rect>>& crop_rects
        )
        {
            DLIB_CASSERT(images.size() == rects.size());
            crops.clear();
            crop_rects.clear();
            append(num_crops, images, rects, crops, crop_rects);
        }

        // Grows crops and crop_rects by num_crops entries and fills each new entry
        // by calling the single crop operator() from inside parallel_for.  Existing
        // entries are left alone.
        template <
            typename array_type
            >
        void append (
            size_t num_crops,
            const array_type& images,
            const std::vector<std::vector<mmod_rect>>& rects,
            array_type& crops,
            std::vector<std::vector<mmod_rect>>& crop_rects
        )
        {
            DLIB_CASSERT(images.size() == rects.size());
            DLIB_CASSERT(crops.size() == crop_rects.size());
            auto original_size = crops.size();
            crops.resize(crops.size()+num_crops);
            crop_rects.resize(crop_rects.size()+num_crops);
            // Both containers were resized up front, so each invocation only writes
            // to its own slot i.
            parallel_for(original_size, original_size+num_crops, [&](long i) {
                (*this)(images, rects, crops[i], crop_rects[i]);
            });
        }


        // Picks one image index at random (under rnd_mutex) and crops from that
        // image and its rectangles.
        // NOTE(review): an empty images array reaches rnd.get_integer(0); what that
        // does is not visible here -- confirm callers never pass an empty array.
        template <
            typename array_type,
            typename image_type
            >
        void operator() (
            const array_type& images,
            const std::vector<std::vector<mmod_rect>>& rects,
            image_type& crop,
            std::vector<mmod_rect>& crop_rects
        )
        {
            DLIB_CASSERT(images.size() == rects.size());
            size_t idx;
            // The lock only covers the draw; the crop itself runs unlocked.
            { std::lock_guard<std::mutex> lock(rnd_mutex);
                idx = rnd.get_integer(images.size());
            }
            (*this)(images[idx], rects[idx], crop, crop_rects);
        }

        // Crops from a single image that has no annotations and returns the chip.
        template <
            typename image_type1
            >
        image_type1 operator() (
            const image_type1& img
        )
        {
            image_type1 crop;
            std::vector<mmod_rect> junk1, junk2;
            (*this)(img, junk1, crop, junk2);
            return crop;
        }

        // Core routine.  Plans a crop, extracts it into crop, and fills crop_rects
        // with the members of rects that overlap the crop, expressed in crop
        // coordinates.  Throws (DLIB_CASSERT) if img has zero pixels.
        template <
            typename image_type1,
            typename image_type2
            >
        void operator() (
            const image_type1& img,
            const std::vector<mmod_rect>& rects,
            image_type2& crop,
            std::vector<mmod_rect>& crop_rects
        )
        {
            DLIB_CASSERT(num_rows(img)*num_columns(img) != 0);
            chip_details crop_plan;
            bool should_flip_crop;
            make_crop_plan(img, rects, crop_plan, should_flip_crop);

            extract_image_chip(img, crop_plan, crop);
            const rectangle_transform tform = get_mapping_to_chip(crop_plan);

            // copy rects into crop_rects and set ones that are outside the crop to ignore or
            // drop entirely as appropriate.
            crop_rects.clear();
            // Iterating by value on purpose: rect is modified before being stored.
            for (auto rect : rects)
            {
                // map to crop
                rect.rect = tform(rect.rect);

                // if the rect is at least partly in the crop
                if (get_rect(crop).intersect(rect.rect).area() != 0)
                {
                    // set to ignore if not totally in the crop or if too small.
                    // "Too small" means: both sides are below the long dim minimum,
                    // or either side is below the short dim minimum.
                    if (!get_rect(crop).contains(rect.rect) ||
                        ((long)rect.rect.height() < min_object_length_long_dim && (long)rect.rect.width() < min_object_length_long_dim) ||
                        ((long)rect.rect.height() < min_object_length_short_dim || (long)rect.rect.width() < min_object_length_short_dim))
                    {
                        rect.ignore = true;
                    }

                    crop_rects.push_back(rect);
                }
            }

            // Also randomly flip the image
            if (should_flip_crop)
            {
                image_type2 temp;
                flip_image_left_right(crop, temp);
                swap(crop,temp);
                // The rectangles have to be mirrored along with the pixels.
                for (auto&& rect : crop_rects)
                    rect.rect = impl::flip_rect_left_right(rect.rect, get_rect(crop));
            }
        }

    private:

        // Decides where to crop, at what rotation, and whether to mirror.  Holds
        // rnd_mutex for its whole body, which also covers the draws made inside
        // randomly_pick_rect() and make_random_cropping_rect().
        //   crop_plan        - output: crop rectangle, chip size (dims) and angle.
        //   should_flip_crop - output: true when the chip should be mirrored.
        template <typename image_type1>
        void make_crop_plan (
            const image_type1& img,
            const std::vector<mmod_rect>& rects,
            chip_details& crop_plan,
            bool& should_flip_crop
        )
        {
            std::lock_guard<std::mutex> lock(rnd_mutex);
            rectangle crop_rect;
            // Centre on an object only if there is a usable one and the draw lands
            // at or above background_crops_fraction; otherwise take a background crop.
            // Because of the short circuit, no random number is consumed by this
            // test when every box is ignored.
            if (has_non_ignored_box(rects) && rnd.get_random_double() >= background_crops_fraction)
            {
                auto rect = rects[randomly_pick_rect(rects)].rect;

                // perturb the location of the crop by a small fraction of the object's size.
                // Both axes are scaled by the larger of the object's height and width.
                const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width()),
                                                    rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width()));

                // We are going to grow rect into the cropping rect. First, we grow it a
                // little so that it has the desired minimum border around it.
                drectangle drect = centered_drect(center(rect)+rand_translate, rect.width()/max_object_size, rect.height()/max_object_size);

                // Now make rect have the same aspect ratio as dims so that there won't be
                // any funny stretching when we crop it. We do this by growing it along
                // whichever dimension is too short.
                const double target_aspect = dims.cols/(double)dims.rows;
                if (drect.width()/drect.height() < target_aspect)
                    drect = centered_drect(drect, target_aspect*drect.height(), drect.height());
                else
                    drect = centered_drect(drect, drect.width(), drect.width()/target_aspect);

                // Now perturb the scale of the crop. We do this by shrinking it, but not
                // so much that it gets smaller than the min object sizes require.
                // current_width/current_height: the object's size, in chip pixels, if
                // drect were used as the crop unchanged.
                double current_width = dims.cols*rect.width()/drect.width();
                double current_height = dims.rows*rect.height()/drect.height();

                // never make any dimension smaller than the short dim.
                double min_scale1 = std::max(min_object_length_short_dim/current_width, min_object_length_short_dim/current_height);
                // at least one dimension needs to be longer than the long dim.
                double min_scale2 = std::min(min_object_length_long_dim/current_width, min_object_length_long_dim/current_height);
                double min_scale = std::max(min_scale1, min_scale2);

                // The crop rectangle is multiplied by 1/s with s drawn from
                // [min_scale, 1]; a larger crop rectangle makes the object smaller
                // in the fixed size chip.
                // NOTE(review): nothing here keeps min_scale <= 1, so the range
                // handed to get_double_in_range() can be reversed -- confirm that
                // this is acceptable to dlib::rand.
                const double rand_scale_perturb = 1.0/rnd.get_double_in_range(min_scale, 1);
                crop_rect = centered_drect(drect, drect.width()*rand_scale_perturb, drect.height()*rand_scale_perturb);

            }
            else
            {
                crop_rect = make_random_cropping_rect(img);
            }
            should_flip_crop = randomly_flip && rnd.get_random_double() > 0.5;
            // Rotation is drawn in degrees and converted to radians.
            const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180;
            crop_plan = chip_details(crop_rect, dims, angle);
        }

        // True if at least one element of rects has ignore == false.
        bool has_non_ignored_box (
            const std::vector<mmod_rect>& rects
        ) const
        {
            for (auto&& b : rects)
            {
                if (!b.ignore)
                    return true;
            }
            return false;
        }

        // Returns the index of a randomly chosen non-ignored rectangle by redrawing
        // until one is hit; the assert guarantees the loop can terminate.  It does
        // not lock rnd_mutex itself; the caller visible in this file
        // (make_crop_plan) already holds it.
        size_t randomly_pick_rect (
            const std::vector<mmod_rect>& rects
        )
        {
            DLIB_CASSERT(has_non_ignored_box(rects));
            size_t idx = rnd.get_integer(rects.size());
            while(rects[idx].ignore)
                idx = rnd.get_integer(rects.size());
            return idx;
        }

        // Picks a crop rectangle with the aspect ratio of dims at a random scale and
        // a random position inside the image.  Like randomly_pick_rect(), it uses
        // rnd without locking and relies on the caller holding rnd_mutex.
        template <typename image_type>
        rectangle make_random_cropping_rect(
            const image_type& img_
        )
        {
            const_image_view<image_type> img(img_);
            // Figure out what rectangle we want to crop from the image. We are going to
            // crop out an image of size this->dims, so we pick a random scale factor that
            // lets this random box be either as big as it can be while still fitting in
            // the image or as small as a 3x zoomed in box randomly somewhere in the image.
            double mins = 1.0/3.0, maxs = std::min(img.nr()/(double)dims.rows, img.nc()/(double)dims.cols);
            // For images that cannot hold even a 1/3 scale box, lower the minimum so
            // that mins <= maxs still holds.
            mins = std::min(mins, maxs);
            auto scale = rnd.get_double_in_range(mins, maxs);
            rectangle rect(scale*dims.cols, scale*dims.rows);
            // randomly shift the box around
            point offset(rnd.get_integer(1+img.nc()-rect.width()),
                         rnd.get_integer(1+img.nr()-rect.height()));
            return move_rect(rect, offset);
        }



    };

// ----------------------------------------------------------------------------------------

    // Writes every configuration value of item to out, one per line.
    // Note that std::boolalpha is inserted into out and never reset, so the
    // stream keeps printing bools as text after this call returns.
    inline std::ostream& operator<< (
        std::ostream& out,
        const random_cropper& item
    )
    {
        using std::endl;
        out << "random_cropper details: " << endl;
        out << " chip_dims.rows: " << item.get_chip_dims().rows << endl;
        out << " chip_dims.cols: " << item.get_chip_dims().cols << endl;
        out << " randomly_flip: " << std::boolalpha << item.get_randomly_flip() << endl;
        out << " max_rotation_degrees: " << item.get_max_rotation_degrees() << endl;
        out << " min_object_length_long_dim: " << item.get_min_object_length_long_dim() << endl;
        out << " min_object_length_short_dim: " << item.get_min_object_length_short_dim() << endl;
        out << " max_object_size: " << item.get_max_object_size() << endl;
        out << " background_crops_fraction: " << item.get_background_crops_fraction() << endl;
        out << " translate_amount: " << item.get_translate_amount() << endl;
        return out;
    }

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_RaNDOM_CROPPER_H_

diff --git a/ml/dlib/dlib/image_transforms/random_cropper_abstract.h b/ml/dlib/dlib/image_transforms/random_cropper_abstract.h deleted file mode 100644 index 7603a1c47..000000000 --- a/ml/dlib/dlib/image_transforms/random_cropper_abstract.h +++ /dev/null @@ -1,346 +0,0 @@
// Copyright (C) 2016 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
-#undef DLIB_RaNDOM_CROPPER_ABSTRACT_H_ -#ifdef DLIB_RaNDOM_CROPPER_ABSTRACT_H_ - -#include "../threads.h" -#include <mutex> -#include <vector> -#include "interpolation.h" -#include "../image_processing/full_object_detection.h" -#include "../rand.h" - -namespace dlib -{ - class random_cropper - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is a tool for extracting random crops of objects from a set of - images. The crops are randomly jittered in scale, translation, and - rotation but more or less centered on objects specified by mmod_rect - objects. - - THREAD SAFETY - It is safe for multiple threads to make concurrent calls to this object's - operator() methods. - !*/ - - public: - - random_cropper ( - ); - /*! - ensures - - #get_chip_dims() == chip_dims(300,300) - - #get_randomly_flip() == true - - #get_max_rotation_degrees() == 30 - - #get_min_object_length_long_dim() == 75 - - #get_min_object_length_short_dim() == 30 - - #get_max_object_size() == 0.7 - - #get_background_crops_fraction() == 0.5 - - #get_translate_amount() == 0.1 - !*/ - - void set_seed ( - time_t seed - ); - /*! - ensures - - Seeds the internal random number generator with the given seed. - !*/ - - double get_translate_amount ( - ) const; - /*! - ensures - - When a box is cropped out, it will be randomly translated prior to - cropping by up to #get_translate_amount()*max(the box's height, the box's - width), drawn independently for the up/down and the left/right direction. - !*/ - - void set_translate_amount ( - double value - ); - /*! - requires - - value >= 0 - ensures - - #get_translate_amount() == value - !*/ - - double get_background_crops_fraction ( - ) const; - /*! - ensures - - When making random crops, get_background_crops_fraction() fraction of - them will be from random background rather than being centered on some - object in the dataset. - !*/ - - void set_background_crops_fraction ( - double value - ); - /*! 
- requires - - 0 <= value <= 1 - ensures - - #get_background_crops_fraction() == value - !*/ - - const chip_dims& get_chip_dims( - ) const; - /*! - ensures - - returns the dimensions of image chips produced by this object. - !*/ - - void set_chip_dims ( - const chip_dims& dims - ); - /*! - ensures - - #get_chip_dims() == dims - !*/ - - void set_chip_dims ( - unsigned long rows, - unsigned long cols - ); - /*! - ensures - - #get_chip_dims() == chip_dims(rows,cols) - !*/ - - bool get_randomly_flip ( - ) const; - /*! - ensures - - returns true if this object will randomly mirror chips left to right. - !*/ - - void set_randomly_flip ( - bool value - ); - /*! - ensures - - #get_randomly_flip() == value - !*/ - - double get_max_rotation_degrees ( - ) const; - /*! - ensures - - When extracting an image chip, this object will pick a random rotation - in the range [-get_max_rotation_degrees(), get_max_rotation_degrees()] - and rotate the chip by that amount. - !*/ - - void set_max_rotation_degrees ( - double value - ); - /*! - ensures - - #get_max_rotation_degrees() == std::abs(value) - !*/ - - long get_min_object_length_long_dim ( - ) const; - /*! - ensures - - When a chip is extracted around an object, the chip will be sized so that - the longest edge of the object (i.e. either its height or width, - whichever is longer) is at least #get_min_object_length_long_dim() pixels - in length. When we say "object" here we are referring specifically to - the rectangle in the mmod_rect output by the cropper. - !*/ - - long get_min_object_length_short_dim ( - ) const; - /*! - ensures - - When a chip is extracted around an object, the chip will be sized so that - the shortest edge of the object (i.e. either its height or width, - whichever is shorter) is at least #get_min_object_length_short_dim() - pixels in length. When we say "object" here we are referring - specifically to the rectangle in the mmod_rect output by the cropper. 
- !*/ - - void set_min_object_size ( - long long_dim, - long short_dim - ); - /*! - requires - - 0 < short_dim <= long_dim - ensures - - #get_min_object_length_short_dim() == short_dim - - #get_min_object_length_long_dim() == long_dim - !*/ - - double get_max_object_size ( - ) const; - /*! - ensures - - When a chip is extracted around an object, the chip will be sized so that - both the object's height and width are at most get_max_object_size() * - the chip's height and width, respectively. E.g. if the chip is 640x480 - pixels in size then the object will be at most 480*get_max_object_size() - pixels tall and 640*get_max_object_size() pixels wide. - !*/ - - void set_max_object_size ( - double value - ); - /*! - requires - - 0 < value - ensures - - #get_max_object_size() == value - !*/ - - template < - typename array_type - > - void append ( - size_t num_crops, - const array_type& images, - const std::vector<std::vector<mmod_rect>>& rects, - array_type& crops, - std::vector<std::vector<mmod_rect>>& crop_rects - ); - /*! - requires - - images.size() == rects.size() - - crops.size() == crop_rects.size() - - for all valid i: - - images[i].size() != 0 - - array_type is a type with an interface compatible with dlib::array or - std::vector and it must in turn contain image objects that implement the - interface defined in dlib/image_processing/generic_image.h - ensures - - Randomly extracts num_crops chips from images and appends them to the end - of crops. We also copy the object metadata for each extracted crop and - store it into #crop_rects. In particular, calling this function is the - same as making multiple calls to the version of operator() below that - outputs a single crop, except that append() will use multiple CPU cores - to do the processing and is therefore faster. 
- - #crops.size() == crops.size()+num_crops - - #crop_rects.size() == crop_rects.size()+num_crops - !*/ - - template < - typename array_type - > - void operator() ( - size_t num_crops, - const array_type& images, - const std::vector<std::vector<mmod_rect>>& rects, - array_type& crops, - std::vector<std::vector<mmod_rect>>& crop_rects - ); - /*! - requires - - images.size() == rects.size() - - for all valid i: - - images[i].size() != 0 - - array_type is a type with an interface compatible with dlib::array or - std::vector and it must in turn contain image objects that implement the - interface defined in dlib/image_processing/generic_image.h - ensures - - Randomly extracts num_crops chips from images. We also copy the object - metadata for each extracted crop and store it into #crop_rects. In - particular, calling this function is the same as invoking the version of - operator() below multiple times, except that this version of operator() - will use multiple CPU cores to do the processing and is therefore faster. - - #crops.size() == num_crops - - #crop_rects.size() == num_crops - !*/ - - template < - typename array_type, - typename image_type - > - void operator() ( - const array_type& images, - const std::vector<std::vector<mmod_rect>>& rects, - image_type& crop, - std::vector<mmod_rect>& crop_rects - ); - /*! - requires - - images.size() == rects.size() - - for all valid i: - - images[i].size() != 0 - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - array_type is a type with an interface compatible with dlib::array or - std::vector and it must in turn contain image objects that implement the - interface defined in dlib/image_processing/generic_image.h - ensures - - Selects a random image and creates a random crop from it. 
Specifically, - we pick a random index IDX < images.size() and then execute - (*this)(images[IDX],rects[IDX],crop,crop_rects) - !*/ - - template < - typename image_type1, - typename image_type2 - > - void operator() ( - const image_type1& img, - const std::vector<mmod_rect>& rects, - image_type2& crop, - std::vector<mmod_rect>& crop_rects - ); - /*! - requires - - img.size() != 0 - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - ensures - - Extracts a random crop from img and copies over the mmod_rect objects in - rects to #crop_rects if they are contained inside the crop. Moreover, - rectangles are marked as ignore if they aren't completely contained - inside the crop. - - #crop_rects.size() <= rects.size() - !*/ - - template < - typename image_type1 - > - image_type1 operator() ( - const image_type1& img - ); - /*! - requires - - img.size() != 0 - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - ensures - - This function simply calls (*this)(img, junk1, crop, junk2) and returns - crop. Therefore it is simply a convenience function for extracting a - random background patch. - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - std::ostream& operator<< ( - std::ostream& out, - const random_cropper& item - ); - /*! - ensures - - Prints the state of all the parameters of item to out. 
- !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RaNDOM_CROPPER_ABSTRACT_H_ - - diff --git a/ml/dlib/dlib/image_transforms/segment_image.h b/ml/dlib/dlib/image_transforms/segment_image.h deleted file mode 100644 index 3b57e4801..000000000 --- a/ml/dlib/dlib/image_transforms/segment_image.h +++ /dev/null @@ -1,730 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_SEGMENT_ImAGE_Hh_ -#define DLIB_SEGMENT_ImAGE_Hh_ - -#include "segment_image_abstract.h" -#include "../algs.h" -#include <vector> -#include "../geometry.h" -#include "../disjoint_subsets.h" -#include "../set.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template <typename T> - inline T edge_diff_uint( - const T& a, - const T& b - ) - { - if (a > b) - return a - b; - else - return b - a; - } - - // ---------------------------------------- - - template <typename T, typename enabled = void> - struct edge_diff_funct - { - typedef double diff_type; - - template <typename pixel_type> - double operator()( - const pixel_type& a, - const pixel_type& b - ) const - { - return length(pixel_to_vector<double>(a) - pixel_to_vector<double>(b)); - } - }; - - template <> - struct edge_diff_funct<uint8,void> - { - typedef uint8 diff_type; - uint8 operator()( const uint8& a, const uint8& b) const { return edge_diff_uint(a,b); } - }; - - template <> - struct edge_diff_funct<uint16,void> - { - typedef uint16 diff_type; - uint16 operator()( const uint16& a, const uint16& b) const { return edge_diff_uint(a,b); } - }; - - template <> - struct edge_diff_funct<uint32,void> - { - typedef uint32 diff_type; - uint32 operator()( const uint32& a, const uint32& b) const { return edge_diff_uint(a,b); } - }; - - template <> - struct edge_diff_funct<double,void> - { - 
typedef double diff_type; - double operator()( const double& a, const double& b) const { return std::abs(a-b); } - }; - - template <typename T> - struct edge_diff_funct<T, typename enable_if<is_matrix<T> >::type> - { - typedef double diff_type; - double operator()( - const T& a, - const T& b - ) const - { - return length(a-b); - } - }; - - // ------------------------------------------------------------------------------------ - - template <typename T> - struct graph_image_segmentation_data_T - { - graph_image_segmentation_data_T() : component_size(1), internal_diff(0) {} - unsigned long component_size; - T internal_diff; - }; - - // ------------------------------------------------------------------------------------ - - template <typename T> - struct segment_image_edge_data_T - { - segment_image_edge_data_T (){} - - segment_image_edge_data_T ( - const rectangle& rect, - const point& p1, - const point& p2, - const T& diff_ - ) : - idx1(p1.y()*rect.width() + p1.x()), - idx2(p2.y()*rect.width() + p2.x()), - diff(diff_) - {} - - bool operator<(const segment_image_edge_data_T& item) const - { return diff < item.diff; } - - unsigned long idx1; - unsigned long idx2; - T diff; - }; - - // ------------------------------------------------------------------------------------ - - template <typename image_view_type> - struct uint8_or_uint16_pixels - { - typedef typename image_view_type::pixel_type pixel_type; - const static bool value = is_same_type<pixel_type,uint8>::value || - is_same_type<pixel_type,uint16>::value; - }; - - // This is an overload of get_pixel_edges() that is optimized to segment images - // with 8bit or 16bit pixels very quickly. We do this by using a radix sort - // instead of quicksort. 
- template <typename in_image_type, typename T> - typename enable_if<uint8_or_uint16_pixels<in_image_type> >::type - get_pixel_edges ( - const in_image_type& in_img, - std::vector<segment_image_edge_data_T<T> >& sorted_edges - ) - { - typedef typename in_image_type::pixel_type ptype; - typedef T diff_type; - std::vector<unsigned long> counts(std::numeric_limits<ptype>::max()+1, 0); - - edge_diff_funct<ptype> edge_diff; - - border_enumerator be(get_rect(in_img), 1); - // we are going to do a radix sort on the edge weights. So the first step - // is to accumulate them into count. - const rectangle area = get_rect(in_img); - while (be.move_next()) - { - const long r = be.element().y(); - const long c = be.element().x(); - const ptype pix = in_img[r][c]; - if (area.contains(c-1,r)) counts[edge_diff(pix, in_img[r ][c-1])] += 1; - if (area.contains(c+1,r)) counts[edge_diff(pix, in_img[r ][c+1])] += 1; - if (area.contains(c ,r-1)) counts[edge_diff(pix, in_img[r-1][c ])] += 1; - if (area.contains(c ,r+1)) counts[edge_diff(pix, in_img[r+1][c ])] += 1; - } - for (long r = 1; r+1 < in_img.nr(); ++r) - { - for (long c = 1; c+1 < in_img.nc(); ++c) - { - const ptype pix = in_img[r][c]; - counts[edge_diff(pix, in_img[r-1][c+1])] += 1; - counts[edge_diff(pix, in_img[r ][c+1])] += 1; - counts[edge_diff(pix, in_img[r+1][c ])] += 1; - counts[edge_diff(pix, in_img[r+1][c+1])] += 1; - } - } - - const unsigned long num_edges = shrink_rect(area,1).area()*4 + in_img.nr()*2*3 - 4 + (in_img.nc()-2)*2*3; - typedef segment_image_edge_data_T<T> segment_image_edge_data; - sorted_edges.resize(num_edges); - - // integrate counts. The idea is to have sorted_edges[counts[i]] be the location that edges - // with an edge_diff of i go. So counts[0] == 0, counts[1] == number of 0 edge diff edges, etc. 
- unsigned long prev = counts[0]; - for (unsigned long i = 1; i < counts.size(); ++i) - { - const unsigned long temp = counts[i]; - counts[i] += counts[i-1]; - counts[i-1] -= prev; - prev = temp; - } - counts[counts.size()-1] -= prev; - - - // now build a sorted list of all the edges - be.reset(); - while(be.move_next()) - { - const point p = be.element(); - const long r = p.y(); - const long c = p.x(); - const ptype pix = in_img[r][c]; - if (area.contains(c-1,r)) - { - const diff_type diff = edge_diff(pix, in_img[r ][c-1]); - sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c-1,r),diff); - } - - if (area.contains(c+1,r)) - { - const diff_type diff = edge_diff(pix, in_img[r ][c+1]); - sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r),diff); - } - - if (area.contains(c ,r-1)) - { - const diff_type diff = edge_diff(pix, in_img[r-1][c ]); - sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r-1),diff); - } - - if (area.contains(c ,r+1)) - { - const diff_type diff = edge_diff(pix, in_img[r+1][c ]); - sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r+1),diff); - } - } - // same thing as the above loop but now we do it on the interior of the image and therefore - // don't have to include the boundary checking if statements used above. 
- for (long r = 1; r+1 < in_img.nr(); ++r) - { - for (long c = 1; c+1 < in_img.nc(); ++c) - { - const point p(c,r); - const ptype pix = in_img[r][c]; - diff_type diff; - - diff = edge_diff(pix, in_img[r ][c+1]); - sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r),diff); - diff = edge_diff(pix, in_img[r-1][c+1]); - sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r-1),diff); - diff = edge_diff(pix, in_img[r+1][c+1]); - sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r+1),diff); - diff = edge_diff(pix, in_img[r+1][c ]); - sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r+1),diff); - } - } - } - - // ---------------------------------------------------------------------------------------- - - // This is the general purpose version of get_pixel_edges(). It handles all pixel types. - template <typename in_image_type, typename T> - typename disable_if<uint8_or_uint16_pixels<in_image_type> >::type - get_pixel_edges ( - const in_image_type& in_img, - std::vector<segment_image_edge_data_T<T> >& sorted_edges - ) - { - const rectangle area = get_rect(in_img); - sorted_edges.reserve(area.area()*4); - - typedef typename in_image_type::pixel_type ptype; - edge_diff_funct<ptype> edge_diff; - typedef T diff_type; - typedef segment_image_edge_data_T<T> segment_image_edge_data; - - border_enumerator be(get_rect(in_img), 1); - - // now build a sorted list of all the edges - be.reset(); - while(be.move_next()) - { - const point p = be.element(); - const long r = p.y(); - const long c = p.x(); - const ptype& pix = in_img[r][c]; - if (area.contains(c-1,r)) - { - const diff_type diff = edge_diff(pix, in_img[r ][c-1]); - sorted_edges.push_back(segment_image_edge_data(area,p,point(c-1,r),diff)); - } - - if (area.contains(c+1,r)) - { - const diff_type diff = edge_diff(pix, in_img[r ][c+1]); - sorted_edges.push_back(segment_image_edge_data(area,p,point(c+1,r),diff)); - } - - if (area.contains(c 
,r-1)) - { - const diff_type diff = edge_diff(pix, in_img[r-1][c ]); - sorted_edges.push_back( segment_image_edge_data(area,p,point(c ,r-1),diff)); - } - if (area.contains(c ,r+1)) - { - const diff_type diff = edge_diff(pix, in_img[r+1][c ]); - sorted_edges.push_back( segment_image_edge_data(area,p,point(c ,r+1),diff)); - } - } - // same thing as the above loop but now we do it on the interior of the image and therefore - // don't have to include the boundary checking if statements used above. - for (long r = 1; r+1 < in_img.nr(); ++r) - { - for (long c = 1; c+1 < in_img.nc(); ++c) - { - const point p(c,r); - const ptype& pix = in_img[r][c]; - diff_type diff; - - diff = edge_diff(pix, in_img[r ][c+1]); - sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r),diff)); - diff = edge_diff(pix, in_img[r+1][c+1]); - sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r+1),diff)); - diff = edge_diff(pix, in_img[r+1][c ]); - sorted_edges.push_back( segment_image_edge_data(area,p,point(c ,r+1),diff)); - diff = edge_diff(pix, in_img[r-1][c+1]); - sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r-1),diff)); - } - } - - std::sort(sorted_edges.begin(), sorted_edges.end()); - - } - - // ------------------------------------------------------------------------------------ - - } // end of namespace impl - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type - > - void segment_image ( - const in_image_type& in_img_, - out_image_type& out_img_, - const double k = 200, - const unsigned long min_size = 10 - ) - { - using namespace dlib::impl; - typedef typename image_traits<in_image_type>::pixel_type ptype; - typedef typename edge_diff_funct<ptype>::diff_type diff_type; - - // make sure requires clause is not broken - DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, - "\t void segment_image()" - << "\n\t The input images can't be 
the same object." - ); - - COMPILE_TIME_ASSERT(is_unsigned_type<typename image_traits<out_image_type>::pixel_type>::value); - - const_image_view<in_image_type> in_img(in_img_); - image_view<out_image_type> out_img(out_img_); - - out_img.set_size(in_img.nr(), in_img.nc()); - // don't bother doing anything if the image is too small - if (in_img.nr() < 2 || in_img.nc() < 2) - { - assign_all_pixels(out_img,0); - return; - } - - disjoint_subsets sets; - sets.set_size(in_img.size()); - - std::vector<segment_image_edge_data_T<diff_type> > sorted_edges; - get_pixel_edges(in_img, sorted_edges); - - std::vector<graph_image_segmentation_data_T<diff_type> > data(in_img.size()); - - // now start connecting blobs together to make a minimum spanning tree. - for (unsigned long i = 0; i < sorted_edges.size(); ++i) - { - const unsigned long idx1 = sorted_edges[i].idx1; - const unsigned long idx2 = sorted_edges[i].idx2; - - unsigned long set1 = sets.find_set(idx1); - unsigned long set2 = sets.find_set(idx2); - if (set1 != set2) - { - const diff_type diff = sorted_edges[i].diff; - const diff_type tau1 = static_cast<diff_type>(k/data[set1].component_size); - const diff_type tau2 = static_cast<diff_type>(k/data[set2].component_size); - - const diff_type mint = std::min(data[set1].internal_diff + tau1, - data[set2].internal_diff + tau2); - if (diff <= mint) - { - const unsigned long new_set = sets.merge_sets(set1, set2); - data[new_set].component_size = data[set1].component_size + data[set2].component_size; - data[new_set].internal_diff = diff; - } - } - } - - // now merge any really small blobs - if (min_size != 0) - { - for (unsigned long i = 0; i < sorted_edges.size(); ++i) - { - const unsigned long idx1 = sorted_edges[i].idx1; - const unsigned long idx2 = sorted_edges[i].idx2; - - unsigned long set1 = sets.find_set(idx1); - unsigned long set2 = sets.find_set(idx2); - if (set1 != set2 && (data[set1].component_size < min_size || data[set2].component_size < min_size)) - { - const 
unsigned long new_set = sets.merge_sets(set1, set2); - data[new_set].component_size = data[set1].component_size + data[set2].component_size; - //data[new_set].internal_diff = sorted_edges[i].diff; - } - } - } - - unsigned long idx = 0; - for (long r = 0; r < out_img.nr(); ++r) - { - for (long c = 0; c < out_img.nc(); ++c) - { - out_img[r][c] = sets.find_set(idx++); - } - } - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// Candidate object location generation code. -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - namespace impl - { - struct edge_data - { - double edge_diff; - unsigned long set1; - unsigned long set2; - bool operator<(const edge_data& item) const - { - return edge_diff < item.edge_diff; - } - }; - - template < - typename in_image_type, - typename diff_type - > - void find_basic_candidate_object_locations ( - const in_image_type& in_img, - const std::vector<dlib::impl::segment_image_edge_data_T<diff_type> >& sorted_edges, - std::vector<rectangle>& out_rects, - std::vector<edge_data>& edges, - const double k, - const unsigned long min_size - ) - { - using namespace dlib::impl; - - std::vector<dlib::impl::segment_image_edge_data_T<diff_type> > rejected_edges; - rejected_edges.reserve(sorted_edges.size()); - - out_rects.clear(); - edges.clear(); - - // don't bother doing anything if the image is too small - if (in_img.nr() < 2 || in_img.nc() < 2) - { - return; - } - - disjoint_subsets sets; - sets.set_size(in_img.size()); - - - std::vector<graph_image_segmentation_data_T<diff_type> > data(in_img.size()); - - - - std::pair<unsigned long,unsigned long> last_blob_edge(std::numeric_limits<unsigned long>::max(), - std::numeric_limits<unsigned long>::max());; - // now start 
connecting blobs together to make a minimum spanning tree. - for (unsigned long i = 0; i < sorted_edges.size(); ++i) - { - const unsigned long idx1 = sorted_edges[i].idx1; - const unsigned long idx2 = sorted_edges[i].idx2; - - unsigned long set1 = sets.find_set(idx1); - unsigned long set2 = sets.find_set(idx2); - if (set1 != set2) - { - const diff_type diff = sorted_edges[i].diff; - const diff_type tau1 = static_cast<diff_type>(k/data[set1].component_size); - const diff_type tau2 = static_cast<diff_type>(k/data[set2].component_size); - - const diff_type mint = std::min(data[set1].internal_diff + tau1, - data[set2].internal_diff + tau2); - if (diff <= mint) - { - const unsigned long new_set = sets.merge_sets(set1, set2); - data[new_set].component_size = data[set1].component_size + data[set2].component_size; - data[new_set].internal_diff = diff; - } - else - { - // Don't bother keeping multiple edges from the same pair of blobs, we - // only need one for what we will do later. - if (std::make_pair(set1,set2) != last_blob_edge) - { - segment_image_edge_data_T<diff_type> temp = sorted_edges[i]; - temp.idx1 = set1; - temp.idx2 = set2; - rejected_edges.push_back(temp); - last_blob_edge = std::make_pair(set1,set2); - } - } - } - } - - - // merge small blobs - for (unsigned long i = 0; i < rejected_edges.size(); ++i) - { - const unsigned long idx1 = rejected_edges[i].idx1; - const unsigned long idx2 = rejected_edges[i].idx2; - - unsigned long set1 = sets.find_set(idx1); - unsigned long set2 = sets.find_set(idx2); - rejected_edges[i].idx1 = set1; - rejected_edges[i].idx2 = set2; - if (set1 != set2 && (data[set1].component_size < min_size || data[set2].component_size < min_size)) - { - const unsigned long new_set = sets.merge_sets(set1, set2); - data[new_set].component_size = data[set1].component_size + data[set2].component_size; - data[new_set].internal_diff = rejected_edges[i].diff; - } - } - - // find bounding boxes of each blob - std::map<unsigned long, rectangle> boxes; 
- std::map<unsigned long, unsigned long> box_id_map; - unsigned long idx = 0; - for (long r = 0; r < in_img.nr(); ++r) - { - for (long c = 0; c < in_img.nc(); ++c) - { - const unsigned long id = sets.find_set(idx++); - // Accumulate the current point into its box and if it is the first point - // in the box then also record the id number for this box. - if ((boxes[id] += point(c,r)).area() == 1) - box_id_map[id] = boxes.size()-1; - } - } - - // copy boxes into out_rects - out_rects.resize(boxes.size()); - for (std::map<unsigned long,rectangle>::iterator i = boxes.begin(); i != boxes.end(); ++i) - { - out_rects[box_id_map[i->first]] = i->second; - } - - // Now find the edges between the boxes - typedef dlib::memory_manager<char>::kernel_2c mm_type; - dlib::set<std::pair<unsigned long, unsigned long>, mm_type>::kernel_1a neighbors_final; - for (unsigned long i = 0; i < rejected_edges.size(); ++i) - { - const unsigned long idx1 = rejected_edges[i].idx1; - const unsigned long idx2 = rejected_edges[i].idx2; - - unsigned long set1 = sets.find_set(idx1); - unsigned long set2 = sets.find_set(idx2); - if (set1 != set2) - { - std::pair<unsigned long, unsigned long> p = std::make_pair(set1,set2); - if (!neighbors_final.is_member(p)) - { - neighbors_final.add(p); - - edge_data temp; - const diff_type mint = std::min(data[set1].internal_diff , - data[set2].internal_diff ); - temp.edge_diff = rejected_edges[i].diff - mint; - temp.set1 = box_id_map[set1]; - temp.set2 = box_id_map[set2]; - edges.push_back(temp); - } - } - } - - std::sort(edges.begin(), edges.end()); - } - } // end namespace impl - -// ---------------------------------------------------------------------------------------- - - template <typename alloc> - void remove_duplicates ( - std::vector<rectangle,alloc>& rects - ) - { - std::sort(rects.begin(), rects.end(), std::less<rectangle>()); - unsigned long num_unique = 1; - for (unsigned long i = 1; i < rects.size(); ++i) - { - if (rects[i] != rects[i-1]) - { - 
rects[num_unique++] = rects[i]; - } - } - if (rects.size() != 0) - rects.resize(num_unique); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename EXP - > - void find_candidate_object_locations ( - const in_image_type& in_img_, - std::vector<rectangle>& rects, - const matrix_exp<EXP>& kvals, - const unsigned long min_size = 20, - const unsigned long max_merging_iterations = 50 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_vector(kvals) && kvals.size() > 0, - "\t void find_candidate_object_locations()" - << "\n\t Invalid inputs were given to this function." - << "\n\t is_vector(kvals): " << is_vector(kvals) - << "\n\t kvals.size(): " << kvals.size() - ); - - typedef dlib::memory_manager<char>::kernel_2c mm_type; - typedef dlib::set<rectangle, mm_type>::kernel_1a set_of_rects; - - using namespace dlib::impl; - typedef typename image_traits<in_image_type>::pixel_type ptype; - typedef typename edge_diff_funct<ptype>::diff_type diff_type; - - const_image_view<in_image_type> in_img(in_img_); - - // don't bother doing anything if the image is too small - if (in_img.nr() < 2 || in_img.nc() < 2) - { - return; - } - - std::vector<edge_data> edges; - std::vector<rectangle> working_rects; - std::vector<segment_image_edge_data_T<diff_type> > sorted_edges; - get_pixel_edges(in_img, sorted_edges); - - disjoint_subsets sets; - - for (long j = 0; j < kvals.size(); ++j) - { - const double k = kvals(j); - - find_basic_candidate_object_locations(in_img, sorted_edges, working_rects, edges, k, min_size); - rects.insert(rects.end(), working_rects.begin(), working_rects.end()); - - - // Now iteratively merge all the rectangles we have and record the results. - // Note that, unlike what is described in the paper - // Segmentation as Selective Search for Object Recognition" by Koen E. A. van de Sande, et al. 
- // we don't use any kind of histogram/SIFT like thing to order the edges - // between the blobs. Here we simply order by the pixel difference value. - // Additionally, note that we keep progressively merging boxes in the outer - // loop rather than performing just a single iteration as indicated in the - // paper. - set_of_rects detected_rects; - bool did_merge = true; - for (unsigned long iter = 0; did_merge && iter < max_merging_iterations; ++iter) - { - did_merge = false; - sets.clear(); - sets.set_size(working_rects.size()); - - // recursively merge neighboring blobs until we have merged everything - for (unsigned long i = 0; i < edges.size(); ++i) - { - edge_data temp = edges[i]; - - temp.set1 = sets.find_set(temp.set1); - temp.set2 = sets.find_set(temp.set2); - if (temp.set1 != temp.set2) - { - rectangle merged_rect = working_rects[temp.set1] + working_rects[temp.set2]; - // Skip merging this pair of blobs if it was merged in a previous - // iteration. Doing this lets us consider other possible blob - // merges. 
- if (!detected_rects.is_member(merged_rect)) - { - const unsigned long new_set = sets.merge_sets(temp.set1, temp.set2); - rects.push_back(merged_rect); - working_rects[new_set] = merged_rect; - did_merge = true; - detected_rects.add(merged_rect); - } - } - } - } - } - - remove_duplicates(rects); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type - > - void find_candidate_object_locations ( - const in_image_type& in_img, - std::vector<rectangle>& rects - ) - { - find_candidate_object_locations(in_img, rects, linspace(50, 200, 3)); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SEGMENT_ImAGE_Hh_ - diff --git a/ml/dlib/dlib/image_transforms/segment_image_abstract.h b/ml/dlib/dlib/image_transforms/segment_image_abstract.h deleted file mode 100644 index af1af46a1..000000000 --- a/ml/dlib/dlib/image_transforms/segment_image_abstract.h +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SEGMENT_ImAGE_ABSTRACT_Hh_ -#ifdef DLIB_SEGMENT_ImAGE_ABSTRACT_Hh_ - -#include <vector> -#include "../matrix.h" -#include "../image_processing/generic_image.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type - > - void segment_image ( - const in_image_type& in_img, - out_image_type& out_img, - const double k = 200, - const unsigned long min_size = 10 - ); - /*! 
- requires - - in_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - out_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - in_image_type can contain any pixel type with a pixel_traits specialization - or a dlib matrix object representing a row or column vector. - - out_image_type must contain an unsigned integer pixel type. - - is_same_object(in_img, out_img) == false - ensures - - Attempts to segment in_img into regions which have some visual consistency to - them. In particular, this function implements the algorithm described in the - paper: Efficient Graph-Based Image Segmentation by Felzenszwalb and Huttenlocher. - - #out_img.nr() == in_img.nr() - - #out_img.nc() == in_img.nc() - - for all valid r and c: - - #out_img[r][c] == an integer value indicating the identity of the segment - containing the pixel in_img[r][c]. - - The k parameter is a measure used to influence how large the segment regions - will be. Larger k generally results in larger segments being produced. For - a deeper discussion of the k parameter you should consult the above - referenced paper. - - min_size is a lower bound on the size of the output segments. That is, it is - guaranteed that all output segments will have at least min_size pixels in - them (unless the whole image contains fewer than min_size pixels, in this - case the entire image will be put into a single segment). 
- !*/ - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename EXP - > - void find_candidate_object_locations ( - const in_image_type& in_img, - std::vector<rectangle>& rects, - const matrix_exp<EXP>& kvals = linspace(50, 200, 3), - const unsigned long min_size = 20, - const unsigned long max_merging_iterations = 50 - ); - /*! - requires - - in_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - is_vector(kvals) == true - - kvals.size() > 0 - ensures - - This function takes an input image and generates a set of candidate - rectangles which are expected to bound any objects in the image. It does - this by running a version of the segment_image() routine on the image and - then reports rectangles containing each of the segments as well as rectangles - containing unions of adjacent segments. The basic idea is described in the - paper: - Segmentation as Selective Search for Object Recognition by Koen E. A. van de Sande, et al. - Note that this function deviates from what is described in the paper slightly. - See the code for details. - - The basic segmentation is performed kvals.size() times, each time with the k - parameter (see segment_image() and the Felzenszwalb paper for details on k) - set to a different value from kvals. - - When doing the basic segmentations prior to any box merging, we discard all - rectangles that have an area < min_size. Therefore, all outputs and - subsequent merged rectangles are built out of rectangles that contain at - least min_size pixels. 
Note that setting min_size to a smaller value than - you might otherwise be interested in using can be useful since it allows a - larger number of possible merged boxes to be created. - - There are max_merging_iterations rounds of neighboring blob merging. - Therefore, this parameter has some effect on the number of output rectangles - you get, with larger values of the parameter giving more output rectangles. - - This function appends the output rectangles into #rects. This means that any - rectangles in rects before this function was called will still be in there - after it terminates. Note further that #rects will not contain any duplicate - rectangles. That is, for all valid i and j where i != j it will be true - that: - - #rects[i] != rects[j] - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename alloc - > - void remove_duplicates ( - std::vector<rectangle,alloc>& rects - ); - /*! - ensures - - This function finds any duplicate rectangles in rects and removes the extra - instances. This way, the result is that rects contains only unique rectangle - instances. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SEGMENT_ImAGE_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/image_transforms/spatial_filtering.h b/ml/dlib/dlib/image_transforms/spatial_filtering.h deleted file mode 100644 index 91dcae321..000000000 --- a/ml/dlib/dlib/image_transforms/spatial_filtering.h +++ /dev/null @@ -1,1580 +0,0 @@ -// Copyright (C) 2006 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#ifndef DLIB_SPATIAL_FILTERINg_H_ -#define DLIB_SPATIAL_FILTERINg_H_ - -#include "../pixel.h" -#include "spatial_filtering_abstract.h" -#include "../algs.h" -#include "../assert.h" -#include "../array2d.h" -#include "../matrix.h" -#include "../geometry/border_enumerator.h" -#include "../simd.h" -#include <limits> -#include "assign_image.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template < - typename in_image_type, - typename out_image_type, - typename EXP, - typename T - > - rectangle grayscale_spatially_filter_image ( - const in_image_type& in_img_, - out_image_type& out_img_, - const matrix_exp<EXP>& filter_, - T scale, - bool use_abs, - bool add_to - ) - { - const_temp_matrix<EXP> filter(filter_); - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); - - DLIB_ASSERT(scale != 0 && filter.size() != 0, - "\trectangle spatially_filter_image()" - << "\n\t You can't give a scale of zero or an empty filter." 
- << "\n\t scale: "<< scale - << "\n\t filter.nr(): "<< filter.nr() - << "\n\t filter.nc(): "<< filter.nc() - ); - DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, - "\trectangle spatially_filter_image()" - << "\n\tYou must give two different image objects" - ); - - - const_image_view<in_image_type> in_img(in_img_); - image_view<out_image_type> out_img(out_img_); - - // if there isn't any input image then don't do anything - if (in_img.size() == 0) - { - out_img.clear(); - return rectangle(); - } - - out_img.set_size(in_img.nr(),in_img.nc()); - - - // figure out the range that we should apply the filter to - const long first_row = filter.nr()/2; - const long first_col = filter.nc()/2; - const long last_row = in_img.nr() - ((filter.nr()-1)/2); - const long last_col = in_img.nc() - ((filter.nc()-1)/2); - - const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1); - if (!add_to) - zero_border_pixels(out_img_, non_border); - - // apply the filter to the image - for (long r = first_row; r < last_row; ++r) - { - for (long c = first_col; c < last_col; ++c) - { - typedef typename EXP::type ptype; - ptype p; - ptype temp = 0; - for (long m = 0; m < filter.nr(); ++m) - { - for (long n = 0; n < filter.nc(); ++n) - { - // pull out the current pixel and put it into p - p = get_pixel_intensity(in_img[r-first_row+m][c-first_col+n]); - temp += p*filter(m,n); - } - } - - temp /= scale; - - if (use_abs && temp < 0) - { - temp = -temp; - } - - // save this pixel to the output image - if (add_to == false) - { - assign_pixel(out_img[r][c], temp); - } - else - { - assign_pixel(out_img[r][c], temp + out_img[r][c]); - } - } - } - - return non_border; - } - - // ------------------------------------------------------------------------------------ - - template < - typename in_image_type, - typename out_image_type, - typename EXP - > - rectangle float_spatially_filter_image ( - const in_image_type& in_img_, - out_image_type& out_img_, - const 
matrix_exp<EXP>& filter_, - bool add_to - ) - { - - const_temp_matrix<EXP> filter(filter_); - DLIB_ASSERT(filter.size() != 0, - "\trectangle spatially_filter_image()" - << "\n\t You can't give an empty filter." - << "\n\t filter.nr(): "<< filter.nr() - << "\n\t filter.nc(): "<< filter.nc() - ); - DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, - "\trectangle spatially_filter_image()" - << "\n\tYou must give two different image objects" - ); - - - const_image_view<in_image_type> in_img(in_img_); - image_view<out_image_type> out_img(out_img_); - - // if there isn't any input image then don't do anything - if (in_img.size() == 0) - { - out_img.clear(); - return rectangle(); - } - - out_img.set_size(in_img.nr(),in_img.nc()); - - - // figure out the range that we should apply the filter to - const long first_row = filter.nr()/2; - const long first_col = filter.nc()/2; - const long last_row = in_img.nr() - ((filter.nr()-1)/2); - const long last_col = in_img.nc() - ((filter.nc()-1)/2); - - const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1); - if (!add_to) - zero_border_pixels(out_img_, non_border); - - // apply the filter to the image - for (long r = first_row; r < last_row; ++r) - { - long c = first_col; - for (; c < last_col-7; c+=8) - { - simd8f p,p2,p3; - simd8f temp = 0, temp2=0, temp3=0; - for (long m = 0; m < filter.nr(); ++m) - { - long n = 0; - for (; n < filter.nc()-2; n+=3) - { - // pull out the current pixel and put it into p - p.load(&in_img[r-first_row+m][c-first_col+n]); - p2.load(&in_img[r-first_row+m][c-first_col+n+1]); - p3.load(&in_img[r-first_row+m][c-first_col+n+2]); - temp += p*filter(m,n); - temp2 += p2*filter(m,n+1); - temp3 += p3*filter(m,n+2); - } - for (; n < filter.nc(); ++n) - { - // pull out the current pixel and put it into p - p.load(&in_img[r-first_row+m][c-first_col+n]); - temp += p*filter(m,n); - } - } - temp += temp2+temp3; - - // save this pixel to the output image - if (add_to == false) - { - 
temp.store(&out_img[r][c]); - } - else - { - p.load(&out_img[r][c]); - temp += p; - temp.store(&out_img[r][c]); - } - } - for (; c < last_col; ++c) - { - float p; - float temp = 0; - for (long m = 0; m < filter.nr(); ++m) - { - for (long n = 0; n < filter.nc(); ++n) - { - // pull out the current pixel and put it into p - p = in_img[r-first_row+m][c-first_col+n]; - temp += p*filter(m,n); - } - } - - // save this pixel to the output image - if (add_to == false) - { - out_img[r][c] = temp; - } - else - { - out_img[r][c] += temp; - } - } - } - - return non_border; - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - typename EXP - > - struct is_float_filtering2 - { - const static bool value = is_same_type<typename image_traits<in_image_type>::pixel_type,float>::value && - is_same_type<typename image_traits<out_image_type>::pixel_type,float>::value && - is_same_type<typename EXP::type,float>::value; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - typename EXP, - typename T - > - typename enable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale && - is_float_filtering2<in_image_type,out_image_type,EXP>::value,rectangle>::type - spatially_filter_image ( - const in_image_type& in_img, - out_image_type& out_img, - const matrix_exp<EXP>& filter, - T scale, - bool use_abs = false, - bool add_to = false - ) - { - if (use_abs == false) - { - if (scale == 1) - return impl::float_spatially_filter_image(in_img, out_img, filter, add_to); - else - return impl::float_spatially_filter_image(in_img, out_img, filter/scale, add_to); - } - else - { - return impl::grayscale_spatially_filter_image(in_img, out_img, filter, scale, true, add_to); - } - } - -// 
---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - typename EXP, - typename T - > - typename enable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale && - !is_float_filtering2<in_image_type,out_image_type,EXP>::value,rectangle>::type - spatially_filter_image ( - const in_image_type& in_img, - out_image_type& out_img, - const matrix_exp<EXP>& filter, - T scale, - bool use_abs = false, - bool add_to = false - ) - { - return impl::grayscale_spatially_filter_image(in_img,out_img,filter,scale,use_abs,add_to); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - typename EXP, - typename T - > - typename disable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale,rectangle>::type - spatially_filter_image ( - const in_image_type& in_img_, - out_image_type& out_img_, - const matrix_exp<EXP>& filter_, - T scale - ) - { - const_temp_matrix<EXP> filter(filter_); - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); - - DLIB_ASSERT(scale != 0 && filter.size() != 0, - "\trectangle spatially_filter_image()" - << "\n\t You can't give a scale of zero or an empty filter." 
- << "\n\t scale: "<< scale - << "\n\t filter.nr(): "<< filter.nr() - << "\n\t filter.nc(): "<< filter.nc() - ); - DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, - "\trectangle spatially_filter_image()" - << "\n\tYou must give two different image objects" - ); - - - const_image_view<in_image_type> in_img(in_img_); - image_view<out_image_type> out_img(out_img_); - - // if there isn't any input image then don't do anything - if (in_img.size() == 0) - { - out_img.clear(); - return rectangle(); - } - - out_img.set_size(in_img.nr(),in_img.nc()); - - - // figure out the range that we should apply the filter to - const long first_row = filter.nr()/2; - const long first_col = filter.nc()/2; - const long last_row = in_img.nr() - ((filter.nr()-1)/2); - const long last_col = in_img.nc() - ((filter.nc()-1)/2); - - const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1); - zero_border_pixels(out_img, non_border); - - // apply the filter to the image - for (long r = first_row; r < last_row; ++r) - { - for (long c = first_col; c < last_col; ++c) - { - typedef typename image_traits<in_image_type>::pixel_type pixel_type; - typedef matrix<typename EXP::type,pixel_traits<pixel_type>::num,1> ptype; - ptype p; - ptype temp; - temp = 0; - for (long m = 0; m < filter.nr(); ++m) - { - for (long n = 0; n < filter.nc(); ++n) - { - // pull out the current pixel and put it into p - p = pixel_to_vector<typename EXP::type>(in_img[r-first_row+m][c-first_col+n]); - temp += p*filter(m,n); - } - } - - temp /= scale; - - pixel_type pp; - vector_to_pixel(pp, temp); - assign_pixel(out_img[r][c], pp); - } - } - - return non_border; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - typename EXP - > - rectangle spatially_filter_image ( - const in_image_type& in_img, - out_image_type& out_img, - const matrix_exp<EXP>& filter - ) - { - return 
spatially_filter_image(in_img,out_img,filter,1); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template < - typename in_image_type, - typename out_image_type, - typename EXP1, - typename EXP2, - typename T - > - rectangle grayscale_spatially_filter_image_separable ( - const in_image_type& in_img_, - out_image_type& out_img_, - const matrix_exp<EXP1>& _row_filter, - const matrix_exp<EXP2>& _col_filter, - T scale, - bool use_abs, - bool add_to - ) - { - const_temp_matrix<EXP1> row_filter(_row_filter); - const_temp_matrix<EXP2> col_filter(_col_filter); - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); - - DLIB_ASSERT(scale != 0 && row_filter.size() != 0 && col_filter.size() != 0 && - is_vector(row_filter) && - is_vector(col_filter), - "\trectangle spatially_filter_image_separable()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t scale: "<< scale - << "\n\t row_filter.size(): "<< row_filter.size() - << "\n\t col_filter.size(): "<< col_filter.size() - << "\n\t is_vector(row_filter): "<< is_vector(row_filter) - << "\n\t is_vector(col_filter): "<< is_vector(col_filter) - ); - DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, - "\trectangle spatially_filter_image_separable()" - << "\n\tYou must give two different image objects" - ); - - - const_image_view<in_image_type> in_img(in_img_); - image_view<out_image_type> out_img(out_img_); - - // if there isn't any input image then don't do anything - if (in_img.size() == 0) - { - out_img.clear(); - return rectangle(); - } - - out_img.set_size(in_img.nr(),in_img.nc()); - - - // figure out the range that we should apply the filter to - const long first_row = col_filter.size()/2; - const long first_col = row_filter.size()/2; - const long last_row = in_img.nr() - ((col_filter.size()-1)/2); - const long last_col = in_img.nc() - ((row_filter.size()-1)/2); - - const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1); - if (!add_to) - zero_border_pixels(out_img, non_border); - - typedef typename EXP1::type ptype; - - array2d<ptype> temp_img; - temp_img.set_size(in_img.nr(), in_img.nc()); - - // apply the row filter - for (long r = 0; r < in_img.nr(); ++r) - { - for (long c = first_col; c < last_col; ++c) - { - ptype p; - ptype temp = 0; - for (long n = 0; n < row_filter.size(); ++n) - { - // pull out the current pixel and put it into p - p = get_pixel_intensity(in_img[r][c-first_col+n]); - temp += p*row_filter(n); - } - temp_img[r][c] = temp; - } - } - - // apply the column filter - for (long r = first_row; r < last_row; ++r) - { - for (long c = first_col; c < last_col; ++c) - { - ptype temp = 0; - for (long m = 0; m < col_filter.size(); ++m) - { - temp += temp_img[r-first_row+m][c]*col_filter(m); - } - - temp /= scale; - - if (use_abs && temp < 0) - { - temp = -temp; - } - - // save this pixel to the output 
image - if (add_to == false) - { - assign_pixel(out_img[r][c], temp); - } - else - { - assign_pixel(out_img[r][c], temp + out_img[r][c]); - } - } - } - return non_border; - } - - } // namespace impl - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - typename EXP1, - typename EXP2 - > - struct is_float_filtering - { - const static bool value = is_same_type<typename image_traits<in_image_type>::pixel_type,float>::value && - is_same_type<typename image_traits<out_image_type>::pixel_type,float>::value && - is_same_type<typename EXP1::type,float>::value && - is_same_type<typename EXP2::type,float>::value; - }; - -// ---------------------------------------------------------------------------------------- - - // This overload is optimized to use SIMD instructions when filtering float images with - // float filters. - template < - typename in_image_type, - typename out_image_type, - typename EXP1, - typename EXP2 - > - rectangle float_spatially_filter_image_separable ( - const in_image_type& in_img_, - out_image_type& out_img_, - const matrix_exp<EXP1>& _row_filter, - const matrix_exp<EXP2>& _col_filter, - out_image_type& scratch_, - bool add_to = false - ) - { - // You can only use this function with images and filters containing float - // variables. - COMPILE_TIME_ASSERT((is_float_filtering<in_image_type,out_image_type,EXP1,EXP2>::value == true)); - - - const_temp_matrix<EXP1> row_filter(_row_filter); - const_temp_matrix<EXP2> col_filter(_col_filter); - DLIB_ASSERT(row_filter.size() != 0 && col_filter.size() != 0 && - is_vector(row_filter) && - is_vector(col_filter), - "\trectangle float_spatially_filter_image_separable()" - << "\n\t Invalid inputs were given to this function." 
- << "\n\t row_filter.size(): "<< row_filter.size() - << "\n\t col_filter.size(): "<< col_filter.size() - << "\n\t is_vector(row_filter): "<< is_vector(row_filter) - << "\n\t is_vector(col_filter): "<< is_vector(col_filter) - ); - DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, - "\trectangle float_spatially_filter_image_separable()" - << "\n\tYou must give two different image objects" - ); - - - const_image_view<in_image_type> in_img(in_img_); - image_view<out_image_type> out_img(out_img_); - - // if there isn't any input image then don't do anything - if (in_img.size() == 0) - { - out_img.clear(); - return rectangle(); - } - - out_img.set_size(in_img.nr(),in_img.nc()); - - // figure out the range that we should apply the filter to - const long first_row = col_filter.size()/2; - const long first_col = row_filter.size()/2; - const long last_row = in_img.nr() - ((col_filter.size()-1)/2); - const long last_col = in_img.nc() - ((row_filter.size()-1)/2); - - const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1); - if (!add_to) - zero_border_pixels(out_img, non_border); - - image_view<out_image_type> scratch(scratch_); - scratch.set_size(in_img.nr(), in_img.nc()); - - // apply the row filter - for (long r = 0; r < in_img.nr(); ++r) - { - long c = first_col; - for (; c < last_col-7; c+=8) - { - simd8f p,p2,p3, temp = 0, temp2=0, temp3=0; - long n = 0; - for (; n < row_filter.size()-2; n+=3) - { - // pull out the current pixel and put it into p - p.load(&in_img[r][c-first_col+n]); - p2.load(&in_img[r][c-first_col+n+1]); - p3.load(&in_img[r][c-first_col+n+2]); - temp += p*row_filter(n); - temp2 += p2*row_filter(n+1); - temp3 += p3*row_filter(n+2); - } - for (; n < row_filter.size(); ++n) - { - // pull out the current pixel and put it into p - p.load(&in_img[r][c-first_col+n]); - temp += p*row_filter(n); - } - temp += temp2 + temp3; - temp.store(&scratch[r][c]); - } - for (; c < last_col; ++c) - { - float p; - float temp = 0; - for 
(long n = 0; n < row_filter.size(); ++n) - { - // pull out the current pixel and put it into p - p = in_img[r][c-first_col+n]; - temp += p*row_filter(n); - } - scratch[r][c] = temp; - } - } - - // apply the column filter - for (long r = first_row; r < last_row; ++r) - { - long c = first_col; - for (; c < last_col-7; c+=8) - { - simd8f p, p2, p3, temp = 0, temp2 = 0, temp3 = 0; - long m = 0; - for (; m < col_filter.size()-2; m+=3) - { - p.load(&scratch[r-first_row+m][c]); - p2.load(&scratch[r-first_row+m+1][c]); - p3.load(&scratch[r-first_row+m+2][c]); - temp += p*col_filter(m); - temp2 += p2*col_filter(m+1); - temp3 += p3*col_filter(m+2); - } - for (; m < col_filter.size(); ++m) - { - p.load(&scratch[r-first_row+m][c]); - temp += p*col_filter(m); - } - temp += temp2+temp3; - - // save this pixel to the output image - if (add_to == false) - { - temp.store(&out_img[r][c]); - } - else - { - p.load(&out_img[r][c]); - temp += p; - temp.store(&out_img[r][c]); - } - } - for (; c < last_col; ++c) - { - float temp = 0; - for (long m = 0; m < col_filter.size(); ++m) - { - temp += scratch[r-first_row+m][c]*col_filter(m); - } - - // save this pixel to the output image - if (add_to == false) - { - out_img[r][c] = temp; - } - else - { - out_img[r][c] += temp; - } - } - } - return non_border; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - typename EXP1, - typename EXP2, - typename T - > - typename enable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale && - is_float_filtering<in_image_type,out_image_type,EXP1,EXP2>::value,rectangle>::type - spatially_filter_image_separable ( - const in_image_type& in_img, - out_image_type& out_img, - const matrix_exp<EXP1>& row_filter, - const matrix_exp<EXP2>& col_filter, - T scale, - bool use_abs = false, - bool add_to = false - ) - { - if (use_abs == false) - { - out_image_type scratch; - if 
(scale == 1) - return float_spatially_filter_image_separable(in_img, out_img, row_filter, col_filter, scratch, add_to); - else - return float_spatially_filter_image_separable(in_img, out_img, row_filter/scale, col_filter, scratch, add_to); - } - else - { - return impl::grayscale_spatially_filter_image_separable(in_img, out_img, row_filter, col_filter, scale, true, add_to); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - typename EXP1, - typename EXP2, - typename T - > - typename enable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale && - !is_float_filtering<in_image_type,out_image_type,EXP1,EXP2>::value,rectangle>::type - spatially_filter_image_separable ( - const in_image_type& in_img, - out_image_type& out_img, - const matrix_exp<EXP1>& row_filter, - const matrix_exp<EXP2>& col_filter, - T scale, - bool use_abs = false, - bool add_to = false - ) - { - return impl::grayscale_spatially_filter_image_separable(in_img,out_img, row_filter, col_filter, scale, use_abs, add_to); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - typename EXP1, - typename EXP2, - typename T - > - typename disable_if_c<pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale,rectangle>::type - spatially_filter_image_separable ( - const in_image_type& in_img_, - out_image_type& out_img_, - const matrix_exp<EXP1>& _row_filter, - const matrix_exp<EXP2>& _col_filter, - T scale - ) - { - const_temp_matrix<EXP1> row_filter(_row_filter); - const_temp_matrix<EXP2> col_filter(_col_filter); - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); - - 
DLIB_ASSERT(scale != 0 && row_filter.size() != 0 && col_filter.size() != 0 && - is_vector(row_filter) && - is_vector(col_filter), - "\trectangle spatially_filter_image_separable()" - << "\n\t Invalid inputs were given to this function." - << "\n\t scale: "<< scale - << "\n\t row_filter.size(): "<< row_filter.size() - << "\n\t col_filter.size(): "<< col_filter.size() - << "\n\t is_vector(row_filter): "<< is_vector(row_filter) - << "\n\t is_vector(col_filter): "<< is_vector(col_filter) - ); - DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, - "\trectangle spatially_filter_image_separable()" - << "\n\tYou must give two different image objects" - ); - - - const_image_view<in_image_type> in_img(in_img_); - image_view<out_image_type> out_img(out_img_); - - // if there isn't any input image then don't do anything - if (in_img.size() == 0) - { - out_img.clear(); - return rectangle(); - } - - out_img.set_size(in_img.nr(),in_img.nc()); - - - // figure out the range that we should apply the filter to - const long first_row = col_filter.size()/2; - const long first_col = row_filter.size()/2; - const long last_row = in_img.nr() - ((col_filter.size()-1)/2); - const long last_col = in_img.nc() - ((row_filter.size()-1)/2); - - const rectangle non_border = rectangle(first_col, first_row, last_col-1, last_row-1); - zero_border_pixels(out_img, non_border); - - typedef typename image_traits<in_image_type>::pixel_type pixel_type; - typedef matrix<typename EXP1::type,pixel_traits<pixel_type>::num,1> ptype; - - array2d<ptype> temp_img; - temp_img.set_size(in_img.nr(), in_img.nc()); - - // apply the row filter - for (long r = 0; r < in_img.nr(); ++r) - { - for (long c = first_col; c < last_col; ++c) - { - ptype p; - ptype temp; - temp = 0; - for (long n = 0; n < row_filter.size(); ++n) - { - // pull out the current pixel and put it into p - p = pixel_to_vector<typename EXP1::type>(in_img[r][c-first_col+n]); - temp += p*row_filter(n); - } - temp_img[r][c] = temp; - } - } - - // 
apply the column filter - for (long r = first_row; r < last_row; ++r) - { - for (long c = first_col; c < last_col; ++c) - { - ptype temp; - temp = 0; - for (long m = 0; m < col_filter.size(); ++m) - { - temp += temp_img[r-first_row+m][c]*col_filter(m); - } - - temp /= scale; - - - // save this pixel to the output image - pixel_type p; - vector_to_pixel(p, temp); - assign_pixel(out_img[r][c], p); - } - } - return non_border; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - typename EXP1, - typename EXP2 - > - rectangle spatially_filter_image_separable ( - const in_image_type& in_img, - out_image_type& out_img, - const matrix_exp<EXP1>& row_filter, - const matrix_exp<EXP2>& col_filter - ) - { - return spatially_filter_image_separable(in_img,out_img,row_filter,col_filter,1); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - typename EXP1, - typename EXP2, - typename T - > - rectangle spatially_filter_image_separable_down ( - const unsigned long downsample, - const in_image_type& in_img_, - out_image_type& out_img_, - const matrix_exp<EXP1>& row_filter, - const matrix_exp<EXP2>& col_filter, - T scale, - bool use_abs = false, - bool add_to = false - ) - { - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true ); - - DLIB_ASSERT(downsample > 0 && - scale != 0 && - row_filter.size()%2 == 1 
&& - col_filter.size()%2 == 1 && - is_vector(row_filter) && - is_vector(col_filter), - "\trectangle spatially_filter_image_separable_down()" - << "\n\t Invalid inputs were given to this function." - << "\n\t downsample: "<< downsample - << "\n\t scale: "<< scale - << "\n\t row_filter.size(): "<< row_filter.size() - << "\n\t col_filter.size(): "<< col_filter.size() - << "\n\t is_vector(row_filter): "<< is_vector(row_filter) - << "\n\t is_vector(col_filter): "<< is_vector(col_filter) - ); - DLIB_ASSERT(is_same_object(in_img_, out_img_) == false, - "\trectangle spatially_filter_image_separable_down()" - << "\n\tYou must give two different image objects" - ); - - - const_image_view<in_image_type> in_img(in_img_); - image_view<out_image_type> out_img(out_img_); - - // if there isn't any input image then don't do anything - if (in_img.size() == 0) - { - out_img.clear(); - return rectangle(); - } - - out_img.set_size((long)(std::ceil((double)in_img.nr()/downsample)), - (long)(std::ceil((double)in_img.nc()/downsample))); - - const double col_border = std::floor(col_filter.size()/2.0); - const double row_border = std::floor(row_filter.size()/2.0); - - // figure out the range that we should apply the filter to - const long first_row = (long)std::ceil(col_border/downsample); - const long first_col = (long)std::ceil(row_border/downsample); - const long last_row = (long)std::ceil((in_img.nr() - col_border)/downsample) - 1; - const long last_col = (long)std::ceil((in_img.nc() - row_border)/downsample) - 1; - - // zero border pixels - const rectangle non_border = rectangle(first_col, first_row, last_col, last_row); - zero_border_pixels(out_img,non_border); - - typedef typename EXP1::type ptype; - - array2d<ptype> temp_img; - temp_img.set_size(in_img.nr(), out_img.nc()); - - // apply the row filter - for (long r = 0; r < temp_img.nr(); ++r) - { - for (long c = non_border.left(); c <= non_border.right(); ++c) - { - ptype p; - ptype temp = 0; - for (long n = 0; n < 
row_filter.size(); ++n) - { - // pull out the current pixel and put it into p - p = get_pixel_intensity(in_img[r][c*downsample-row_filter.size()/2+n]); - temp += p*row_filter(n); - } - temp_img[r][c] = temp; - } - } - - // apply the column filter - for (long r = non_border.top(); r <= non_border.bottom(); ++r) - { - for (long c = non_border.left(); c <= non_border.right(); ++c) - { - ptype temp = 0; - for (long m = 0; m < col_filter.size(); ++m) - { - temp += temp_img[r*downsample-col_filter.size()/2+m][c]*col_filter(m); - } - - temp /= scale; - - if (use_abs && temp < 0) - { - temp = -temp; - } - - // save this pixel to the output image - if (add_to == false) - { - assign_pixel(out_img[r][c], temp); - } - else - { - assign_pixel(out_img[r][c], temp + out_img[r][c]); - } - } - } - - return non_border; - } - - template < - typename in_image_type, - typename out_image_type, - typename EXP1, - typename EXP2 - > - rectangle spatially_filter_image_separable_down ( - const unsigned long downsample, - const in_image_type& in_img, - out_image_type& out_img, - const matrix_exp<EXP1>& row_filter, - const matrix_exp<EXP2>& col_filter - ) - { - return spatially_filter_image_separable_down(downsample,in_img,out_img,row_filter,col_filter,1); - } - -// ---------------------------------------------------------------------------------------- - - template < - long NR, - long NC, - typename T, - typename U, - typename in_image_type - > - inline void separable_3x3_filter_block_grayscale ( - T (&block)[NR][NC], - const in_image_type& img_, - const long& r, - const long& c, - const U& fe1, // separable filter end - const U& fm, // separable filter middle - const U& fe2 // separable filter end 2 - ) - { - const_image_view<in_image_type> img(img_); - // make sure requires clause is not broken - DLIB_ASSERT(shrink_rect(get_rect(img),1).contains(c,r) && - shrink_rect(get_rect(img),1).contains(c+NC-1,r+NR-1), - "\t void separable_3x3_filter_block_grayscale()" - << "\n\t The sub-window 
doesn't fit inside the given image." - << "\n\t get_rect(img): " << get_rect(img) - << "\n\t (c,r): " << point(c,r) - << "\n\t (c+NC-1,r+NR-1): " << point(c+NC-1,r+NR-1) - ); - - - T row_filt[NR+2][NC]; - for (long rr = 0; rr < NR+2; ++rr) - { - for (long cc = 0; cc < NC; ++cc) - { - row_filt[rr][cc] = get_pixel_intensity(img[r+rr-1][c+cc-1])*fe1 + - get_pixel_intensity(img[r+rr-1][c+cc])*fm + - get_pixel_intensity(img[r+rr-1][c+cc+1])*fe2; - } - } - - for (long rr = 0; rr < NR; ++rr) - { - for (long cc = 0; cc < NC; ++cc) - { - block[rr][cc] = (row_filt[rr][cc]*fe1 + - row_filt[rr+1][cc]*fm + - row_filt[rr+2][cc]*fe2); - } - } - - } - -// ---------------------------------------------------------------------------------------- - - template < - long NR, - long NC, - typename T, - typename U, - typename in_image_type - > - inline void separable_3x3_filter_block_rgb ( - T (&block)[NR][NC], - const in_image_type& img_, - const long& r, - const long& c, - const U& fe1, // separable filter end - const U& fm, // separable filter middle - const U& fe2 // separable filter end 2 - ) - { - const_image_view<in_image_type> img(img_); - // make sure requires clause is not broken - DLIB_ASSERT(shrink_rect(get_rect(img),1).contains(c,r) && - shrink_rect(get_rect(img),1).contains(c+NC-1,r+NR-1), - "\t void separable_3x3_filter_block_rgb()" - << "\n\t The sub-window doesn't fit inside the given image." 
- << "\n\t get_rect(img): " << get_rect(img) - << "\n\t (c,r): " << point(c,r) - << "\n\t (c+NC-1,r+NR-1): " << point(c+NC-1,r+NR-1) - ); - - T row_filt[NR+2][NC]; - for (long rr = 0; rr < NR+2; ++rr) - { - for (long cc = 0; cc < NC; ++cc) - { - row_filt[rr][cc].red = img[r+rr-1][c+cc-1].red*fe1 + img[r+rr-1][c+cc].red*fm + img[r+rr-1][c+cc+1].red*fe2; - row_filt[rr][cc].green = img[r+rr-1][c+cc-1].green*fe1 + img[r+rr-1][c+cc].green*fm + img[r+rr-1][c+cc+1].green*fe2; - row_filt[rr][cc].blue = img[r+rr-1][c+cc-1].blue*fe1 + img[r+rr-1][c+cc].blue*fm + img[r+rr-1][c+cc+1].blue*fe2; - } - } - - for (long rr = 0; rr < NR; ++rr) - { - for (long cc = 0; cc < NC; ++cc) - { - block[rr][cc].red = row_filt[rr][cc].red*fe1 + row_filt[rr+1][cc].red*fm + row_filt[rr+2][cc].red*fe2; - block[rr][cc].green = row_filt[rr][cc].green*fe1 + row_filt[rr+1][cc].green*fm + row_filt[rr+2][cc].green*fe2; - block[rr][cc].blue = row_filt[rr][cc].blue*fe1 + row_filt[rr+1][cc].blue*fm + row_filt[rr+2][cc].blue*fe2; - } - } - - } - -// ---------------------------------------------------------------------------------------- - - inline double gaussian ( - double x, - double sigma - ) - { - DLIB_ASSERT(sigma > 0, - "\tdouble gaussian(x)" - << "\n\t sigma must be bigger than 0" - << "\n\t sigma: " << sigma - ); - const double sqrt_2_pi = 2.5066282746310002416123552393401041626930; - return 1.0/(sigma*sqrt_2_pi) * std::exp( -(x*x)/(2*sigma*sigma)); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - matrix<T,0,1> create_gaussian_filter ( - double sigma, - int max_size - ) - { - DLIB_ASSERT(sigma > 0 && max_size > 0 && (max_size%2)==1, - "\t matrix<T,0,1> create_gaussian_filter()" - << "\n\t Invalid inputs were given to this function." - << "\n\t sigma: " << sigma - << "\n\t max_size: " << max_size - ); - - // Adjust the size so that the ratio of the gaussian values isn't huge. 
- // This only matters when T is an integer type. However, we do it for - // all types so that the behavior of this function is always relatively - // the same. - while (gaussian(0,sigma)/gaussian(max_size/2,sigma) > 50) - --max_size; - - - matrix<double,0,1> f(max_size); - for (long i = 0; i < f.size(); ++i) - { - f(i) = gaussian(i-max_size/2, sigma); - } - - if (is_float_type<T>::value == false) - { - f /= f(0); - return matrix_cast<T>(round(f)); - } - else - { - return matrix_cast<T>(f); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type - > - rectangle gaussian_blur ( - const in_image_type& in_img, - out_image_type& out_img, - double sigma = 1, - int max_size = 1001 - ) - { - DLIB_ASSERT(sigma > 0 && max_size > 0 && (max_size%2)==1 && - is_same_object(in_img, out_img) == false, - "\t void gaussian_blur()" - << "\n\t Invalid inputs were given to this function." - << "\n\t sigma: " << sigma - << "\n\t max_size: " << max_size - << "\n\t is_same_object(in_img,out_img): " << is_same_object(in_img,out_img) - ); - - if (sigma < 18) - { - typedef typename pixel_traits<typename image_traits<out_image_type>::pixel_type>::basic_pixel_type type; - typedef typename promote<type>::type ptype; - const matrix<ptype,0,1>& filt = create_gaussian_filter<ptype>(sigma, max_size); - ptype scale = sum(filt); - scale = scale*scale; - return spatially_filter_image_separable(in_img, out_img, filt, filt, scale); - } - else - { - // For large sigma we need to use a type with a lot of precision to avoid - // numerical problems. So we use double here. 
- typedef double ptype; - const matrix<ptype,0,1>& filt = create_gaussian_filter<ptype>(sigma, max_size); - ptype scale = sum(filt); - scale = scale*scale; - return spatially_filter_image_separable(in_img, out_img, filt, filt, scale); - } - - } - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template < - bool add_to, - typename image_type1, - typename image_type2 - > - void sum_filter ( - const image_type1& img_, - image_type2& out_, - const rectangle& rect - ) - { - const_image_view<image_type1> img(img_); - image_view<image_type2> out(out_); - DLIB_ASSERT(img.nr() == out.nr() && - img.nc() == out.nc() && - is_same_object(img_,out_) == false, - "\t void sum_filter()" - << "\n\t Invalid arguments given to this function." - << "\n\t img.nr(): " << img.nr() - << "\n\t img.nc(): " << img.nc() - << "\n\t out.nr(): " << out.nr() - << "\n\t out.nc(): " << out.nc() - << "\n\t is_same_object(img_,out_): " << is_same_object(img_,out_) - ); - - typedef typename image_traits<image_type1>::pixel_type pixel_type; - typedef typename promote<pixel_type>::type ptype; - - std::vector<ptype> column_sum; - column_sum.resize(img.nc() + rect.width(),0); - - const long top = -1 + rect.top(); - const long bottom = -1 + rect.bottom(); - long left = rect.left()-1; - - // initialize column_sum at row -1 - for (unsigned long j = 0; j < column_sum.size(); ++j) - { - rectangle strip(left,top,left,bottom); - strip = strip.intersect(get_rect(img)); - if (!strip.is_empty()) - { - column_sum[j] = sum(matrix_cast<ptype>(subm(mat(img),strip))); - } - - ++left; - } - - - const rectangle area = get_rect(img); - - // Save width to avoid computing it over and over. - const long width = rect.width(); - - - // Now do the bulk of the filtering work. - for (long r = 0; r < img.nr(); ++r) - { - // set to sum at point(-1,r). i.e. 
should be equal to sum(mat(img), translate_rect(rect, point(-1,r))) - // We compute it's value in the next loop. - ptype cur_sum = 0; - - // Update the first part of column_sum since we only work on the c+width part of column_sum - // in the main loop. - const long top = r + rect.top() - 1; - const long bottom = r + rect.bottom(); - for (long k = 0; k < width; ++k) - { - const long right = k-width + rect.right(); - - const ptype br_corner = area.contains(right,bottom) ? img[bottom][right] : 0; - const ptype tr_corner = area.contains(right,top) ? img[top][right] : 0; - // update the sum in this column now that we are on the next row - column_sum[k] = column_sum[k] + br_corner - tr_corner; - cur_sum += column_sum[k]; - } - - for (long c = 0; c < img.nc(); ++c) - { - const long top = r + rect.top() - 1; - const long bottom = r + rect.bottom(); - const long right = c + rect.right(); - - const ptype br_corner = area.contains(right,bottom) ? img[bottom][right] : 0; - const ptype tr_corner = area.contains(right,top) ? img[top][right] : 0; - - // update the sum in this column now that we are on the next row - column_sum[c+width] = column_sum[c+width] + br_corner - tr_corner; - - // add in the new right side of the rect and subtract the old right side. 
- cur_sum = cur_sum + column_sum[c+width] - column_sum[c]; - - if (add_to) - out[r][c] += static_cast<typename image_traits<image_type2>::pixel_type>(cur_sum); - else - out[r][c] = static_cast<typename image_traits<image_type2>::pixel_type>(cur_sum); - } - } - } - } - - template < - typename image_type1, - typename image_type2 - > - void sum_filter ( - const image_type1& img, - image_type2& out, - const rectangle& rect - ) - { - impl::sum_filter<true>(img,out,rect); - } - - template < - typename image_type1, - typename image_type2 - > - void sum_filter_assign ( - const image_type1& img, - image_type2& out, - const rectangle& rect - ) - { - set_image_size(out, num_rows(img), num_columns(img)); - impl::sum_filter<false>(img,out,rect); - } - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template <typename T> - class fast_deque - { - /* - This is a fast and minimal implementation of std::deque for - use with the max_filter. - - This object assumes that no more than max_size elements - will ever be pushed into it at a time. 
- */ - public: - - explicit fast_deque(unsigned long max_size) - { - // find a power of two that upper bounds max_size - mask = 2; - while (mask < max_size) - mask *= 2; - - clear(); - - data.resize(mask); - --mask; // make into bit mask - } - - void clear() - { - first = 1; - last = 0; - size = 0; - } - - bool empty() const - { - return size == 0; - } - - void pop_back() - { - last = (last-1)&mask; - --size; - } - - void push_back(const T& item) - { - last = (last+1)&mask; - ++size; - data[last] = item; - } - - void pop_front() - { - first = (first+1)&mask; - --size; - } - - const T& front() const - { - return data[first]; - } - - const T& back() const - { - return data[last]; - } - - private: - - std::vector<T> data; - unsigned long mask; - unsigned long first; - unsigned long last; - unsigned long size; - }; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2 - > - void max_filter ( - image_type1& img_, - image_type2& out_, - const long width, - const long height, - const typename image_traits<image_type1>::pixel_type& thresh - ) - { - image_view<image_type1> img(img_); - image_view<image_type2> out(out_); - DLIB_ASSERT( width > 0 && - height > 0 && - out.nr() == img.nr() && - out.nc() == img.nc() && - is_same_object(img_,out_) == false, - "\t void max_filter()" - << "\n\t Invalid arguments given to this function." 
- << "\n\t img.nr(): " << img.nr() - << "\n\t img.nc(): " << img.nc() - << "\n\t out.nr(): " << out.nr() - << "\n\t out.nc(): " << out.nc() - << "\n\t width: " << width - << "\n\t height: " << height - << "\n\t is_same_object(img_,out_): " << is_same_object(img_,out_) - ); - - typedef typename image_traits<image_type1>::pixel_type pixel_type; - - - dlib::impl::fast_deque<std::pair<long,pixel_type> > Q(std::max(width,height)); - - const long last_col = std::max(img.nc(), ((width-1)/2)); - const long last_row = std::max(img.nr(), ((height-1)/2)); - - // run max filter along rows of img - for (long r = 0; r < img.nr(); ++r) - { - Q.clear(); - for (long c = 0; c < (width-1)/2 && c < img.nc(); ++c) - { - while (!Q.empty() && img[r][c] >= Q.back().second) - Q.pop_back(); - Q.push_back(std::make_pair(c,img[r][c])); - } - - for (long c = (width-1)/2; c < img.nc(); ++c) - { - while (!Q.empty() && img[r][c] >= Q.back().second) - Q.pop_back(); - while (!Q.empty() && Q.front().first <= c-width) - Q.pop_front(); - Q.push_back(std::make_pair(c,img[r][c])); - - img[r][c-((width-1)/2)] = Q.front().second; - } - - for (long c = last_col; c < img.nc() + ((width-1)/2); ++c) - { - while (!Q.empty() && Q.front().first <= c-width) - Q.pop_front(); - - img[r][c-((width-1)/2)] = Q.front().second; - } - } - - // run max filter along columns of img. Store result in out. 
- for (long cc = 0; cc < img.nc(); ++cc) - { - Q.clear(); - for (long rr = 0; rr < (height-1)/2 && rr < img.nr(); ++rr) - { - while (!Q.empty() && img[rr][cc] >= Q.back().second) - Q.pop_back(); - Q.push_back(std::make_pair(rr,img[rr][cc])); - } - - for (long rr = (height-1)/2; rr < img.nr(); ++rr) - { - while (!Q.empty() && img[rr][cc] >= Q.back().second) - Q.pop_back(); - while (!Q.empty() && Q.front().first <= rr-height) - Q.pop_front(); - Q.push_back(std::make_pair(rr,img[rr][cc])); - - out[rr-((height-1)/2)][cc] += std::max(Q.front().second, thresh); - } - - for (long rr = last_row; rr < img.nr() + ((height-1)/2); ++rr) - { - while (!Q.empty() && Q.front().first <= rr-height) - Q.pop_front(); - - out[rr-((height-1)/2)][cc] += std::max(Q.front().second, thresh); - } - } - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SPATIAL_FILTERINg_H_ - - diff --git a/ml/dlib/dlib/image_transforms/spatial_filtering_abstract.h b/ml/dlib/dlib/image_transforms/spatial_filtering_abstract.h deleted file mode 100644 index 5e200aa9a..000000000 --- a/ml/dlib/dlib/image_transforms/spatial_filtering_abstract.h +++ /dev/null @@ -1,487 +0,0 @@ -// Copyright (C) 2006 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SPATIAL_FILTERINg_ABSTRACT_ -#ifdef DLIB_SPATIAL_FILTERINg_ABSTRACT_ - -#include "../pixel.h" -#include "../matrix.h" -#include "../image_processing/generic_image.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - typename EXP, - typename T - > - rectangle spatially_filter_image ( - const in_image_type& in_img, - out_image_type& out_img, - const matrix_exp<EXP>& filter, - T scale = 1, - bool use_abs = false, - bool add_to = false - ); - /*! 
- requires - - in_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - out_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - in_img and out_img do not contain pixels with an alpha channel. That is, - pixel_traits::has_alpha is false for the pixels in these objects. - - is_same_object(in_img, out_img) == false - - T must be some scalar type - - filter.size() != 0 - - scale != 0 - - if (in_img doesn't contain grayscale pixels) then - - use_abs == false && add_to == false - (i.e. You can only use the use_abs and add_to options with grayscale images) - ensures - - Applies the given spatial filter to in_img and stores the result in out_img (i.e. - cross-correlates in_img with filter). Also divides each resulting pixel by scale. - - The intermediate filter computations will be carried out using variables of type EXP::type. - This is whatever scalar type is used inside the filter matrix. - - Pixel values are stored into out_img using the assign_pixel() function and therefore - any applicable color space conversion or value saturation is performed. Note that if - add_to is true then the filtered output value will be added to out_img rather than - overwriting the original value. - - if (in_img doesn't contain grayscale pixels) then - - The filter is applied to each color channel independently. - - if (use_abs == true) then - - pixel values after filtering that are < 0 are converted to their absolute values. - - The filter is applied such that it's centered over the pixel it writes its - output into. For centering purposes, we consider the center element of the - filter to be filter(filter.nr()/2,filter.nc()/2). This means that the filter - that writes its output to a pixel at location point(c,r) and is W by H (width - by height) pixels in size operates on exactly the pixels in the rectangle - centered_rect(point(c,r),W,H) within in_img. 
- - Pixels close enough to the edge of in_img to not have the filter still fit - inside the image are always set to zero. - - #out_img.nc() == in_img.nc() - - #out_img.nr() == in_img.nr() - - returns a rectangle which indicates what pixels in #out_img are considered - non-border pixels and therefore contain output from the filter. - - if (use_abs == false && all images and filters contain float types) then - - This function will use SIMD instructions and is particularly fast. So if - you can use this form of the function it can give a decent speed boost. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - typename EXP1, - typename EXP2, - typename T - > - rectangle spatially_filter_image_separable ( - const in_image_type& in_img, - out_image_type& out_img, - const matrix_exp<EXP1>& row_filter, - const matrix_exp<EXP2>& col_filter, - T scale = 1, - bool use_abs = false, - bool add_to = false - ); - /*! - requires - - in_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - out_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - in_img and out_img do not contain pixels with an alpha channel. That is, - pixel_traits::has_alpha is false for the pixels in these objects. - - is_same_object(in_img, out_img) == false - - T must be some scalar type - - scale != 0 - - row_filter.size() != 0 - - col_filter.size() != 0 - - is_vector(row_filter) == true - - is_vector(col_filter) == true - - if (in_img doesn't contain grayscale pixels) then - - use_abs == false && add_to == false - (i.e. You can only use the use_abs and add_to options with grayscale images) - ensures - - Applies the given separable spatial filter to in_img and stores the result in out_img. - Also divides each resulting pixel by scale. 
Calling this function has the same - effect as calling the regular spatially_filter_image() routine with a filter, - FILT, defined as follows: - - FILT(r,c) == col_filter(r)*row_filter(c) - - The intermediate filter computations will be carried out using variables of type EXP1::type. - This is whatever scalar type is used inside the row_filter matrix. - - Pixel values are stored into out_img using the assign_pixel() function and therefore - any applicable color space conversion or value saturation is performed. Note that if - add_to is true then the filtered output value will be added to out_img rather than - overwriting the original value. - - if (in_img doesn't contain grayscale pixels) then - - The filter is applied to each color channel independently. - - if (use_abs == true) then - - pixel values after filtering that are < 0 are converted to their absolute values. - - The filter is applied such that it's centered over the pixel it writes its - output into. For centering purposes, we consider the center element of the - filter to be FILT(col_filter.size()/2,row_filter.size()/2). This means that - the filter that writes its output to a pixel at location point(c,r) and is W - by H (width by height) pixels in size operates on exactly the pixels in the - rectangle centered_rect(point(c,r),W,H) within in_img. - - Pixels close enough to the edge of in_img to not have the filter still fit - inside the image are always set to zero. - - #out_img.nc() == in_img.nc() - - #out_img.nr() == in_img.nr() - - returns a rectangle which indicates what pixels in #out_img are considered - non-border pixels and therefore contain output from the filter. - - if (use_abs == false && all images and filters contain float types) then - - This function will use SIMD instructions and is particularly fast. So if - you can use this form of the function it can give a decent speed boost. 
- !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - typename EXP1, - typename EXP2 - > - rectangle float_spatially_filter_image_separable ( - const in_image_type& in_img, - out_image_type& out_img, - const matrix_exp<EXP1>& row_filter, - const matrix_exp<EXP2>& col_filter, - out_image_type& scratch, - bool add_to = false - ); - /*! - requires - - in_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - out_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - in_img, out_img, row_filter, and col_filter must all contain float type elements. - - is_same_object(in_img, out_img) == false - - row_filter.size() != 0 - - col_filter.size() != 0 - - is_vector(row_filter) == true - - is_vector(col_filter) == true - ensures - - This function is identical to the above spatially_filter_image_separable() - function except that it can only be invoked on float images with float - filters. In fact, spatially_filter_image_separable() invokes - float_spatially_filter_image_separable() in those cases. So why is - float_spatially_filter_image_separable() in the public API? The reason is - because the separable filtering routines internally allocate an image each - time they are called. If you want to avoid this memory allocation then you - can call float_spatially_filter_image_separable() and provide the scratch - image as input. This allows you to reuse the same scratch image for many - calls to float_spatially_filter_image_separable() and thereby avoid having it - allocated and freed for each call. 
- !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type, - typename EXP1, - typename EXP2, - typename T - > - rectangle spatially_filter_image_separable_down ( - const unsigned long downsample, - const in_image_type& in_img, - out_image_type& out_img, - const matrix_exp<EXP1>& row_filter, - const matrix_exp<EXP2>& col_filter, - T scale = 1, - bool use_abs = false, - bool add_to = false - ); - /*! - requires - - in_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - out_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - in_img and out_img do not contain pixels with an alpha channel. That is, - pixel_traits::has_alpha is false for the pixels in these objects. - - out_img contains grayscale pixels. - - is_same_object(in_img, out_img) == false - - T must be some scalar type - - scale != 0 - - is_vector(row_filter) == true - - is_vector(col_filter) == true - - row_filter.size() % 2 == 1 (i.e. must be odd) - - col_filter.size() % 2 == 1 (i.e. must be odd) - - downsample > 0 - ensures - - This function is equivalent to calling - spatially_filter_image_separable(in_img,out_img,row_filter,col_filter,scale,use_abs,add_to) - and then downsampling the output image by a factor of downsample. Therefore, - we will have that: - - #out_img.nr() == ceil((double)in_img.nr()/downsample) - - #out_img.nc() == ceil((double)in_img.nc()/downsample) - - #out_img[r][c] == filtered pixel corresponding to in_img[r*downsample][c*downsample] - - returns a rectangle which indicates what pixels in #out_img are considered - non-border pixels and therefore contain output from the filter. 
- - Note that the first row and column of non-zero padded data are the following - - first_row == ceil(floor(col_filter.size()/2.0)/downsample) - - first_col == ceil(floor(row_filter.size()/2.0)/downsample) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - long NR, - long NC, - typename T, - typename U, - typename in_image_type - > - inline void separable_3x3_filter_block_grayscale ( - T (&block)[NR][NC], - const in_image_type& img, - const long& r, - const long& c, - const U& fe1, - const U& fm, - const U& fe2 - ); - /*! - requires - - in_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - T and U should be scalar types - - shrink_rect(get_rect(img),1).contains(c,r) - - shrink_rect(get_rect(img),1).contains(c+NC-1,r+NR-1) - ensures - - Filters the image in the sub-window of img defined by a rectangle - with its upper left corner at (c,r) and lower right at (c+NC-1,r+NR-1). - - The output of the filter is stored in #block. Note that img will be - interpreted as a grayscale image. - - The filter used is defined by the separable filter [fe1 fm fe2]. So the - spatial filter is thus: - fe1*fe1 fe1*fm fe2*fe1 - fe1*fm fm*fm fe2*fm - fe1*fe2 fe2*fm fe2*fe2 - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - long NR, - long NC, - typename T, - typename U, - typename in_image_type - > - inline void separable_3x3_filter_block_rgb ( - T (&block)[NR][NC], - const in_image_type& img, - const long& r, - const long& c, - const U& fe1, - const U& fm, - const U& fe2 - ); - /*! - requires - - in_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - img must contain RGB pixels, that is pixel_traits::rgb == true for the pixels - in img. - - T should be a struct with .red .green and .blue members. 
- - U should be a scalar type - - shrink_rect(get_rect(img),1).contains(c,r) - - shrink_rect(get_rect(img),1).contains(c+NC-1,r+NR-1) - ensures - - Filters the image in the sub-window of img defined by a rectangle - with its upper left corner at (c,r) and lower right at (c+NC-1,r+NR-1). - - The output of the filter is stored in #block. Note that the filter is applied - to each color component independently. - - The filter used is defined by the separable filter [fe1 fm fe2]. So the - spatial filter is thus: - fe1*fe1 fe1*fm fe2*fe1 - fe1*fm fm*fm fe2*fm - fe1*fe2 fe2*fm fe2*fe2 - !*/ - -// ---------------------------------------------------------------------------------------- - - inline double gaussian ( - double x, - double sigma - ); - /*! - requires - - sigma > 0 - ensures - - computes and returns the value of a 1D Gaussian function with mean 0 - and standard deviation sigma at the given x value. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - matrix<T,0,1> create_gaussian_filter ( - double sigma, - int size - ); - /*! - requires - - sigma > 0 - - size > 0 - - size is an odd number - ensures - - returns a separable Gaussian filter F such that: - - is_vector(F) == true - - F.size() == size - - F is suitable for use with the spatially_filter_image_separable() routine - and its use with this function corresponds to running a Gaussian filter - of sigma width over an image. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type - > - rectangle gaussian_blur ( - const in_image_type& in_img, - out_image_type& out_img, - double sigma = 1, - int max_size = 1001 - ); - /*! 
- requires - - in_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - out_image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - in_img and out_img do not contain pixels with an alpha channel. That is, - pixel_traits::has_alpha is false for the pixels in these objects. - - is_same_object(in_img, out_img) == false - - sigma > 0 - - max_size > 0 - - max_size is an odd number - ensures - - Filters in_img with a Gaussian filter of sigma width. The actual spatial filter will - be applied to pixel blocks that are at most max_size wide and max_size tall (note that - this function will automatically select a smaller block size as appropriate). The - results are stored into #out_img. - - Pixel values are stored into out_img using the assign_pixel() function and therefore - any applicable color space conversion or value saturation is performed. - - if (in_img doesn't contain grayscale pixels) then - - The filter is applied to each color channel independently. - - Pixels close enough to the edge of in_img to not have the filter still fit - inside the image are set to zero. - - #out_img.nc() == in_img.nc() - - #out_img.nr() == in_img.nr() - - returns a rectangle which indicates what pixels in #out_img are considered - non-border pixels and therefore contain output from the filter. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2 - > - void sum_filter ( - const image_type1& img, - image_type2& out, - const rectangle& rect - ); - /*! - requires - - out.nr() == img.nr() - - out.nc() == img.nc() - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h and it must contain grayscale pixels. 
- - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h and it must contain grayscale pixels. - - is_same_object(img,out) == false - ensures - - for all valid r and c: - - let SUM(r,c) == sum of pixels from img which are inside the rectangle - translate_rect(rect, point(c,r)). - - #out[r][c] == out[r][c] + SUM(r,c) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2 - > - void sum_filter_assign ( - const image_type1& img, - image_type2& out, - const rectangle& rect - ); - /*! - requires - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h and it must contain grayscale pixels. - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h and it must contain grayscale pixels. - - is_same_object(img,out) == false - ensures - - #out.nr() == img.nr() - - #out.nc() == img.nc() - - for all valid r and c: - - let SUM(r,c) == sum of pixels from img which are inside the rectangle - translate_rect(rect, point(c,r)). - - #out[r][c] == SUM(r,c) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type1, - typename image_type2 - > - void max_filter ( - image_type1& img, - image_type2& out, - const long width, - const long height, - const typename image_traits<image_type1>::pixel_type& thresh - ); - /*! - requires - - out.nr() == img.nr() - - out.nc() == img.nc() - - image_type1 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h and it must contain grayscale pixels. - - image_type2 == an image object that implements the interface defined in - dlib/image_processing/generic_image.h and it must contain grayscale pixels. 
- - is_same_object(img,out) == false - - width > 0 && height > 0 - ensures - - for all valid r and c: - - let MAX(r,c) == maximum of pixels from img which are inside the rectangle - centered_rect(point(c,r), width, height) - - if (MAX(r,c) >= thresh) - - #out[r][c] == out[r][c] + MAX(r,c) - - else - - #out[r][c] == out[r][c] + thresh - - Does not change the size of img. - - Uses img as scratch space. Therefore, the pixel values in img will have - been modified by this function. That is, max_filter() destroys the contents - of img. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SPATIAL_FILTERINg_ABSTRACT_ - diff --git a/ml/dlib/dlib/image_transforms/thresholding.h b/ml/dlib/dlib/image_transforms/thresholding.h deleted file mode 100644 index e4fb02c4a..000000000 --- a/ml/dlib/dlib/image_transforms/thresholding.h +++ /dev/null @@ -1,340 +0,0 @@ -// Copyright (C) 2006 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#ifndef DLIB_THRESHOLDINg_ -#define DLIB_THRESHOLDINg_ - -#include "../pixel.h" -#include "thresholding_abstract.h" -#include "equalize_histogram.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - const unsigned char on_pixel = 255; - const unsigned char off_pixel = 0; - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type - > - void threshold_image ( - const in_image_type& in_img_, - out_image_type& out_img_, - typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type thresh - ) - { - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); - - COMPILE_TIME_ASSERT(pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale); - - const_image_view<in_image_type> in_img(in_img_); - image_view<out_image_type> out_img(out_img_); - - // if there isn't any input image then don't do anything - if (in_img.size() == 0) - { - out_img.clear(); - return; - } - - out_img.set_size(in_img.nr(),in_img.nc()); - - for (long r = 0; r < in_img.nr(); ++r) - { - for (long c = 0; c < in_img.nc(); ++c) - { - if (get_pixel_intensity(in_img[r][c]) >= thresh) - assign_pixel(out_img[r][c], on_pixel); - else - assign_pixel(out_img[r][c], off_pixel); - } - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename image_type - > - void threshold_image ( - image_type& img, - typename pixel_traits<typename image_traits<image_type>::pixel_type>::basic_pixel_type thresh - ) - { - threshold_image(img,img,thresh); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - 
typename out_image_type - > - void auto_threshold_image ( - const in_image_type& in_img_, - out_image_type& out_img_ - ) - { - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::is_unsigned == true ); - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::is_unsigned == true ); - - COMPILE_TIME_ASSERT(pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale); - - image_view<out_image_type> out_img(out_img_); - - // if there isn't any input image then don't do anything - if (image_size(in_img_) == 0) - { - out_img.clear(); - return; - } - - unsigned long thresh; - // find the threshold we should use - matrix<unsigned long,1> hist; - get_histogram(in_img_,hist); - - const_image_view<in_image_type> in_img(in_img_); - - // Start our two means (a and b) out at the ends of the histogram - long a = 0; - long b = hist.size()-1; - bool moved_a = true; - bool moved_b = true; - while (moved_a || moved_b) - { - moved_a = false; - moved_b = false; - - // catch the degenerate case where the histogram is empty - if (a >= b) - break; - - if (hist(a) == 0) - { - ++a; - moved_a = true; - } - - if (hist(b) == 0) - { - --b; - moved_b = true; - } - } - - // now do k-means clustering with k = 2 on the histogram. 
- moved_a = true; - moved_b = true; - while (moved_a || moved_b) - { - moved_a = false; - moved_b = false; - - int64 a_hits = 0; - int64 b_hits = 0; - int64 a_mass = 0; - int64 b_mass = 0; - - for (long i = 0; i < hist.size(); ++i) - { - // if i is closer to a - if (std::abs(i-a) < std::abs(i-b)) - { - a_mass += hist(i)*i; - a_hits += hist(i); - } - else // if i is closer to b - { - b_mass += hist(i)*i; - b_hits += hist(i); - } - } - - long new_a = (a_mass + a_hits/2)/a_hits; - long new_b = (b_mass + b_hits/2)/b_hits; - - if (new_a != a) - { - moved_a = true; - a = new_a; - } - - if (new_b != b) - { - moved_b = true; - b = new_b; - } - } - - // put the threshold between the two means we found - thresh = (a + b)/2; - - // now actually apply the threshold - threshold_image(in_img_,out_img_,thresh); - } - - template < - typename image_type - > - void auto_threshold_image ( - image_type& img - ) - { - auto_threshold_image(img,img); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type - > - void hysteresis_threshold ( - const in_image_type& in_img_, - out_image_type& out_img_, - typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type lower_thresh, - typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type upper_thresh - ) - { - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false ); - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false ); - - COMPILE_TIME_ASSERT(pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale); - - DLIB_ASSERT( lower_thresh <= upper_thresh && is_same_object(in_img_, out_img_) == false, - "\tvoid hysteresis_threshold(in_img_, out_img_, lower_thresh, upper_thresh)" - << "\n\tYou can't use an upper_thresh that is less than your lower_thresh" - << 
"\n\tlower_thresh: " << lower_thresh - << "\n\tupper_thresh: " << upper_thresh - << "\n\tis_same_object(in_img_,out_img_): " << is_same_object(in_img_,out_img_) - ); - - const_image_view<in_image_type> in_img(in_img_); - image_view<out_image_type> out_img(out_img_); - - // if there isn't any input image then don't do anything - if (in_img.size() == 0) - { - out_img.clear(); - return; - } - - out_img.set_size(in_img.nr(),in_img.nc()); - assign_all_pixels(out_img, off_pixel); - - const long size = 1000; - long rstack[size]; - long cstack[size]; - - // now do the thresholding - for (long r = 0; r < in_img.nr(); ++r) - { - for (long c = 0; c < in_img.nc(); ++c) - { - typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type p; - assign_pixel(p,in_img[r][c]); - if (p >= upper_thresh) - { - // now do line following for pixels >= lower_thresh. - // set the stack position to 0. - long pos = 1; - rstack[0] = r; - cstack[0] = c; - - while (pos > 0) - { - --pos; - const long r = rstack[pos]; - const long c = cstack[pos]; - - // This is the base case of our recursion. We want to stop if we hit a - // pixel we have already visited. 
- if (out_img[r][c] == on_pixel) - continue; - - out_img[r][c] = on_pixel; - - // put the neighbors of this pixel on the stack if they are bright enough - if (r-1 >= 0) - { - if (pos < size && get_pixel_intensity(in_img[r-1][c]) >= lower_thresh) - { - rstack[pos] = r-1; - cstack[pos] = c; - ++pos; - } - if (pos < size && c-1 >= 0 && get_pixel_intensity(in_img[r-1][c-1]) >= lower_thresh) - { - rstack[pos] = r-1; - cstack[pos] = c-1; - ++pos; - } - if (pos < size && c+1 < in_img.nc() && get_pixel_intensity(in_img[r-1][c+1]) >= lower_thresh) - { - rstack[pos] = r-1; - cstack[pos] = c+1; - ++pos; - } - } - - if (pos < size && c-1 >= 0 && get_pixel_intensity(in_img[r][c-1]) >= lower_thresh) - { - rstack[pos] = r; - cstack[pos] = c-1; - ++pos; - } - if (pos < size && c+1 < in_img.nc() && get_pixel_intensity(in_img[r][c+1]) >= lower_thresh) - { - rstack[pos] = r; - cstack[pos] = c+1; - ++pos; - } - - if (r+1 < in_img.nr()) - { - if (pos < size && get_pixel_intensity(in_img[r+1][c]) >= lower_thresh) - { - rstack[pos] = r+1; - cstack[pos] = c; - ++pos; - } - if (pos < size && c-1 >= 0 && get_pixel_intensity(in_img[r+1][c-1]) >= lower_thresh) - { - rstack[pos] = r+1; - cstack[pos] = c-1; - ++pos; - } - if (pos < size && c+1 < in_img.nc() && get_pixel_intensity(in_img[r+1][c+1]) >= lower_thresh) - { - rstack[pos] = r+1; - cstack[pos] = c+1; - ++pos; - } - } - - } // end while (pos >= 0) - - } - else - { - out_img[r][c] = off_pixel; - } - - } - } - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_THRESHOLDINg_ - diff --git a/ml/dlib/dlib/image_transforms/thresholding_abstract.h b/ml/dlib/dlib/image_transforms/thresholding_abstract.h deleted file mode 100644 index e7c1e8826..000000000 --- a/ml/dlib/dlib/image_transforms/thresholding_abstract.h +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (C) 2006 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
#undef DLIB_THRESHOLDINg_ABSTRACT_
#ifdef DLIB_THRESHOLDINg_ABSTRACT_

// NOTE: this file is documentation only.  Because the macro is #undef'd immediately
// before the #ifdef above, nothing between here and the closing #endif is compiled.

#include "../pixel.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    const unsigned char on_pixel = 255;
    const unsigned char off_pixel = 0;
    /*!
        These are the two values the functions declared below write into their
        output images.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename in_image_type,
        typename out_image_type
        >
    void threshold_image (
        const in_image_type& in_img,
        out_image_type& out_img,
        typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type thresh
    );
    /*!
        requires
            - in_image_type == an implementation of array2d/array2d_kernel_abstract.h
            - out_image_type == an implementation of array2d/array2d_kernel_abstract.h
            - pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true
            - pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false
            - pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false
        ensures
            - #out_img == the thresholded version of in_img (in_img is converted to a grayscale
              intensity image if it is color).  Pixels in in_img with grayscale values >= thresh
              have an output value of on_pixel and all others have a value of off_pixel.
            - #out_img.nc() == in_img.nc()
            - #out_img.nr() == in_img.nr()
    !*/

    template <
        typename image_type
        >
    void threshold_image (
        image_type& img,
        typename pixel_traits<typename image_traits<image_type>::pixel_type>::basic_pixel_type thresh
    );
    /*!
        requires
            - it is valid to call threshold_image(img,img,thresh);
        ensures
            - calls threshold_image(img,img,thresh);
              (i.e. thresholds img in place)
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename in_image_type,
        typename out_image_type
        >
    void auto_threshold_image (
        const in_image_type& in_img,
        out_image_type& out_img
    );
    /*!
        requires
            - in_image_type == an implementation of array2d/array2d_kernel_abstract.h
            - out_image_type == an implementation of array2d/array2d_kernel_abstract.h
            - pixel_traits<typename image_traits<in_image_type>::pixel_type>::max() <= 65535
            - pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false
            - pixel_traits<typename image_traits<in_image_type>::pixel_type>::is_unsigned == true
            - pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true
            - pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false
            - pixel_traits<typename image_traits<out_image_type>::pixel_type>::is_unsigned == true
        ensures
            - #out_img == the thresholded version of in_img (in_img is converted to a grayscale
              intensity image if it is color).  Pixels in in_img with grayscale values >= thresh
              have an output value of on_pixel and all others have a value of off_pixel.
            - The thresh value used is determined by performing a k-means clustering
              on the input image histogram with a k of 2.  The point between the two
              means found is used as the thresh value.
            - #out_img.nc() == in_img.nc()
            - #out_img.nr() == in_img.nr()
    !*/

    template <
        typename image_type
        >
    void auto_threshold_image (
        image_type& img
    );
    /*!
        requires
            - it is valid to call auto_threshold_image(img,img);
        ensures
            - calls auto_threshold_image(img,img);
              (i.e. thresholds img in place)
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename in_image_type,
        typename out_image_type
        >
    void hysteresis_threshold (
        const in_image_type& in_img,
        out_image_type& out_img,
        typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type lower_thresh,
        typename pixel_traits<typename image_traits<in_image_type>::pixel_type>::basic_pixel_type upper_thresh
    );
    /*!
        requires
            - in_image_type == an implementation of array2d/array2d_kernel_abstract.h
            - out_image_type == an implementation of array2d/array2d_kernel_abstract.h
            - pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true
            - pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false
            - pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false
            - lower_thresh <= upper_thresh
            - is_same_object(in_img, out_img) == false
        ensures
            - #out_img == the hysteresis thresholded version of in_img (in_img is converted to a
              grayscale intensity image if it is color).  Pixels in in_img with grayscale
              values >= upper_thresh have an output value of on_pixel and all others have a
              value of off_pixel unless they are >= lower_thresh and are connected to a pixel
              with a value >= upper_thresh, in which case they have a value of on_pixel.  Here
              pixels are connected if there is a path between them composed of pixels that
              would receive an output of on_pixel.
            - #out_img.nc() == in_img.nc()
            - #out_img.nr() == in_img.nr()
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_THRESHOLDINg_ABSTRACT_
