// Copyright (C) 2012 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #ifndef DLIB_INTERPOlATIONh_ #define DLIB_INTERPOlATIONh_ #include "interpolation_abstract.h" #include "../pixel.h" #include "../matrix.h" #include "assign_image.h" #include "image_pyramid.h" #include "../simd.h" #include "../image_processing/full_object_detection.h" #include #include "../rand.h" namespace dlib { // ---------------------------------------------------------------------------------------- template struct sub_image_proxy { sub_image_proxy() = default; sub_image_proxy ( T& img, rectangle rect ) { rect = rect.intersect(get_rect(img)); typedef typename image_traits::pixel_type pixel_type; _nr = rect.height(); _nc = rect.width(); _width_step = width_step(img); _data = (char*)image_data(img) + sizeof(pixel_type)*rect.left() + rect.top()*_width_step; } void* _data = 0; long _width_step = 0; long _nr = 0; long _nc = 0; }; template struct const_sub_image_proxy { const_sub_image_proxy() = default; const_sub_image_proxy ( const T& img, rectangle rect ) { rect = rect.intersect(get_rect(img)); typedef typename image_traits::pixel_type pixel_type; _nr = rect.height(); _nc = rect.width(); _width_step = width_step(img); _data = (const char*)image_data(img) + sizeof(pixel_type)*rect.left() + rect.top()*_width_step; } const void* _data = 0; long _width_step = 0; long _nr = 0; long _nc = 0; }; template struct image_traits > { typedef typename image_traits::pixel_type pixel_type; }; template struct image_traits > { typedef typename image_traits::pixel_type pixel_type; }; template struct image_traits > { typedef typename image_traits::pixel_type pixel_type; }; template struct image_traits > { typedef typename image_traits::pixel_type pixel_type; }; template inline long num_rows( const sub_image_proxy& img) { return img._nr; } template inline long num_columns( const sub_image_proxy& img) { return img._nc; } template inline long num_rows( const const_sub_image_proxy& img) { return img._nr; } template inline long num_columns( const const_sub_image_proxy& img) { return img._nc; } template inline void* image_data( sub_image_proxy& img) { return img._data; } template inline const void* image_data( const sub_image_proxy& img) { return img._data; } template inline const void* image_data( const const_sub_image_proxy& img) { return img._data; } template inline long width_step( const sub_image_proxy& img ) { return img._width_step; } template inline long width_step( const const_sub_image_proxy& img ) { return img._width_step; } template void set_image_size(sub_image_proxy& img, long rows, long cols) { DLIB_CASSERT(img._nr == rows && img._nc == cols, "A sub_image can't be resized." << "\n\t img._nr: "<< img._nr << "\n\t img._nc: "<< img._nc << "\n\t rows: "<< rows << "\n\t cols: "<< cols ); } template < typename image_type > sub_image_proxy sub_image ( image_type& img, const rectangle& rect ) { return sub_image_proxy(img,rect); } template < typename image_type > const const_sub_image_proxy sub_image ( const image_type& img, const rectangle& rect ) { return const_sub_image_proxy(img,rect); } template inline sub_image_proxy> sub_image ( T* img, long nr, long nc, long row_stride ) { sub_image_proxy> tmp; tmp._data = img; tmp._nr = nr; tmp._nc = nc; tmp._width_step = row_stride*sizeof(T); return tmp; } template inline const const_sub_image_proxy> sub_image ( const T* img, long nr, long nc, long row_stride ) { const_sub_image_proxy> tmp; tmp._data = img; tmp._nr = nr; tmp._nc = nc; tmp._width_step = row_stride*sizeof(T); return tmp; } // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- class interpolate_nearest_neighbor { public: template bool operator() ( const image_view_type& img, const dlib::point& p, pixel_type& result ) const { COMPILE_TIME_ASSERT(pixel_traits::has_alpha == false); if (get_rect(img).contains(p)) { assign_pixel(result, img[p.y()][p.x()]); return true; } else { return false; } } }; // ---------------------------------------------------------------------------------------- class interpolate_bilinear { template struct is_rgb_image { const static bool value = pixel_traits::rgb; }; public: template typename disable_if,bool>::type operator() ( const image_view_type& img, const dlib::vector& p, pixel_type& result ) const { COMPILE_TIME_ASSERT(pixel_traits::has_alpha == false); const long left = static_cast(std::floor(p.x())); const long top = static_cast(std::floor(p.y())); const long right = left+1; const long bottom = top+1; // if the interpolation goes outside img if (!(left >= 0 && top >= 0 && right < img.nc() && bottom < img.nr())) return false; const double lr_frac = p.x() - left; const double tb_frac = p.y() - top; double tl = 0, tr = 0, bl = 0, br = 0; assign_pixel(tl, img[top][left]); assign_pixel(tr, img[top][right]); assign_pixel(bl, img[bottom][left]); assign_pixel(br, img[bottom][right]); double temp = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + tb_frac*((1-lr_frac)*bl + lr_frac*br); assign_pixel(result, temp); return true; } template typename enable_if,bool>::type operator() ( const image_view_type& img, const dlib::vector& p, pixel_type& result ) const { COMPILE_TIME_ASSERT(pixel_traits::has_alpha == false); const long left = static_cast(std::floor(p.x())); const long top = static_cast(std::floor(p.y())); const long right = left+1; const long bottom = top+1; // if the interpolation goes outside img if (!(left >= 0 && top >= 0 && right < img.nc() && bottom < img.nr())) return false; const double lr_frac = p.x() - left; const double tb_frac = p.y() - top; double tl, tr, bl, br; tl = img[top][left].red; tr = img[top][right].red; bl = img[bottom][left].red; br = img[bottom][right].red; const double red = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + tb_frac*((1-lr_frac)*bl + lr_frac*br); tl = img[top][left].green; tr = img[top][right].green; bl = img[bottom][left].green; br = img[bottom][right].green; const double green = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + tb_frac*((1-lr_frac)*bl + lr_frac*br); tl = img[top][left].blue; tr = img[top][right].blue; bl = img[bottom][left].blue; br = img[bottom][right].blue; const double blue = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + tb_frac*((1-lr_frac)*bl + lr_frac*br); rgb_pixel temp; assign_pixel(temp.red, red); assign_pixel(temp.green, green); assign_pixel(temp.blue, blue); assign_pixel(result, temp); return true; } }; // ---------------------------------------------------------------------------------------- class interpolate_quadratic { template struct is_rgb_image { const static bool value = pixel_traits::rgb; }; public: template typename disable_if,bool>::type operator() ( const image_view_type& img, const dlib::vector& p, pixel_type& result ) const { COMPILE_TIME_ASSERT(pixel_traits::has_alpha == false); const point pp(p); // if the interpolation goes outside img if (!get_rect(img).contains(grow_rect(pp,1))) return false; const long r = pp.y(); const long c = pp.x(); const double temp = interpolate(p-pp, img[r-1][c-1], img[r-1][c ], img[r-1][c+1], img[r ][c-1], img[r ][c ], img[r ][c+1], img[r+1][c-1], img[r+1][c ], img[r+1][c+1]); assign_pixel(result, temp); return true; } template typename enable_if,bool>::type operator() ( const image_view_type& img, const dlib::vector& p, pixel_type& result ) const { COMPILE_TIME_ASSERT(pixel_traits::has_alpha == false); const point pp(p); // if the interpolation goes outside img if (!get_rect(img).contains(grow_rect(pp,1))) return false; const long r = pp.y(); const long c = pp.x(); const double red = interpolate(p-pp, img[r-1][c-1].red, img[r-1][c ].red, img[r-1][c+1].red, img[r ][c-1].red, img[r ][c ].red, img[r ][c+1].red, img[r+1][c-1].red, img[r+1][c ].red, img[r+1][c+1].red); const double green = interpolate(p-pp, img[r-1][c-1].green, img[r-1][c ].green, img[r-1][c+1].green, img[r ][c-1].green, img[r ][c ].green, img[r ][c+1].green, img[r+1][c-1].green, img[r+1][c ].green, img[r+1][c+1].green); const double blue = interpolate(p-pp, img[r-1][c-1].blue, img[r-1][c ].blue, img[r-1][c+1].blue, img[r ][c-1].blue, img[r ][c ].blue, img[r ][c+1].blue, img[r+1][c-1].blue, img[r+1][c ].blue, img[r+1][c+1].blue); rgb_pixel temp; assign_pixel(temp.red, red); assign_pixel(temp.green, green); assign_pixel(temp.blue, blue); assign_pixel(result, temp); return true; } private: /* tl tm tr ml mm mr bl bm br */ // The above is the pixel layout in our little 3x3 neighborhood. interpolate() will // fit a quadratic to these 9 pixels and then use that quadratic to find the interpolated // value at point p. inline double interpolate( const dlib::vector& p, double tl, double tm, double tr, double ml, double mm, double mr, double bl, double bm, double br ) const { matrix w; // x w(0) = (tr + mr + br - tl - ml - bl)*0.16666666666; // y w(1) = (bl + bm + br - tl - tm - tr)*0.16666666666; // x^2 w(2) = (tl + tr + ml + mr + bl + br)*0.16666666666 - (tm + mm + bm)*0.333333333; // x*y w(3) = (tl - tr - bl + br)*0.25; // y^2 w(4) = (tl + tm + tr + bl + bm + br)*0.16666666666 - (ml + mm + mr)*0.333333333; // 1 (constant term) w(5) = (tm + ml + mr + bm)*0.222222222 - (tl + tr + bl + br)*0.11111111 + (mm)*0.55555556; const double x = p.x(); const double y = p.y(); matrix z; z = x, y, x*x, x*y, y*y, 1.0; return dot(w,z); } }; // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- class black_background { public: template void operator() ( pixel_type& p) const { assign_pixel(p, 0); } }; class white_background { public: template void operator() ( pixel_type& p) const { assign_pixel(p, 255); } }; class no_background { public: template void operator() ( pixel_type& ) const { } }; // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename interpolation_type, typename point_mapping_type, typename background_type > void transform_image ( const image_type1& in_img, image_type2& out_img, const interpolation_type& interp, const point_mapping_type& map_point, const background_type& set_background, const rectangle& area ) { // make sure requires clause is not broken DLIB_ASSERT( get_rect(out_img).contains(area) == true && is_same_object(in_img, out_img) == false , "\t void transform_image()" << "\n\t Invalid inputs were given to this function." << "\n\t get_rect(out_img).contains(area): " << get_rect(out_img).contains(area) << "\n\t get_rect(out_img): " << get_rect(out_img) << "\n\t area: " << area << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); const_image_view imgv(in_img); image_view out_imgv(out_img); for (long r = area.top(); r <= area.bottom(); ++r) { for (long c = area.left(); c <= area.right(); ++c) { if (!interp(imgv, map_point(dlib::vector(c,r)), out_imgv[r][c])) set_background(out_imgv[r][c]); } } } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename interpolation_type, typename point_mapping_type, typename background_type > void transform_image ( const image_type1& in_img, image_type2& out_img, const interpolation_type& interp, const point_mapping_type& map_point, const background_type& set_background ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void transform_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); transform_image(in_img, out_img, interp, map_point, set_background, get_rect(out_img)); } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename interpolation_type, typename point_mapping_type > void transform_image ( const image_type1& in_img, image_type2& out_img, const interpolation_type& interp, const point_mapping_type& map_point ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void transform_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); transform_image(in_img, out_img, interp, map_point, black_background(), get_rect(out_img)); } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename interpolation_type > point_transform_affine rotate_image ( const image_type1& in_img, image_type2& out_img, double angle, const interpolation_type& interp ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t point_transform_affine rotate_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); const rectangle rimg = get_rect(in_img); // figure out bounding box for rotated rectangle rectangle rect; rect += rotate_point(center(rimg), rimg.tl_corner(), -angle); rect += rotate_point(center(rimg), rimg.tr_corner(), -angle); rect += rotate_point(center(rimg), rimg.bl_corner(), -angle); rect += rotate_point(center(rimg), rimg.br_corner(), -angle); set_image_size(out_img, rect.height(), rect.width()); const matrix R = rotation_matrix(angle); point_transform_affine trans = point_transform_affine(R, -R*dcenter(get_rect(out_img)) + dcenter(rimg)); transform_image(in_img, out_img, interp, trans); return inv(trans); } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2 > point_transform_affine rotate_image ( const image_type1& in_img, image_type2& out_img, double angle ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t point_transform_affine rotate_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); return rotate_image(in_img, out_img, angle, interpolate_quadratic()); } // ---------------------------------------------------------------------------------------- namespace impl { class helper_resize_image { public: helper_resize_image( double x_scale_, double y_scale_ ): x_scale(x_scale_), y_scale(y_scale_) {} dlib::vector operator() ( const dlib::vector& p ) const { return dlib::vector(p.x()*x_scale, p.y()*y_scale); } private: const double x_scale; const double y_scale; }; } template < typename image_type1, typename image_type2, typename interpolation_type > void resize_image ( const image_type1& in_img, image_type2& out_img, const interpolation_type& interp ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void resize_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); const double x_scale = (num_columns(in_img)-1)/(double)std::max((num_columns(out_img)-1),1); const double y_scale = (num_rows(in_img)-1)/(double)std::max((num_rows(out_img)-1),1); transform_image(in_img, out_img, interp, dlib::impl::helper_resize_image(x_scale,y_scale)); } // ---------------------------------------------------------------------------------------- template struct is_rgb_image { const static bool value = pixel_traits::pixel_type>::rgb; }; template struct is_grayscale_image { const static bool value = pixel_traits::pixel_type>::grayscale; }; // This is an optimized version of resize_image for the case where bilinear // interpolation is used. template < typename image_type1, typename image_type2 > typename disable_if_c<(is_rgb_image::value&&is_rgb_image::value) || (is_grayscale_image::value&&is_grayscale_image::value)>::type resize_image ( const image_type1& in_img_, image_type2& out_img_, interpolate_bilinear ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img_, out_img_) == false , "\t void resize_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img_, out_img_): " << is_same_object(in_img_, out_img_) ); const_image_view in_img(in_img_); image_view out_img(out_img_); if (out_img.size() == 0 || in_img.size() == 0) return; typedef typename image_traits::pixel_type T; typedef typename image_traits::pixel_type U; const double x_scale = (in_img.nc()-1)/(double)std::max((out_img.nc()-1),1); const double y_scale = (in_img.nr()-1)/(double)std::max((out_img.nr()-1),1); double y = -y_scale; for (long r = 0; r < out_img.nr(); ++r) { y += y_scale; const long top = static_cast(std::floor(y)); const long bottom = std::min(top+1, in_img.nr()-1); const double tb_frac = y - top; double x = -x_scale; if (pixel_traits::grayscale) { for (long c = 0; c < out_img.nc(); ++c) { x += x_scale; const long left = static_cast(std::floor(x)); const long right = std::min(left+1, in_img.nc()-1); const double lr_frac = x - left; double tl = 0, tr = 0, bl = 0, br = 0; assign_pixel(tl, in_img[top][left]); assign_pixel(tr, in_img[top][right]); assign_pixel(bl, in_img[bottom][left]); assign_pixel(br, in_img[bottom][right]); double temp = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + tb_frac*((1-lr_frac)*bl + lr_frac*br); assign_pixel(out_img[r][c], temp); } } else { for (long c = 0; c < out_img.nc(); ++c) { x += x_scale; const long left = static_cast(std::floor(x)); const long right = std::min(left+1, in_img.nc()-1); const double lr_frac = x - left; const T tl = in_img[top][left]; const T tr = in_img[top][right]; const T bl = in_img[bottom][left]; const T br = in_img[bottom][right]; T temp; assign_pixel(temp, 0); vector_to_pixel(temp, (1-tb_frac)*((1-lr_frac)*pixel_to_vector(tl) + lr_frac*pixel_to_vector(tr)) + tb_frac*((1-lr_frac)*pixel_to_vector(bl) + lr_frac*pixel_to_vector(br))); assign_pixel(out_img[r][c], temp); } } } } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2 > struct images_have_same_pixel_types { typedef typename image_traits::pixel_type ptype1; typedef typename image_traits::pixel_type ptype2; const static bool value = is_same_type::value; }; template < typename image_type, typename image_type2 > typename enable_if_c::value && is_grayscale_image::value && images_have_same_pixel_types::value>::type resize_image ( const image_type& in_img_, image_type2& out_img_, interpolate_bilinear ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img_, out_img_) == false , "\t void resize_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img_, out_img_): " << is_same_object(in_img_, out_img_) ); const_image_view in_img(in_img_); image_view out_img(out_img_); if (out_img.size() == 0 || in_img.size() == 0) return; typedef typename image_traits::pixel_type T; const double x_scale = (in_img.nc()-1)/(double)std::max((out_img.nc()-1),1); const double y_scale = (in_img.nr()-1)/(double)std::max((out_img.nr()-1),1); double y = -y_scale; for (long r = 0; r < out_img.nr(); ++r) { y += y_scale; const long top = static_cast(std::floor(y)); const long bottom = std::min(top+1, in_img.nr()-1); const double tb_frac = y - top; double x = -4*x_scale; const simd4f _tb_frac = tb_frac; const simd4f _inv_tb_frac = 1-tb_frac; const simd4f _x_scale = 4*x_scale; simd4f _x(x, x+x_scale, x+2*x_scale, x+3*x_scale); long c = 0; for (;; c+=4) { _x += _x_scale; simd4i left = simd4i(_x); simd4f _lr_frac = _x-left; simd4f _inv_lr_frac = 1-_lr_frac; simd4i right = left+1; simd4f tlf = _inv_tb_frac*_inv_lr_frac; simd4f trf = _inv_tb_frac*_lr_frac; simd4f blf = _tb_frac*_inv_lr_frac; simd4f brf = _tb_frac*_lr_frac; int32 fleft[4]; int32 fright[4]; left.store(fleft); right.store(fright); if (fright[3] >= in_img.nc()) break; simd4f tl(in_img[top][fleft[0]], in_img[top][fleft[1]], in_img[top][fleft[2]], in_img[top][fleft[3]]); simd4f tr(in_img[top][fright[0]], in_img[top][fright[1]], in_img[top][fright[2]], in_img[top][fright[3]]); simd4f bl(in_img[bottom][fleft[0]], in_img[bottom][fleft[1]], in_img[bottom][fleft[2]], in_img[bottom][fleft[3]]); simd4f br(in_img[bottom][fright[0]], in_img[bottom][fright[1]], in_img[bottom][fright[2]], in_img[bottom][fright[3]]); simd4f out = simd4f(tlf*tl + trf*tr + blf*bl + brf*br); float fout[4]; out.store(fout); out_img[r][c] = static_cast(fout[0]); out_img[r][c+1] = static_cast(fout[1]); out_img[r][c+2] = static_cast(fout[2]); out_img[r][c+3] = static_cast(fout[3]); } x = -x_scale + c*x_scale; for (; c < out_img.nc(); ++c) { x += x_scale; const long left = static_cast(std::floor(x)); const long right = std::min(left+1, in_img.nc()-1); const float lr_frac = x - left; float tl = 0, tr = 0, bl = 0, br = 0; assign_pixel(tl, in_img[top][left]); assign_pixel(tr, in_img[top][right]); assign_pixel(bl, in_img[bottom][left]); assign_pixel(br, in_img[bottom][right]); float temp = (1-tb_frac)*((1-lr_frac)*tl + lr_frac*tr) + tb_frac*((1-lr_frac)*bl + lr_frac*br); assign_pixel(out_img[r][c], temp); } } } // ---------------------------------------------------------------------------------------- template < typename image_type > typename enable_if >::type resize_image ( const image_type& in_img_, image_type& out_img_, interpolate_bilinear ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img_, out_img_) == false , "\t void resize_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img_, out_img_): " << is_same_object(in_img_, out_img_) ); const_image_view in_img(in_img_); image_view out_img(out_img_); if (out_img.size() == 0 || in_img.size() == 0) return; typedef typename image_traits::pixel_type T; const double x_scale = (in_img.nc()-1)/(double)std::max((out_img.nc()-1),1); const double y_scale = (in_img.nr()-1)/(double)std::max((out_img.nr()-1),1); double y = -y_scale; for (long r = 0; r < out_img.nr(); ++r) { y += y_scale; const long top = static_cast(std::floor(y)); const long bottom = std::min(top+1, in_img.nr()-1); const double tb_frac = y - top; double x = -4*x_scale; const simd4f _tb_frac = tb_frac; const simd4f _inv_tb_frac = 1-tb_frac; const simd4f _x_scale = 4*x_scale; simd4f _x(x, x+x_scale, x+2*x_scale, x+3*x_scale); long c = 0; for (;; c+=4) { _x += _x_scale; simd4i left = simd4i(_x); simd4f lr_frac = _x-left; simd4f _inv_lr_frac = 1-lr_frac; simd4i right = left+1; simd4f tlf = _inv_tb_frac*_inv_lr_frac; simd4f trf = _inv_tb_frac*lr_frac; simd4f blf = _tb_frac*_inv_lr_frac; simd4f brf = _tb_frac*lr_frac; int32 fleft[4]; int32 fright[4]; left.store(fleft); right.store(fright); if (fright[3] >= in_img.nc()) break; simd4f tl(in_img[top][fleft[0]].red, in_img[top][fleft[1]].red, in_img[top][fleft[2]].red, in_img[top][fleft[3]].red); simd4f tr(in_img[top][fright[0]].red, in_img[top][fright[1]].red, in_img[top][fright[2]].red, in_img[top][fright[3]].red); simd4f bl(in_img[bottom][fleft[0]].red, in_img[bottom][fleft[1]].red, in_img[bottom][fleft[2]].red, in_img[bottom][fleft[3]].red); simd4f br(in_img[bottom][fright[0]].red, in_img[bottom][fright[1]].red, in_img[bottom][fright[2]].red, in_img[bottom][fright[3]].red); simd4i out = simd4i(tlf*tl + trf*tr + blf*bl + brf*br); int32 fout[4]; out.store(fout); out_img[r][c].red = static_cast(fout[0]); out_img[r][c+1].red = static_cast(fout[1]); out_img[r][c+2].red = static_cast(fout[2]); out_img[r][c+3].red = static_cast(fout[3]); tl = simd4f(in_img[top][fleft[0]].green, in_img[top][fleft[1]].green, in_img[top][fleft[2]].green, in_img[top][fleft[3]].green); tr = simd4f(in_img[top][fright[0]].green, in_img[top][fright[1]].green, in_img[top][fright[2]].green, in_img[top][fright[3]].green); bl = simd4f(in_img[bottom][fleft[0]].green, in_img[bottom][fleft[1]].green, in_img[bottom][fleft[2]].green, in_img[bottom][fleft[3]].green); br = simd4f(in_img[bottom][fright[0]].green, in_img[bottom][fright[1]].green, in_img[bottom][fright[2]].green, in_img[bottom][fright[3]].green); out = simd4i(tlf*tl + trf*tr + blf*bl + brf*br); out.store(fout); out_img[r][c].green = static_cast(fout[0]); out_img[r][c+1].green = static_cast(fout[1]); out_img[r][c+2].green = static_cast(fout[2]); out_img[r][c+3].green = static_cast(fout[3]); tl = simd4f(in_img[top][fleft[0]].blue, in_img[top][fleft[1]].blue, in_img[top][fleft[2]].blue, in_img[top][fleft[3]].blue); tr = simd4f(in_img[top][fright[0]].blue, in_img[top][fright[1]].blue, in_img[top][fright[2]].blue, in_img[top][fright[3]].blue); bl = simd4f(in_img[bottom][fleft[0]].blue, in_img[bottom][fleft[1]].blue, in_img[bottom][fleft[2]].blue, in_img[bottom][fleft[3]].blue); br = simd4f(in_img[bottom][fright[0]].blue, in_img[bottom][fright[1]].blue, in_img[bottom][fright[2]].blue, in_img[bottom][fright[3]].blue); out = simd4i(tlf*tl + trf*tr + blf*bl + brf*br); out.store(fout); out_img[r][c].blue = static_cast(fout[0]); out_img[r][c+1].blue = static_cast(fout[1]); out_img[r][c+2].blue = static_cast(fout[2]); out_img[r][c+3].blue = static_cast(fout[3]); } x = -x_scale + c*x_scale; for (; c < out_img.nc(); ++c) { x += x_scale; const long left = static_cast(std::floor(x)); const long right = std::min(left+1, in_img.nc()-1); const double lr_frac = x - left; const T tl = in_img[top][left]; const T tr = in_img[top][right]; const T bl = in_img[bottom][left]; const T br = in_img[bottom][right]; T temp; assign_pixel(temp, 0); vector_to_pixel(temp, (1-tb_frac)*((1-lr_frac)*pixel_to_vector(tl) + lr_frac*pixel_to_vector(tr)) + tb_frac*((1-lr_frac)*pixel_to_vector(bl) + lr_frac*pixel_to_vector(br))); assign_pixel(out_img[r][c], temp); } } } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2 > void resize_image ( const image_type1& in_img, image_type2& out_img ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void resize_image()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); resize_image(in_img, out_img, interpolate_bilinear()); } // ---------------------------------------------------------------------------------------- template < typename image_type > void resize_image ( double size_scale, image_type& img ) { // make sure requires clause is not broken DLIB_ASSERT( size_scale > 0 , "\t void resize_image()" << "\n\t Invalid inputs were given to this function." << "\n\t size_scale: " << size_scale ); image_type temp; set_image_size(temp, std::round(size_scale*num_rows(img)), std::round(size_scale*num_columns(img))); resize_image(img, temp); swap(img, temp); } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2 > point_transform_affine flip_image_left_right ( const image_type1& in_img, image_type2& out_img ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void flip_image_left_right()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); assign_image(out_img, fliplr(mat(in_img))); std::vector > from, to; rectangle r = get_rect(in_img); from.push_back(r.tl_corner()); to.push_back(r.tr_corner()); from.push_back(r.bl_corner()); to.push_back(r.br_corner()); from.push_back(r.tr_corner()); to.push_back(r.tl_corner()); from.push_back(r.br_corner()); to.push_back(r.bl_corner()); return find_affine_transform(from,to); } // ---------------------------------------------------------------------------------------- template < typename image_type > point_transform_affine flip_image_left_right ( image_type& img ) { image_type temp; auto tform = flip_image_left_right(img, temp); swap(temp,img); return tform; } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2 > void flip_image_up_down ( const image_type1& in_img, image_type2& out_img ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void flip_image_up_down()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); assign_image(out_img, flipud(mat(in_img))); } // ---------------------------------------------------------------------------------------- namespace impl { inline rectangle flip_rect_left_right ( const rectangle& rect, const rectangle& window ) { rectangle temp; temp.top() = rect.top(); temp.bottom() = rect.bottom(); const long left_dist = rect.left()-window.left(); temp.right() = window.right()-left_dist; temp.left() = temp.right()-rect.width()+1; return temp; } inline rectangle tform_object ( const point_transform_affine& tran, const rectangle& rect ) { return centered_rect(tran(center(rect)), rect.width(), rect.height()); } inline mmod_rect tform_object ( const point_transform_affine& tran, mmod_rect rect ) { rect.rect = tform_object(tran, rect.rect); return rect; } inline full_object_detection tform_object( const point_transform_affine& tran, const full_object_detection& obj ) { std::vector parts; parts.reserve(obj.num_parts()); for (unsigned long i = 0; i < obj.num_parts(); ++i) { if (obj.part(i) != OBJECT_PART_NOT_PRESENT) parts.push_back(tran(obj.part(i))); else parts.push_back(OBJECT_PART_NOT_PRESENT); } return full_object_detection(tform_object(tran,obj.get_rect()), parts); } } // ---------------------------------------------------------------------------------------- template < typename image_array_type, typename T > void add_image_left_right_flips ( image_array_type& images, std::vector >& objects ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size(), "\t void add_image_left_right_flips()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() ); typename image_array_type::value_type temp; std::vector rects; const unsigned long num = images.size(); for (unsigned long j = 0; j < num; ++j) { const point_transform_affine tran = flip_image_left_right(images[j], temp); rects.clear(); for (unsigned long i = 0; i < objects[j].size(); ++i) rects.push_back(impl::tform_object(tran, objects[j][i])); images.push_back(std::move(temp)); objects.push_back(rects); } } // ---------------------------------------------------------------------------------------- template < typename image_array_type, typename T, typename U > void add_image_left_right_flips ( image_array_type& images, std::vector >& objects, std::vector >& objects2 ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size() && images.size() == objects2.size(), "\t void add_image_left_right_flips()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() << "\n\t objects2.size(): " << objects2.size() ); typename image_array_type::value_type temp; std::vector rects; std::vector rects2; const unsigned long num = images.size(); for (unsigned long j = 0; j < num; ++j) { const point_transform_affine tran = flip_image_left_right(images[j], temp); images.push_back(std::move(temp)); rects.clear(); for (unsigned long i = 0; i < objects[j].size(); ++i) rects.push_back(impl::tform_object(tran, objects[j][i])); objects.push_back(rects); rects2.clear(); for (unsigned long i = 0; i < objects2[j].size(); ++i) rects2.push_back(impl::tform_object(tran, objects2[j][i])); objects2.push_back(rects2); } } // ---------------------------------------------------------------------------------------- template void flip_image_dataset_left_right ( image_array_type& images, std::vector >& objects ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size(), "\t void flip_image_dataset_left_right()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() ); typename image_array_type::value_type temp; for (unsigned long i = 0; i < images.size(); ++i) { flip_image_left_right(images[i], temp); swap(temp,images[i]); for (unsigned long j = 0; j < objects[i].size(); ++j) { objects[i][j] = impl::flip_rect_left_right(objects[i][j], get_rect(images[i])); } } } // ---------------------------------------------------------------------------------------- template void flip_image_dataset_left_right ( image_array_type& images, std::vector >& objects, std::vector >& objects2 ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size() && images.size() == objects2.size(), "\t void flip_image_dataset_left_right()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() << "\n\t objects2.size(): " << objects2.size() ); typename image_array_type::value_type temp; for (unsigned long i = 0; i < images.size(); ++i) { flip_image_left_right(images[i], temp); swap(temp, images[i]); for (unsigned long j = 0; j < objects[i].size(); ++j) { objects[i][j] = impl::flip_rect_left_right(objects[i][j], get_rect(images[i])); } for (unsigned long j = 0; j < objects2[i].size(); ++j) { objects2[i][j] = impl::flip_rect_left_right(objects2[i][j], get_rect(images[i])); } } } // ---------------------------------------------------------------------------------------- template < typename pyramid_type, typename image_array_type > void upsample_image_dataset ( image_array_type& images, std::vector >& objects, unsigned long max_image_size = std::numeric_limits::max() ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size(), "\t void upsample_image_dataset()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() ); typename image_array_type::value_type temp; pyramid_type pyr; for (unsigned long i = 0; i < images.size(); ++i) { const unsigned long img_size = num_rows(images[i])*num_columns(images[i]); if (img_size <= max_image_size) { pyramid_up(images[i], temp, pyr); swap(temp, images[i]); for (unsigned long j = 0; j < objects[i].size(); ++j) { objects[i][j] = pyr.rect_up(objects[i][j]); } } } } template < typename pyramid_type, typename image_array_type > void upsample_image_dataset ( image_array_type& images, std::vector>& objects, unsigned long max_image_size = std::numeric_limits::max() ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size(), "\t void upsample_image_dataset()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() ); typename image_array_type::value_type temp; pyramid_type pyr; for (unsigned long i = 0; i < images.size(); ++i) { const unsigned long img_size = num_rows(images[i])*num_columns(images[i]); if (img_size <= max_image_size) { pyramid_up(images[i], temp, pyr); swap(temp, images[i]); for (unsigned long j = 0; j < objects[i].size(); ++j) { objects[i][j].rect = pyr.rect_up(objects[i][j].rect); } } } } template < typename pyramid_type, typename image_array_type > void upsample_image_dataset ( image_array_type& images, std::vector >& objects, std::vector >& objects2, unsigned long max_image_size = std::numeric_limits::max() ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size() && images.size() == objects2.size(), "\t void upsample_image_dataset()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() << "\n\t objects2.size(): " << objects2.size() ); typename image_array_type::value_type temp; pyramid_type pyr; for (unsigned long i = 0; i < images.size(); ++i) { const unsigned long img_size = num_rows(images[i])*num_columns(images[i]); if (img_size <= max_image_size) { pyramid_up(images[i], temp, pyr); swap(temp, images[i]); for (unsigned long j = 0; j < objects[i].size(); ++j) { objects[i][j] = pyr.rect_up(objects[i][j]); } for (unsigned long j = 0; j < objects2[i].size(); ++j) { objects2[i][j] = pyr.rect_up(objects2[i][j]); } } } } // ---------------------------------------------------------------------------------------- template void rotate_image_dataset ( double angle, image_array_type& images, std::vector >& objects ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size(), "\t void rotate_image_dataset()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() ); typename image_array_type::value_type temp; for (unsigned long i = 0; i < images.size(); ++i) { const point_transform_affine tran = rotate_image(images[i], temp, angle); swap(temp, images[i]); for (unsigned long j = 0; j < objects[i].size(); ++j) { const rectangle rect = objects[i][j]; objects[i][j] = centered_rect(tran(center(rect)), rect.width(), rect.height()); } } } template void rotate_image_dataset ( double angle, image_array_type& images, std::vector >& objects, std::vector >& objects2 ) { // make sure requires clause is not broken DLIB_ASSERT( images.size() == objects.size() && images.size() == objects2.size(), "\t void rotate_image_dataset()" << "\n\t Invalid inputs were given to this function." << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() << "\n\t objects2.size(): " << objects2.size() ); typename image_array_type::value_type temp; for (unsigned long i = 0; i < images.size(); ++i) { const point_transform_affine tran = rotate_image(images[i], temp, angle); swap(temp, images[i]); for (unsigned long j = 0; j < objects[i].size(); ++j) { const rectangle rect = objects[i][j]; objects[i][j] = centered_rect(tran(center(rect)), rect.width(), rect.height()); } for (unsigned long j = 0; j < objects2[i].size(); ++j) { const rectangle rect = objects2[i][j]; objects2[i][j] = centered_rect(tran(center(rect)), rect.width(), rect.height()); } } } // ---------------------------------------------------------------------------------------- template < typename image_array_type, typename EXP, typename T, typename U > void add_image_rotations ( const matrix_exp& angles, image_array_type& images, std::vector >& objects, std::vector >& objects2 ) { // make sure requires clause is not broken DLIB_ASSERT( is_vector(angles) && angles.size() > 0 && images.size() == objects.size() && images.size() == objects2.size(), "\t void add_image_rotations()" << "\n\t Invalid inputs were given to this function." << "\n\t is_vector(angles): " << is_vector(angles) << "\n\t angles.size(): " << angles.size() << "\n\t images.size(): " << images.size() << "\n\t objects.size(): " << objects.size() << "\n\t objects2.size(): " << objects2.size() ); image_array_type new_images; std::vector > new_objects; std::vector > new_objects2; using namespace impl; std::vector objtemp; std::vector objtemp2; typename image_array_type::value_type temp; for (long i = 0; i < angles.size(); ++i) { for (unsigned long j = 0; j < images.size(); ++j) { const point_transform_affine tran = rotate_image(images[j], temp, angles(i)); new_images.push_back(std::move(temp)); objtemp.clear(); for (unsigned long k = 0; k < objects[j].size(); ++k) objtemp.push_back(tform_object(tran, objects[j][k])); new_objects.push_back(objtemp); objtemp2.clear(); for (unsigned long k = 0; k < objects2[j].size(); ++k) objtemp2.push_back(tform_object(tran, objects2[j][k])); new_objects2.push_back(objtemp2); } } new_images.swap(images); new_objects.swap(objects); new_objects2.swap(objects2); } // ---------------------------------------------------------------------------------------- template < typename image_array_type, typename EXP, typename T > void add_image_rotations ( const matrix_exp& angles, image_array_type& images, std::vector >& objects ) { std::vector > objects2(objects.size()); add_image_rotations(angles, images, objects, objects2); } // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename pyramid_type, typename interpolation_type > void pyramid_up ( const image_type1& in_img, image_type2& out_img, const pyramid_type& pyr, const interpolation_type& interp ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void pyramid_up()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); if (image_size(in_img) == 0) { set_image_size(out_img, 0, 0); return; } rectangle rect = get_rect(in_img); rectangle uprect = pyr.rect_up(rect); if (uprect.is_empty()) { set_image_size(out_img, 0, 0); return; } set_image_size(out_img, uprect.bottom()+1, uprect.right()+1); resize_image(in_img, out_img, interp); } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename pyramid_type > void pyramid_up ( const image_type1& in_img, image_type2& out_img, const pyramid_type& pyr ) { // make sure requires clause is not broken DLIB_ASSERT( is_same_object(in_img, out_img) == false , "\t void pyramid_up()" << "\n\t Invalid inputs were given to this function." << "\n\t is_same_object(in_img, out_img): " << is_same_object(in_img, out_img) ); pyramid_up(in_img, out_img, pyr, interpolate_bilinear()); } // ---------------------------------------------------------------------------------------- template < typename image_type, typename pyramid_type > void pyramid_up ( image_type& img, const pyramid_type& pyr ) { image_type temp; pyramid_up(img, temp, pyr); swap(temp, img); } // ---------------------------------------------------------------------------------------- template < typename image_type > void pyramid_up ( image_type& img ) { pyramid_down<2> pyr; pyramid_up(img, pyr); } // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- struct chip_dims { chip_dims ( unsigned long rows_, unsigned long cols_ ) : rows(rows_), cols(cols_) { } unsigned long rows; unsigned long cols; }; struct chip_details { chip_details() : angle(0), rows(0), cols(0) {} chip_details(const rectangle& rect_) : rect(rect_),angle(0), rows(rect_.height()), cols(rect_.width()) {} chip_details(const drectangle& rect_) : rect(rect_),angle(0), rows((unsigned long)(rect_.height()+0.5)), cols((unsigned long)(rect_.width()+0.5)) {} chip_details(const drectangle& rect_, unsigned long size) : rect(rect_),angle(0) { compute_dims_from_size(size); } chip_details(const drectangle& rect_, unsigned long size, double angle_) : rect(rect_),angle(angle_) { compute_dims_from_size(size); } chip_details(const drectangle& rect_, const chip_dims& dims) : rect(rect_),angle(0),rows(dims.rows), cols(dims.cols) {} chip_details(const drectangle& rect_, const chip_dims& dims, double angle_) : rect(rect_),angle(angle_),rows(dims.rows), cols(dims.cols) {} template chip_details( const std::vector >& chip_points, const std::vector >& img_points, const chip_dims& dims ) : rows(dims.rows), cols(dims.cols) { DLIB_CASSERT( chip_points.size() == img_points.size() && chip_points.size() >= 2, "\t chip_details::chip_details(chip_points,img_points,dims)" << "\n\t Invalid inputs were given to this function." << "\n\t chip_points.size(): " << chip_points.size() << "\n\t img_points.size(): " << img_points.size() ); const point_transform_affine tform = find_similarity_transform(chip_points,img_points); dlib::vector p(1,0); p = tform.get_m()*p; // There are only 3 things happening in a similarity transform. There is a // rescaling, a rotation, and a translation. So here we pick out the scale and // rotation parameters. angle = std::atan2(p.y(),p.x()); // Note that the translation and scale part are represented by the extraction // rectangle. So here we build the appropriate rectangle. const double scale = length(p); rect = centered_drect(tform(point(dims.cols,dims.rows)/2.0), dims.cols*scale, dims.rows*scale); } drectangle rect; double angle; unsigned long rows; unsigned long cols; inline unsigned long size() const { return rows*cols; } private: void compute_dims_from_size ( unsigned long size ) { const double relative_size = std::sqrt(size/(double)rect.area()); rows = static_cast(rect.height()*relative_size + 0.5); cols = static_cast(size/(double)rows + 0.5); rows = std::max(1ul,rows); cols = std::max(1ul,cols); } }; // ---------------------------------------------------------------------------------------- inline point_transform_affine get_mapping_to_chip ( const chip_details& details ) { std::vector > from, to; point p1(0,0); point p2(details.cols-1,0); point p3(details.cols-1, details.rows-1); to.push_back(p1); from.push_back(rotate_point(center(details.rect),details.rect.tl_corner(),details.angle)); to.push_back(p2); from.push_back(rotate_point(center(details.rect),details.rect.tr_corner(),details.angle)); to.push_back(p3); from.push_back(rotate_point(center(details.rect),details.rect.br_corner(),details.angle)); return find_affine_transform(from, to); } // ---------------------------------------------------------------------------------------- inline full_object_detection map_det_to_chip( const full_object_detection& det, const chip_details& details ) { point_transform_affine tform = get_mapping_to_chip(details); full_object_detection res(det); // map the parts for (unsigned long l = 0; l < det.num_parts(); ++l) { if (det.part(l) != OBJECT_PART_NOT_PRESENT) res.part(l) = tform(det.part(l)); else res.part(l) = OBJECT_PART_NOT_PRESENT; } // map the main rectangle rectangle rect; rect += tform(det.get_rect().tl_corner()); rect += tform(det.get_rect().tr_corner()); rect += tform(det.get_rect().bl_corner()); rect += tform(det.get_rect().br_corner()); res.get_rect() = rect; return res; } // ---------------------------------------------------------------------------------------- namespace impl { template < typename image_type1, typename image_type2 > void basic_extract_image_chip ( const image_type1& img, const rectangle& location, image_type2& chip ) /*! ensures - This function doesn't do any scaling or rotating. It just pulls out the chip in the given rectangle. This also means the output image has the same dimensions as the location rectangle. !*/ { const_image_view vimg(img); image_view vchip(chip); vchip.set_size(location.height(), location.width()); // location might go outside img so clip it rectangle area = location.intersect(get_rect(img)); // find the part of the chip that corresponds to area in img. rectangle chip_area = translate_rect(area, -location.tl_corner()); zero_border_pixels(chip, chip_area); // now pull out the contents of area/chip_area. for (long r = chip_area.top(), rr = area.top(); r <= chip_area.bottom(); ++r,++rr) { for (long c = chip_area.left(), cc = area.left(); c <= chip_area.right(); ++c,++cc) { assign_pixel(vchip[r][c], vimg[rr][cc]); } } } } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename interpolation_type > void extract_image_chips ( const image_type1& img, const std::vector& chip_locations, dlib::array& chips, const interpolation_type& interp ) { // make sure requires clause is not broken #ifdef ENABLE_ASSERTS for (unsigned long i = 0; i < chip_locations.size(); ++i) { DLIB_CASSERT(chip_locations[i].size() != 0 && chip_locations[i].rect.is_empty() == false, "\t void extract_image_chips()" << "\n\t Invalid inputs were given to this function." << "\n\t chip_locations["< pyr; long max_depth = 0; // If the chip is supposed to be much smaller than the source subwindow then you // can't just extract it using bilinear interpolation since at a high enough // downsampling amount it would effectively turn into nearest neighbor // interpolation. So we use an image pyramid to make sure the interpolation is // fast but also high quality. The first thing we do is figure out how deep the // image pyramid needs to be. rectangle bounding_box; for (unsigned long i = 0; i < chip_locations.size(); ++i) { long depth = 0; double grow = 2; drectangle rect = pyr.rect_down(chip_locations[i].rect); while (rect.area() > chip_locations[i].size()) { rect = pyr.rect_down(rect); ++depth; // We drop the image size by a factor of 2 each iteration and then assume a // border of 2 pixels is needed to avoid any border effects of the crop. grow = grow*2 + 2; } drectangle rot_rect; const vector cent = center(chip_locations[i].rect); rot_rect += rotate_point(cent,chip_locations[i].rect.tl_corner(),chip_locations[i].angle); rot_rect += rotate_point(cent,chip_locations[i].rect.tr_corner(),chip_locations[i].angle); rot_rect += rotate_point(cent,chip_locations[i].rect.bl_corner(),chip_locations[i].angle); rot_rect += rotate_point(cent,chip_locations[i].rect.br_corner(),chip_locations[i].angle); bounding_box += grow_rect(rot_rect, grow).intersect(get_rect(img)); max_depth = std::max(depth,max_depth); } //std::cout << "max_depth: " << max_depth << std::endl; //std::cout << "crop amount: " << bounding_box.area()/(double)get_rect(img).area() << std::endl; // now make an image pyramid dlib::array::pixel_type> > levels(max_depth); if (levels.size() != 0) pyr(sub_image(img,bounding_box),levels[0]); for (unsigned long i = 1; i < levels.size(); ++i) pyr(levels[i-1],levels[i]); std::vector > from, to; // now pull out the chips chips.resize(chip_locations.size()); for (unsigned long i = 0; i < chips.size(); ++i) { // If the chip doesn't have any rotation or scaling then use the basic version // of chip extraction that just does a fast copy. if (chip_locations[i].angle == 0 && chip_locations[i].rows == chip_locations[i].rect.height() && chip_locations[i].cols == chip_locations[i].rect.width()) { impl::basic_extract_image_chip(img, chip_locations[i].rect, chips[i]); } else { set_image_size(chips[i], chip_locations[i].rows, chip_locations[i].cols); // figure out which level in the pyramid to use to extract the chip int level = -1; drectangle rect = translate_rect(chip_locations[i].rect, -bounding_box.tl_corner()); while (pyr.rect_down(rect).area() > chip_locations[i].size()) { ++level; rect = pyr.rect_down(rect); } // find the appropriate transformation that maps from the chip to the input // image from.clear(); to.clear(); from.push_back(get_rect(chips[i]).tl_corner()); to.push_back(rotate_point(center(rect),rect.tl_corner(),chip_locations[i].angle)); from.push_back(get_rect(chips[i]).tr_corner()); to.push_back(rotate_point(center(rect),rect.tr_corner(),chip_locations[i].angle)); from.push_back(get_rect(chips[i]).bl_corner()); to.push_back(rotate_point(center(rect),rect.bl_corner(),chip_locations[i].angle)); point_transform_affine trns = find_affine_transform(from,to); // now extract the actual chip if (level == -1) transform_image(sub_image(img,bounding_box),chips[i],interp,trns); else transform_image(levels[level],chips[i],interp,trns); } } } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2 > void extract_image_chips( const image_type1& img, const std::vector& chip_locations, dlib::array& chips ) { extract_image_chips(img, chip_locations, chips, interpolate_bilinear()); } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2, typename interpolation_type > void extract_image_chip ( const image_type1& img, const chip_details& location, image_type2& chip, const interpolation_type& interp ) { // If the chip doesn't have any rotation or scaling then use the basic version of // chip extraction that just does a fast copy. if (location.angle == 0 && location.rows == location.rect.height() && location.cols == location.rect.width()) { impl::basic_extract_image_chip(img, location.rect, chip); } else { std::vector chip_locations(1,location); dlib::array chips; extract_image_chips(img, chip_locations, chips, interp); swap(chips[0], chip); } } // ---------------------------------------------------------------------------------------- template < typename image_type1, typename image_type2 > void extract_image_chip ( const image_type1& img, const chip_details& location, image_type2& chip ) { extract_image_chip(img, location, chip, interpolate_bilinear()); } // ---------------------------------------------------------------------------------------- inline chip_details get_face_chip_details ( const full_object_detection& det, const unsigned long size = 200, const double padding = 0.2 ) { DLIB_CASSERT(det.num_parts() == 68 || det.num_parts() == 5, "\t chip_details get_face_chip_details()" << "\n\t You have to give either a 5 point or 68 point face landmarking output to this function. " << "\n\t det.num_parts(): " << det.num_parts() ); DLIB_CASSERT(padding >= 0 && size > 0, "\t chip_details get_face_chip_details()" << "\n\t Invalid inputs were given to this function." << "\n\t padding: " << padding << "\n\t size: " << size ); std::vector from_points, to_points; if (det.num_parts() == 5) { dpoint p0(0.8595674595992, 0.2134981538014); dpoint p1(0.6460604764104, 0.2289674387677); dpoint p2(0.1205750620789, 0.2137274526848); dpoint p3(0.3340850613712, 0.2290642403242); dpoint p4(0.4901123135679, 0.6277975316475); p0 = (padding+p0)/(2*padding+1); p1 = (padding+p1)/(2*padding+1); p2 = (padding+p2)/(2*padding+1); p3 = (padding+p3)/(2*padding+1); p4 = (padding+p4)/(2*padding+1); from_points.push_back(p0*size); to_points.push_back(det.part(0)); from_points.push_back(p1*size); to_points.push_back(det.part(1)); from_points.push_back(p2*size); to_points.push_back(det.part(2)); from_points.push_back(p3*size); to_points.push_back(det.part(3)); from_points.push_back(p4*size); to_points.push_back(det.part(4)); } else { // Average positions of face points 17-67 const double mean_face_shape_x[] = { 0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124, 0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036, 0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918, 0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149, 0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721, 0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874, 0.553364, 0.490127, 0.42689 }; const double mean_face_shape_y[] = { 0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891, 0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326, 0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733, 0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099, 0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805, 0.902192, 0.909281, 0.902192, 0.864805, 0.784792, 0.778746, 0.785343, 0.778746, 0.784792, 0.824182, 0.831803, 0.824182 }; COMPILE_TIME_ASSERT(sizeof(mean_face_shape_x)/sizeof(double) == 68-17); for (unsigned long i = 17; i < det.num_parts(); ++i) { // Ignore the lower lip if ((55 <= i && i <= 59) || (65 <= i && i <= 67)) continue; // Ignore the eyebrows if (17 <= i && i <= 26) continue; dpoint p; p.x() = (padding+mean_face_shape_x[i-17])/(2*padding+1); p.y() = (padding+mean_face_shape_y[i-17])/(2*padding+1); from_points.push_back(p*size); to_points.push_back(det.part(i)); } } return chip_details(from_points, to_points, chip_dims(size,size)); } // ---------------------------------------------------------------------------------------- inline std::vector get_face_chip_details ( const std::vector& dets, const unsigned long size = 200, const double padding = 0.2 ) { std::vector res; res.reserve(dets.size()); for (unsigned long i = 0; i < dets.size(); ++i) res.push_back(get_face_chip_details(dets[i], size, padding)); return res; } // ---------------------------------------------------------------------------------------- template < typename image_type > image_type jitter_image( const image_type& img, dlib::rand& rnd ) { DLIB_CASSERT(num_rows(img)*num_columns(img) != 0); DLIB_CASSERT(num_rows(img)==num_columns(img)); const double max_rotation_degrees = 3; const double min_object_height = 0.97; const double max_object_height = 0.99999; const double translate_amount = 0.02; const auto rect = shrink_rect(get_rect(img),3); // perturb the location of the crop by a small fraction of the object's size. const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*rect.width(), rnd.get_double_in_range(-translate_amount,translate_amount)*rect.height()); // perturb the scale of the crop by a fraction of the object's size const double rand_scale_perturb = rnd.get_double_in_range(min_object_height, max_object_height); const long box_size = rect.height()/rand_scale_perturb; const auto crop_rect = centered_rect(center(rect)+rand_translate, box_size, box_size); const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180; image_type crop; extract_image_chip(img, chip_details(crop_rect, chip_dims(img.nr(),img.nc()), angle), crop); if (rnd.get_random_double() > 0.5) flip_image_left_right(crop); return crop; } // ---------------------------------------------------------------------------------------- } #endif // DLIB_INTERPOlATIONh_