diff options
Diffstat (limited to 'ml/dlib/dlib/image_keypoint')
19 files changed, 0 insertions, 6179 deletions
diff --git a/ml/dlib/dlib/image_keypoint/binned_vector_feature_image.h b/ml/dlib/dlib/image_keypoint/binned_vector_feature_image.h deleted file mode 100644 index 019a12739..000000000 --- a/ml/dlib/dlib/image_keypoint/binned_vector_feature_image.h +++ /dev/null @@ -1,433 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_BINNED_VECTOR_IMAGE_FEATUrES_Hh_ -#define DLIB_BINNED_VECTOR_IMAGE_FEATUrES_Hh_ - -#include "../lsh/projection_hash.h" -#include "binned_vector_feature_image_abstract.h" -#include <vector> -#include "../algs.h" -#include "../matrix.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type_ = projection_hash - > - class binned_vector_feature_image : noncopyable - { - - public: - typedef feature_extractor feature_extractor_type; - typedef hash_function_type_ hash_function_type; - - typedef std::vector<std::pair<unsigned int,double> > descriptor_type; - - binned_vector_feature_image ( - ); - - void clear ( - ); - - void set_hash ( - const hash_function_type& hash_ - ); - - const hash_function_type& get_hash ( - ) const; - - void copy_configuration ( - const feature_extractor& item - ); - - void copy_configuration ( - const binned_vector_feature_image& item - ); - - template < - typename image_type - > - inline void load ( - const image_type& img - ); - - inline size_t size ( - ) const; - - inline long nr ( - ) const; - - inline long nc ( - ) const; - - inline long get_num_dimensions ( - ) const; - - inline const descriptor_type& operator() ( - long row, - long col - ) const; - - inline const rectangle get_block_rect ( - long row, - long col - ) const; - - inline const point image_to_feat_space ( - const point& p - ) const; - - inline const rectangle image_to_feat_space ( - const rectangle& rect - ) const; - - inline const 
point feat_to_image_space ( - const point& p - ) const; - - inline const rectangle feat_to_image_space ( - const rectangle& rect - ) const; - - template <typename T> - friend void serialize ( - const binned_vector_feature_image<T>& item, - std::ostream& out - ); - - template <typename T> - friend void deserialize ( - binned_vector_feature_image<T>& item, - std::istream& in - ); - - private: - - array2d<descriptor_type> feats; - feature_extractor fe; - hash_function_type phash; - }; - -// ---------------------------------------------------------------------------------------- - - template <typename T> - void serialize ( - const binned_vector_feature_image<T>& item, - std::ostream& out - ) - { - int version = 1; - serialize(version, out); - serialize(item.feats, out); - serialize(item.fe, out); - serialize(item.phash, out); - } - - template <typename T> - void deserialize ( - binned_vector_feature_image<T>& item, - std::istream& in - ) - { - int version = 0; - deserialize(version, in); - if (version != 1) - throw dlib::serialization_error("Unexpected version found while deserializing dlib::binned_vector_feature_image"); - deserialize(item.feats, in); - deserialize(item.fe, in); - deserialize(item.phash, in); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// binned_vector_feature_image member functions -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - binned_vector_feature_image<feature_extractor,hash_function_type>:: - binned_vector_feature_image ( - ) - { - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename 
hash_function_type - > - void binned_vector_feature_image<feature_extractor,hash_function_type>:: - clear ( - ) - { - fe.clear(); - phash = hash_function_type(); - feats.clear(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - void binned_vector_feature_image<feature_extractor,hash_function_type>:: - set_hash ( - const hash_function_type& hash_ - ) - { - phash = hash_; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - const hash_function_type& binned_vector_feature_image<feature_extractor,hash_function_type>:: - get_hash ( - ) const - { - return phash; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - void binned_vector_feature_image<feature_extractor,hash_function_type>:: - copy_configuration ( - const feature_extractor& item - ) - { - fe.copy_configuration(item); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - void binned_vector_feature_image<feature_extractor,hash_function_type>:: - copy_configuration ( - const binned_vector_feature_image& item - ) - { - fe.copy_configuration(item.fe); - phash = item.phash; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - template < - typename image_type - > - void binned_vector_feature_image<feature_extractor,hash_function_type>:: - load ( - const image_type& img - ) - { - fe.load(img); - - if (fe.size() != 0) - { - feats.set_size(fe.nr(), fe.nc()); - for (long r = 0; r < feats.nr(); 
++r) - { - for (long c = 0; c < feats.nc(); ++c) - { - feats[r][c].clear(); - feats[r][c].reserve(fe.get_num_dimensions()+1); - const typename feature_extractor::descriptor_type& des = fe(r,c); - const unsigned long idx = phash(des); - const unsigned long offset = idx*(fe.get_num_dimensions()+1); - - for (long i = 0; i < des.size(); ++i) - { - feats[r][c].push_back(std::make_pair(offset + i, des(i))); - } - feats[r][c].push_back(std::make_pair(offset + des.size(), 1.0)); - } - } - } - else - { - feats.set_size(0,0); - } - - fe.unload(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - size_t binned_vector_feature_image<feature_extractor,hash_function_type>:: - size ( - ) const - { - return feats.size(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - long binned_vector_feature_image<feature_extractor,hash_function_type>:: - nr ( - ) const - { - return feats.nr(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - long binned_vector_feature_image<feature_extractor,hash_function_type>:: - nc ( - ) const - { - return feats.nc(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - long binned_vector_feature_image<feature_extractor,hash_function_type>:: - get_num_dimensions ( - ) const - { - return phash.num_hash_bins()*(fe.get_num_dimensions()+1); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - const 
std::vector<std::pair<unsigned int,double> >& binned_vector_feature_image<feature_extractor,hash_function_type>:: - operator() ( - long row, - long col - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(0 <= row && row < nr() && - 0 <= col && col < nc(), - "\t descriptor_type binned_vector_feature_image::operator(row,col)" - << "\n\t Invalid inputs were given to this function" - << "\n\t row: " << row - << "\n\t col: " << col - << "\n\t nr(): " << nr() - << "\n\t nc(): " << nc() - << "\n\t this: " << this - ); - - return feats[row][col]; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - const rectangle binned_vector_feature_image<feature_extractor,hash_function_type>:: - get_block_rect ( - long row, - long col - ) const - { - return fe.get_block_rect(row,col); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - const point binned_vector_feature_image<feature_extractor,hash_function_type>:: - image_to_feat_space ( - const point& p - ) const - { - return fe.image_to_feat_space(p); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - const rectangle binned_vector_feature_image<feature_extractor,hash_function_type>:: - image_to_feat_space ( - const rectangle& rect - ) const - { - return fe.image_to_feat_space(rect); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - const point binned_vector_feature_image<feature_extractor,hash_function_type>:: - feat_to_image_space ( - const point& p - ) const - { - return fe.feat_to_image_space(p); - } - -// 
---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - const rectangle binned_vector_feature_image<feature_extractor,hash_function_type>:: - feat_to_image_space ( - const rectangle& rect - ) const - { - return fe.feat_to_image_space(rect); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_BINNED_VECTOR_IMAGE_FEATUrES_Hh_ - - diff --git a/ml/dlib/dlib/image_keypoint/binned_vector_feature_image_abstract.h b/ml/dlib/dlib/image_keypoint/binned_vector_feature_image_abstract.h deleted file mode 100644 index 6bd6cdbb8..000000000 --- a/ml/dlib/dlib/image_keypoint/binned_vector_feature_image_abstract.h +++ /dev/null @@ -1,287 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_BINNED_VECTOR_FEATUrES_ABSTRACT_Hh_ -#ifdef DLIB_BINNED_VECTOR_FEATUrES_ABSTRACT_Hh_ - -#include "../lsh/projection_hash_abstract.h" -#include <vector> -#include "../matrix.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type_ = projection_hash - > - class binned_vector_feature_image : noncopyable - { - /*! - REQUIREMENTS ON feature_extractor - - must be an object with an interface compatible with dlib::hog_image - - REQUIREMENTS ON hash_function_type_ - - must be an object with an interface compatible with projection_hash - - INITIAL VALUE - - size() == 0 - - WHAT THIS OBJECT REPRESENTS - This object is a tool for performing image feature extraction. In - particular, it wraps another image feature extractor and converts the - wrapped image feature vectors into a high dimensional sparse vector. 
For - example, if the lower level feature extractor outputs the vector [3,4,5] - and this vector is hashed into the second bin of four bins then the output - sparse vector is: - [0,0,0,0, 3,4,5,1, 0,0,0,0, 0,0,0,0]. - That is, the output vector has a dimensionality that is equal to the number - of hash bins times the dimensionality of the lower level vector plus one. - The value in the extra dimension concatenated onto the end of the vector is - always a constant value of of 1 and serves as a bias value. This means - that, if there are N hash bins, these vectors are capable of representing N - different linear functions, each operating on the vectors that fall into - their corresponding hash bin. - - - THREAD SAFETY - Concurrent access to an instance of this object is not safe and should be - protected by a mutex lock except for the case where you are copying the - configuration (via copy_configuration()) of a binned_vector_feature_image - object to many other threads. In this case, it is safe to copy the - configuration of a shared object so long as no other operations are - performed on it. - - - NOTATION - let BASE_FE denote the base feature_extractor object contained inside the - binned_vector_feature_image. - !*/ - - public: - - typedef feature_extractor feature_extractor_type; - typedef hash_function_type_ hash_function_type; - typedef std::vector<std::pair<unsigned int,double> > descriptor_type; - - binned_vector_feature_image ( - ); - /*! - ensures - - this object is properly initialized - !*/ - - void clear ( - ); - /*! - ensures - - this object will have its initial value - !*/ - - void set_hash ( - const hash_function_type& hash - ); - /*! - ensures - - #get_hash() == hash - !*/ - - const hash_function_type& get_hash ( - ) const; - /*! - ensures - - returns the hash function used by this object to hash - base feature vectors into integers. - !*/ - - void copy_configuration ( - const feature_extractor& item - ); - /*! 
- ensures - - performs BASE_FE.copy_configuration(item) - !*/ - - void copy_configuration ( - const binned_vector_feature_image& item - ); - /*! - ensures - - copies all the state information of item into *this, except for state - information populated by load(). More precisely, given two binned_vector_feature_image - objects H1 and H2, the following sequence of instructions should always - result in both of them having the exact same state. - H2.copy_configuration(H1); - H1.load(img); - H2.load(img); - !*/ - - template < - typename image_type - > - void load ( - const image_type& img - ); - /*! - requires - - image_type == any type that can be supplied to feature_extractor::load() - ensures - - performs BASE_FE.load(img) - i.e. does feature extraction. The features can be accessed using - operator() as defined below. - !*/ - - size_t size ( - ) const; - /*! - ensures - - returns BASE_FE.size() - !*/ - - long nr ( - ) const; - /*! - ensures - - returns BASE_FE.nr() - !*/ - - long nc ( - ) const; - /*! - ensures - - returns BASE_FE.nc() - !*/ - - long get_num_dimensions ( - ) const; - /*! - ensures - - returns the dimensionality of the feature vectors returned by operator(). - In this case, this is the number of hash bins times the dimensionality of - the features produced by BASE_FE plus one. That is, this function - returns get_hash().num_hash_bins()*(BASE_FE.get_num_dimensions()+1) - !*/ - - const descriptor_type& operator() ( - long row, - long col - ) const; - /*! - requires - - 0 <= row < nr() - - 0 <= col < nc() - - It must be legal to evaluate expressions of the form: get_hash()(BASE_FE(row,col)) - (e.g. the hash function must be properly configured to process the feature - vectors produced by the base feature extractor) - ensures - - hashes BASE_FE(row,col) and returns the resulting sparse vector. 
In - particular, we return a vector that is a copy of BASE_FE(row,col) that - has been shifted into the part of the sparse vector indicated by the hash - function. It will also have a constant bias value of 1 appended to it. - - To be precise, this function returns a sparse vector V such that: - - V.size() == BASE_FE.get_num_dimensions()+1 - - let IDX = get_hash()(BASE_FE(row,col)) - - for i where 0 <= i < BASE_FE.get_num_dimensions(): - - V[i].first == IDX*(BASE_FE.get_num_dimensions()+1) + i - - V[i].second == BASE_FE(row,col)(i) - - V[BASE_FE.get_num_dimensions()].first == IDX*(BASE_FE.get_num_dimensions()+1) + BASE_FE.get_num_dimensions() - - V[BASE_FE.get_num_dimensions()].second == 1 - !*/ - - const rectangle get_block_rect ( - long row, - long col - ) const; - /*! - ensures - - returns BASE_FE.get_block_rect(row,col) - I.e. returns a rectangle that tells you what part of the original image is associated - with a particular feature vector. - !*/ - - const point image_to_feat_space ( - const point& p - ) const; - /*! - ensures - - returns BASE_FE.image_to_feat_space(p) - I.e. Each local feature is extracted from a certain point in the input image. - This function returns the identity of the local feature corresponding - to the image location p. Or in other words, let P == image_to_feat_space(p), - then (*this)(P.y(),P.x()) == the local feature closest to, or centered at, - the point p in the input image. Note that some image points might not have - corresponding feature locations. E.g. border points or points outside the - image. In these cases the returned point will be outside get_rect(*this). - !*/ - - const rectangle image_to_feat_space ( - const rectangle& rect - ) const; - /*! - ensures - - returns BASE_FE.image_to_feat_space(rect) - I.e. returns rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner())); - (i.e. 
maps a rectangle from image space to feature space) - !*/ - - const point feat_to_image_space ( - const point& p - ) const; - /*! - ensures - - returns BASE_FE.feat_to_image_space(p) - I.e. returns the location in the input image space corresponding to the center - of the local feature at point p. In other words, this function computes - the inverse of image_to_feat_space(). Note that it may only do so approximately, - since more than one image location might correspond to the same local feature. - That is, image_to_feat_space() might not be invertible so this function gives - the closest possible result. - !*/ - - const rectangle feat_to_image_space ( - const rectangle& rect - ) const; - /*! - ensures - - returns BASE_FE.feat_to_image_space(rect) - I.e. return rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner())); - (i.e. maps a rectangle from feature space to image space) - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U - > - void serialize ( - const binned_vector_feature_image<T,U>& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U - > - void deserialize ( - binned_vector_feature_image<T,U>& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_BINNED_VECTOR_FEATUrES_ABSTRACT_Hh_ - diff --git a/ml/dlib/dlib/image_keypoint/build_separable_poly_filters.h b/ml/dlib/dlib/image_keypoint/build_separable_poly_filters.h deleted file mode 100644 index aea59067d..000000000 --- a/ml/dlib/dlib/image_keypoint/build_separable_poly_filters.h +++ /dev/null @@ -1,186 +0,0 @@ -// Copyright (C) 2011 Davis E. 
King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_BUILD_SEPARABLE_PoLY_FILTERS_Hh_ -#define DLIB_BUILD_SEPARABLE_PoLY_FILTERS_Hh_ - -#include "../matrix.h" -#include "surf.h" -#include "../uintn.h" -#include <vector> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - typedef std::pair<matrix<double,0,1>, matrix<double,0,1> > separable_filter_type; - typedef std::pair<matrix<int32,0,1>, matrix<int32,0,1> > separable_int32_filter_type; - -// ---------------------------------------------------------------------------------------- - - inline std::vector<std::vector<separable_filter_type> > build_separable_poly_filters ( - const long order, - const long window_size - ) - /*! - requires - - 1 <= order <= 6 - - window_size >= 3 && window_size is odd - ensures - - the "first" element is the row_filter, the second is the col_filter. - - Some filters are not totally separable and that's why they are grouped - into vectors of vectors. The groups are all the parts of a partially - separable filter. 
- !*/ - { - long num_filters = 6; - switch (order) - { - case 1: num_filters = 3; break; - case 2: num_filters = 6; break; - case 3: num_filters = 10; break; - case 4: num_filters = 15; break; - case 5: num_filters = 21; break; - case 6: num_filters = 28; break; - } - - matrix<double> X(window_size*window_size,num_filters); - matrix<double,0,1> G(window_size*window_size,1); - const double sigma = window_size/4.0; - - - long cnt = 0; - for (double x = -window_size/2; x <= window_size/2; ++x) - { - for (double y = -window_size/2; y <= window_size/2; ++y) - { - X(cnt, 0) = 1; - X(cnt, 1) = x; - X(cnt, 2) = y; - - if (X.nc() > 5) - { - X(cnt, 3) = x*x; - X(cnt, 4) = x*y; - X(cnt, 5) = y*y; - } - if (X.nc() > 9) - { - X(cnt, 6) = x*x*x; - X(cnt, 7) = y*x*x; - X(cnt, 8) = y*y*x; - X(cnt, 9) = y*y*y; - } - if (X.nc() > 14) - { - X(cnt, 10) = x*x*x*x; - X(cnt, 11) = y*x*x*x; - X(cnt, 12) = y*y*x*x; - X(cnt, 13) = y*y*y*x; - X(cnt, 14) = y*y*y*y; - } - if (X.nc() > 20) - { - X(cnt, 15) = x*x*x*x*x; - X(cnt, 16) = y*x*x*x*x; - X(cnt, 17) = y*y*x*x*x; - X(cnt, 18) = y*y*y*x*x; - X(cnt, 19) = y*y*y*y*x; - X(cnt, 20) = y*y*y*y*y; - } - if (X.nc() > 27) - { - X(cnt, 21) = x*x*x*x*x*x; - X(cnt, 22) = y*x*x*x*x*x; - X(cnt, 23) = y*y*x*x*x*x; - X(cnt, 24) = y*y*y*x*x*x; - X(cnt, 25) = y*y*y*y*x*x; - X(cnt, 26) = y*y*y*y*y*x; - X(cnt, 27) = y*y*y*y*y*y; - } - - G(cnt) = std::sqrt(gaussian(x,y,sigma)); - ++cnt; - } - } - - X = diagm(G)*X; - - const matrix<double> S = inv(trans(X)*X)*trans(X)*diagm(G); - - matrix<double,0,1> row_filter, col_filter; - - matrix<double> u,v, temp; - matrix<double,0,1> w; - - std::vector<std::vector<separable_filter_type> > results(num_filters); - - for (long r = 0; r < S.nr(); ++r) - { - temp = reshape(rowm(S,r), window_size, window_size); - svd3(temp,u,w,v); - const double thresh = max(w)*1e-8; - for (long i = 0; i < w.size(); ++i) - { - if (w(i) > thresh) - { - col_filter = std::sqrt(w(i))*colm(u,i); - row_filter = std::sqrt(w(i))*colm(v,i); - 
results[r].push_back(std::make_pair(row_filter, col_filter)); - } - } - } - - return results; - } - -// ---------------------------------------------------------------------------------------- - - inline std::vector<std::vector<separable_int32_filter_type> > build_separable_int32_poly_filters ( - const long order, - const long window_size, - const double max_range = 300.0 - ) - /*! - requires - - 1 <= order <= 6 - - window_size >= 3 && window_size is odd - - max_range > 1 - ensures - - the "first" element is the row_filter, the second is the col_filter. - !*/ - { - const std::vector<std::vector<separable_filter_type> >& filters = build_separable_poly_filters(order, window_size); - std::vector<std::vector<separable_int32_filter_type> > int_filters(filters.size()); - - for (unsigned long i = 0; i < filters.size(); ++i) - { - - double max_val = 0; - for (unsigned long j = 0; j < filters[i].size(); ++j) - { - const separable_filter_type& filt = filters[i][j]; - max_val = std::max(max_val, max(abs(filt.first))); - max_val = std::max(max_val, max(abs(filt.second))); - } - if (max_val == 0) - max_val = 1; - - int_filters[i].resize(filters[i].size()); - for (unsigned long j = 0; j < filters[i].size(); ++j) - { - const separable_filter_type& filt = filters[i][j]; - int_filters[i][j].first = matrix_cast<int32>(round(filt.first*max_range/max_val)); - int_filters[i][j].second = matrix_cast<int32>(round(filt.second*max_range/max_val)); - } - } - - return int_filters; - } - -} - -// ---------------------------------------------------------------------------------------- - -#endif // DLIB_BUILD_SEPARABLE_PoLY_FILTERS_Hh_ - diff --git a/ml/dlib/dlib/image_keypoint/draw_surf_points.h b/ml/dlib/dlib/image_keypoint/draw_surf_points.h deleted file mode 100644 index b16c28f5d..000000000 --- a/ml/dlib/dlib/image_keypoint/draw_surf_points.h +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (C) 2013 Davis E. 
King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_DRAW_SURf_POINTS_H_ -#define DLIB_DRAW_SURf_POINTS_H_ - -#include "surf.h" -#include "../gui_widgets.h" -#include "draw_surf_points_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - inline void draw_surf_points ( - image_window& win, - const std::vector<surf_point>& sp - ) - { - for (unsigned long i = 0; i < sp.size(); ++i) - { - const unsigned long radius = static_cast<unsigned long>(sp[i].p.scale*3); - const point center(sp[i].p.center); - point direction = center + point(radius,0); - // SURF descriptors are rotated by sp[i].angle. So we want to include a visual - // indication of this rotation on our overlay. - direction = rotate_point(center, direction, sp[i].angle); - - win.add_overlay(image_display::overlay_circle(center, radius, rgb_pixel(0,255,0))); - // Draw a line showing the orientation of the SURF descriptor. - win.add_overlay(center, direction, rgb_pixel(255,0,0)); - } - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_DRAW_SURf_POINTS_H_ - - diff --git a/ml/dlib/dlib/image_keypoint/draw_surf_points_abstract.h b/ml/dlib/dlib/image_keypoint/draw_surf_points_abstract.h deleted file mode 100644 index 86a66ef49..000000000 --- a/ml/dlib/dlib/image_keypoint/draw_surf_points_abstract.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_DRAW_SURf_POINTS_ABSTRACT_H_ -#ifdef DLIB_DRAW_SURf_POINTS_ABSTRACT_H_ - -#include "surf_abstract.h" -#include "../gui_widgets.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - void draw_surf_points ( - image_window& win, - const std::vector<surf_point>& sp - ); - /*! 
- ensures - - draws all the SURF points in sp onto the given image_window. They - are drawn as overlay circles with extra lines to indicate the rotation - of the SURF descriptor. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_DRAW_SURf_POINTS_ABSTRACT_H_ - diff --git a/ml/dlib/dlib/image_keypoint/fine_hog_image.h b/ml/dlib/dlib/image_keypoint/fine_hog_image.h deleted file mode 100644 index a421ffe7c..000000000 --- a/ml/dlib/dlib/image_keypoint/fine_hog_image.h +++ /dev/null @@ -1,378 +0,0 @@ -// Copyright (C) 2012 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_FINE_HOG_IMaGE_Hh_ -#define DLIB_FINE_HOG_IMaGE_Hh_ - -#include "fine_hog_image_abstract.h" -#include "../array2d.h" -#include "../matrix.h" -#include "hog.h" - - -namespace dlib -{ - template < - unsigned long cell_size_, - unsigned long block_size_, - unsigned long pixel_stride_, - unsigned char num_orientation_bins_, - int gradient_type_ - > - class fine_hog_image : noncopyable - { - COMPILE_TIME_ASSERT(cell_size_ > 1); - COMPILE_TIME_ASSERT(block_size_ > 0); - COMPILE_TIME_ASSERT(pixel_stride_ > 0); - COMPILE_TIME_ASSERT(num_orientation_bins_ > 0); - - COMPILE_TIME_ASSERT( gradient_type_ == hog_signed_gradient || - gradient_type_ == hog_unsigned_gradient); - - - public: - - const static unsigned long cell_size = cell_size_; - const static unsigned long block_size = block_size_; - const static unsigned long pixel_stride = pixel_stride_; - const static unsigned long num_orientation_bins = num_orientation_bins_; - const static int gradient_type = gradient_type_; - - const static long min_size = cell_size*block_size+2; - - typedef matrix<double, block_size*block_size*num_orientation_bins, 1> descriptor_type; - - fine_hog_image ( - ) : - num_block_rows(0), - num_block_cols(0) - {} - - void clear ( - ) - { - num_block_rows = 0; - num_block_cols = 0; - 
hist_counts.clear(); - } - - void copy_configuration ( - const fine_hog_image& - ){} - - template < - typename image_type - > - inline void load ( - const image_type& img - ) - { - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false ); - load_impl(mat(img)); - } - - inline void unload( - ) { clear(); } - - inline size_t size ( - ) const { return static_cast<size_t>(nr()*nc()); } - - inline long nr ( - ) const { return num_block_rows; } - - inline long nc ( - ) const { return num_block_cols; } - - long get_num_dimensions ( - ) const - { - return block_size*block_size*num_orientation_bins; - } - - inline const descriptor_type& operator() ( - long row, - long col - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT( 0 <= row && row < nr() && - 0 <= col && col < nc(), - "\t descriptor_type fine_hog_image::operator()()" - << "\n\t invalid row or col argument" - << "\n\t row: " << row - << "\n\t col: " << col - << "\n\t nr(): " << nr() - << "\n\t nc(): " << nc() - << "\n\t this: " << this - ); - - row *= pixel_stride; - col *= pixel_stride; - - des = 0; - unsigned long off = 0; - for (unsigned long r = 0; r < block_size; ++r) - { - for (unsigned long c = 0; c < block_size; ++c) - { - for (unsigned long rr = 0; rr < cell_size; ++rr) - { - for (unsigned long cc = 0; cc < cell_size; ++cc) - { - const histogram_count& hist = hist_counts[row + r*cell_size + rr][col + c*cell_size + cc]; - des(off + hist.quantized_angle_lower) += hist.lower_strength; - des(off + hist.quantized_angle_upper) += hist.upper_strength; - } - } - - off += num_orientation_bins; - } - } - - des /= length(des) + 1e-8; - - return des; - } - - const rectangle get_block_rect ( - long row, - long col - ) const - { - row *= pixel_stride; - col *= pixel_stride; - - // do this to account for the 1 pixel padding we use all around the image - ++row; - ++col; - - return rectangle(col, row, col+cell_size*block_size-1, row+cell_size*block_size-1); 
- } - - const point image_to_feat_space ( - const point& p - ) const - { - const long border_size = 1 + cell_size*block_size/2; - return (p-point(border_size,border_size))/(long)pixel_stride; - } - - const rectangle image_to_feat_space ( - const rectangle& rect - ) const - { - return rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner())); - } - - const point feat_to_image_space ( - const point& p - ) const - { - const long border_size = 1 + cell_size*block_size/2; - return p*(long)pixel_stride + point(border_size,border_size); - } - - const rectangle feat_to_image_space ( - const rectangle& rect - ) const - { - return rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner())); - } - - - - // these _PRIVATE_ functions are only here as a workaround for a bug in visual studio 2005. - void _PRIVATE_serialize (std::ostream& out) const - { - // serialize hist_counts - serialize(hist_counts.nc(),out); - serialize(hist_counts.nr(),out); - hist_counts.reset(); - while (hist_counts.move_next()) - hist_counts.element().serialize(out); - hist_counts.reset(); - - - serialize(num_block_rows, out); - serialize(num_block_cols, out); - } - - void _PRIVATE_deserialize (std::istream& in ) - { - // deserialize item.hist_counts - long nc, nr; - deserialize(nc,in); - deserialize(nr,in); - hist_counts.set_size(nr,nc); - while (hist_counts.move_next()) - hist_counts.element().deserialize(in); - hist_counts.reset(); - - - deserialize(num_block_rows, in); - deserialize(num_block_cols, in); - } - - private: - - template < - typename image_type - > - void load_impl ( - const image_type& img - ) - { - // Note that we keep a border of 1 pixel all around the image so that we don't have - // to worry about running outside the image when computing the horizontal and vertical - // gradients. 
- - - - // check if the window is just too small - if (img.nr() < min_size || img.nc() < min_size) - { - // If the image is smaller than our windows then there aren't any descriptors at all! - num_block_rows = 0; - num_block_cols = 0; - hist_counts.clear(); - return; - } - - hist_counts.set_size(img.nr()-2, img.nc()-2); - - - - - for (long r = 0; r < hist_counts.nr(); ++r) - { - for (long c = 0; c < hist_counts.nc(); ++c) - { - unsigned long left; - unsigned long right; - unsigned long top; - unsigned long bottom; - - assign_pixel(left, img(r+1,c)); - assign_pixel(right, img(r+1,c+2)); - assign_pixel(top, img(r ,c+1)); - assign_pixel(bottom, img(r+2,c+1)); - - double grad_x = (long)right-(long)left; - double grad_y = (long)top-(long)bottom; - - // obtain the angle of the gradient. Make sure it is scaled between 0 and 1. - double angle = std::max(0.0, std::atan2(grad_y, grad_x)/pi + 1)/2; - - - if (gradient_type == hog_unsigned_gradient) - { - angle *= 2; - if (angle >= 1) - angle -= 1; - } - - - // now scale angle to between 0 and num_orientation_bins - angle *= num_orientation_bins; - - - const double strength = std::sqrt(grad_y*grad_y + grad_x*grad_x); - - - unsigned char quantized_angle_lower = static_cast<unsigned char>(std::floor(angle)); - unsigned char quantized_angle_upper = static_cast<unsigned char>(std::ceil(angle)); - - quantized_angle_lower %= num_orientation_bins; - quantized_angle_upper %= num_orientation_bins; - - const double angle_split = (angle-std::floor(angle)); - const double upper_strength = angle_split*strength; - const double lower_strength = (1-angle_split)*strength; - - // Stick into gradient counts. Note that we linearly interpolate between neighboring - // histogram buckets. 
- hist_counts[r][c].quantized_angle_lower = quantized_angle_lower; - hist_counts[r][c].quantized_angle_upper = quantized_angle_upper; - hist_counts[r][c].lower_strength = lower_strength; - hist_counts[r][c].upper_strength = upper_strength; - - } - } - - - // Now figure out how many feature extraction blocks we should have. - num_block_rows = (hist_counts.nr() - block_size*cell_size + 1)/(long)pixel_stride; - num_block_cols = (hist_counts.nc() - block_size*cell_size + 1)/(long)pixel_stride; - - } - - struct histogram_count - { - unsigned char quantized_angle_lower; - unsigned char quantized_angle_upper; - float lower_strength; - float upper_strength; - - void serialize(std::ostream& out) const - { - dlib::serialize(quantized_angle_lower, out); - dlib::serialize(quantized_angle_upper, out); - dlib::serialize(lower_strength, out); - dlib::serialize(upper_strength, out); - } - void deserialize(std::istream& in) - { - dlib::deserialize(quantized_angle_lower, in); - dlib::deserialize(quantized_angle_upper, in); - dlib::deserialize(lower_strength, in); - dlib::deserialize(upper_strength, in); - } - }; - - array2d<histogram_count> hist_counts; - - mutable descriptor_type des; - - long num_block_rows; - long num_block_cols; - - - }; - -// ---------------------------------------------------------------------------------------- - - template < - unsigned long T1, - unsigned long T2, - unsigned long T3, - unsigned char T4, - int T5 - > - void serialize ( - const fine_hog_image<T1,T2,T3,T4,T5>& item, - std::ostream& out - ) - { - item._PRIVATE_serialize(out); - } - - template < - unsigned long T1, - unsigned long T2, - unsigned long T3, - unsigned char T4, - int T5 - > - void deserialize ( - fine_hog_image<T1,T2,T3,T4,T5>& item, - std::istream& in - ) - { - item._PRIVATE_deserialize(in); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_FINE_HOG_IMaGE_Hh_ - diff --git 
a/ml/dlib/dlib/image_keypoint/fine_hog_image_abstract.h b/ml/dlib/dlib/image_keypoint/fine_hog_image_abstract.h deleted file mode 100644 index 50be85afe..000000000 --- a/ml/dlib/dlib/image_keypoint/fine_hog_image_abstract.h +++ /dev/null @@ -1,276 +0,0 @@ -// Copyright (C) 2012 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_FINE_HOG_IMaGE_ABSTRACT_Hh_ -#ifdef DLIB_FINE_HOG_IMaGE_ABSTRACT_Hh_ - -#include "../array2d.h" -#include "../matrix.h" -#include "hog_abstract.h" - - -namespace dlib -{ - template < - unsigned long cell_size_, - unsigned long block_size_, - unsigned long pixel_stride_, - unsigned char num_orientation_bins_, - int gradient_type_ - > - class fine_hog_image : noncopyable - { - /*! - REQUIREMENTS ON TEMPLATE PARAMETERS - - cell_size_ > 1 - - block_size_ > 0 - - pixel_stride_ > 0 - - num_orientation_bins_ > 0 - - gradient_type_ == hog_signed_gradient or hog_unsigned_gradient - - INITIAL VALUE - - size() == 0 - - WHAT THIS OBJECT REPRESENTS - This object is a version of the hog_image that allows you to extract HOG - features at a finer resolution. The hog_image can only extract HOG features - cell_size_ pixels apart. However, this object, the fine_hog_image can - extract HOG features from every pixel location. - - The template arguments to this class have the same meaning as they do for - the hog_image, except for pixel_stride_. This controls the stepping between - HOG extraction locations. A value of 1 indicates HOG features should be - extracted from every pixel location. A value of 2 indicates every other pixel - location, etc. - - Finally, note that the interpolation used by this object is equivalent - to using hog_angle_interpolation with hog_image. 
- - THREAD SAFETY - Concurrent access to an instance of this object is not safe and should be protected - by a mutex lock except for the case where you are copying the configuration - (via copy_configuration()) of a fine_hog_image object to many other threads. - In this case, it is safe to copy the configuration of a shared object so long - as no other operations are performed on it. - !*/ - - public: - - const static unsigned long cell_size = cell_size_; - const static unsigned long block_size = block_size_; - const static unsigned long pixel_stride = pixel_stride_; - const static unsigned long num_orientation_bins = num_orientation_bins_; - const static int gradient_type = gradient_type_; - - const static long min_size = cell_size*block_size+2; - - typedef matrix<double, block_size*block_size*num_orientation_bins, 1> descriptor_type; - - fine_hog_image ( - ); - /*! - ensures - - this object is properly initialized - !*/ - - void clear ( - ); - /*! - ensures - - this object will have its initial value - !*/ - - void copy_configuration ( - const fine_hog_image& - ); - /*! - ensures - - copies all the state information of item into *this, except for state - information populated by load(). More precisely, given two fine_hog_image - objects H1 and H2, the following sequence of instructions should always - result in both of them having the exact same state. - H2.copy_configuration(H1); - H1.load(img); - H2.load(img); - !*/ - - template < - typename image_type - > - inline void load ( - const image_type& img - ); - /*! - requires - - image_type is a dlib::matrix or something convertible to a matrix - via mat() - - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false - ensures - - if (img.nr() < min_size || img.nc() < min_size) then - - the image is too small so we don't compute anything on it - - #size() == 0 - - else - - generates a HOG image from the given image. - - #size() > 0 - !*/ - - inline void unload( - ); - /*! 
- ensures - - #nr() == 0 - - #nc() == 0 - - clears only the state information which is populated by load(). For - example, let H be a fine_hog_image object. Then consider the two - sequences of instructions: - Sequence 1: - H.load(img); - H.unload(); - H.load(img); - - Sequence 2: - H.load(img); - Both sequence 1 and sequence 2 should have the same effect on H. - !*/ - - inline size_t size ( - ) const; - /*! - ensures - - returns nr()*nc() - !*/ - - inline long nr ( - ) const; - /*! - ensures - - returns the number of rows in this HOG image - !*/ - - inline long nc ( - ) const; - /*! - ensures - - returns the number of columns in this HOG image - !*/ - - long get_num_dimensions ( - ) const; - /*! - ensures - - returns the number of dimensions in the feature vectors generated by - this object. - - In particular, returns the value block_size*block_size*num_orientation_bins - !*/ - - inline const descriptor_type& operator() ( - long row, - long col - ) const; - /*! - requires - - 0 <= row < nr() - - 0 <= col < nc() - ensures - - returns the descriptor for the HOG block at the given row and column. This descriptor - will include information from a window that is located at get_block_rect(row,col) in - the original image given to load(). - - The returned descriptor vector will have get_num_dimensions() elements. - !*/ - - const rectangle get_block_rect ( - long row, - long col - ) const; - /*! - ensures - - returns a rectangle that tells you what part of the original image is associated - with a particular HOG block. That is, what part of the input image is associated - with (*this)(row,col). - - The returned rectangle will be cell_size*block_size pixels wide and tall. - !*/ - - const point image_to_feat_space ( - const point& p - ) const; - /*! - ensures - - Each local feature is extracted from a certain point in the input image. - This function returns the identity of the local feature corresponding - to the image location p. 
Or in other words, let P == image_to_feat_space(p), - then (*this)(P.y(),P.x()) == the local feature closest to, or centered at, - the point p in the input image. Note that some image points might not have - corresponding feature locations. E.g. border points or points outside the - image. In these cases the returned point will be outside get_rect(*this). - !*/ - - const rectangle image_to_feat_space ( - const rectangle& rect - ) const; - /*! - ensures - - returns rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner())); - (i.e. maps a rectangle from image space to feature space) - !*/ - - const point feat_to_image_space ( - const point& p - ) const; - /*! - ensures - - returns the location in the input image space corresponding to the center - of the local feature at point p. In other words, this function computes - the inverse of image_to_feat_space(). Note that it may only do so approximately, - since more than one image location might correspond to the same local feature. - That is, image_to_feat_space() might not be invertible so this function gives - the closest possible result. - !*/ - - const rectangle feat_to_image_space ( - const rectangle& rect - ) const; - /*! - ensures - - return rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner())); - (i.e. maps a rectangle from feature space to image space) - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - unsigned long T1, - unsigned long T2, - unsigned long T3, - unsigned char T4, - int T5 - > - void serialize ( - const fine_hog_image<T1,T2,T3,T4,T5>& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - - template < - unsigned long T1, - unsigned long T2, - unsigned long T3, - unsigned char T4, - int T5 - > - void deserialize ( - fine_hog_image<T1,T2,T3,T4,T5>& item, - std::istream& in - ); - /*! 
- provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_FINE_HOG_IMaGE_ABSTRACT_Hh_ - diff --git a/ml/dlib/dlib/image_keypoint/hashed_feature_image.h b/ml/dlib/dlib/image_keypoint/hashed_feature_image.h deleted file mode 100644 index 80f429330..000000000 --- a/ml/dlib/dlib/image_keypoint/hashed_feature_image.h +++ /dev/null @@ -1,518 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_HASHED_IMAGE_FEATUrES_Hh_ -#define DLIB_HASHED_IMAGE_FEATUrES_Hh_ - -#include "../lsh/projection_hash.h" -#include "hashed_feature_image_abstract.h" -#include <vector> -#include "../algs.h" -#include "../matrix.h" -#include "../statistics.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type_ = projection_hash - > - class hashed_feature_image : noncopyable - { - - public: - typedef feature_extractor feature_extractor_type; - typedef hash_function_type_ hash_function_type; - - typedef std::vector<std::pair<unsigned int,double> > descriptor_type; - - hashed_feature_image ( - ); - - void clear ( - ); - - void set_hash ( - const hash_function_type& hash_ - ); - - const hash_function_type& get_hash ( - ) const; - - void copy_configuration ( - const feature_extractor& item - ); - - void copy_configuration ( - const hashed_feature_image& item - ); - - template < - typename image_type - > - inline void load ( - const image_type& img - ); - - inline size_t size ( - ) const; - - inline long nr ( - ) const; - - inline long nc ( - ) const; - - inline long get_num_dimensions ( - ) const; - - void use_relative_feature_weights ( - ); - - void use_uniform_feature_weights ( - ); - - bool uses_uniform_feature_weights ( - ) const; - - inline const descriptor_type& 
operator() ( - long row, - long col - ) const; - - inline const rectangle get_block_rect ( - long row, - long col - ) const; - - inline const point image_to_feat_space ( - const point& p - ) const; - - inline const rectangle image_to_feat_space ( - const rectangle& rect - ) const; - - inline const point feat_to_image_space ( - const point& p - ) const; - - inline const rectangle feat_to_image_space ( - const rectangle& rect - ) const; - - template <typename T> - friend void serialize ( - const hashed_feature_image<T>& item, - std::ostream& out - ); - - template <typename T> - friend void deserialize ( - hashed_feature_image<T>& item, - std::istream& in - ); - - private: - - array2d<unsigned long> feats; - feature_extractor fe; - hash_function_type phash; - std::vector<float> feat_counts; - bool uniform_feature_weights; - - - // This is a transient variable. It is just here so it doesn't have to be - // reallocated over and over inside operator() - mutable descriptor_type hash_feats; - - }; - -// ---------------------------------------------------------------------------------------- - - template <typename T> - void serialize ( - const hashed_feature_image<T>& item, - std::ostream& out - ) - { - int version = 1; - serialize(version, out); - serialize(item.feats, out); - serialize(item.fe, out); - serialize(item.phash, out); - serialize(item.feat_counts, out); - serialize(item.uniform_feature_weights, out); - } - - template <typename T> - void deserialize ( - hashed_feature_image<T>& item, - std::istream& in - ) - { - int version = 0; - deserialize(version, in); - if (version != 1) - throw serialization_error("Unexpected version found while deserializing a dlib::hashed_feature_image object."); - - deserialize(item.feats, in); - deserialize(item.fe, in); - deserialize(item.phash, in); - deserialize(item.feat_counts, in); - deserialize(item.uniform_feature_weights, in); - } - -// ---------------------------------------------------------------------------------------- 
-// ---------------------------------------------------------------------------------------- -// hashed_feature_image member functions -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - hashed_feature_image<feature_extractor,hash_function_type>:: - hashed_feature_image ( - ) - { - clear(); - hash_feats.resize(1); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - void hashed_feature_image<feature_extractor,hash_function_type>:: - clear ( - ) - { - fe.clear(); - phash = hash_function_type(); - feats.clear(); - feat_counts.clear(); - uniform_feature_weights = false; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - void hashed_feature_image<feature_extractor,hash_function_type>:: - set_hash ( - const hash_function_type& hash_ - ) - { - phash = hash_; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - const hash_function_type& hashed_feature_image<feature_extractor,hash_function_type>:: - get_hash ( - ) const - { - return phash; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - void hashed_feature_image<feature_extractor,hash_function_type>:: - copy_configuration ( - const feature_extractor& item - ) - { - fe.copy_configuration(item); - } - -// ---------------------------------------------------------------------------------------- - - template < - 
typename feature_extractor, - typename hash_function_type - > - void hashed_feature_image<feature_extractor,hash_function_type>:: - copy_configuration ( - const hashed_feature_image& item - ) - { - fe.copy_configuration(item.fe); - phash = item.phash; - uniform_feature_weights = item.uniform_feature_weights; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - template < - typename image_type - > - void hashed_feature_image<feature_extractor,hash_function_type>:: - load ( - const image_type& img - ) - { - fe.load(img); - - if (fe.size() != 0) - { - feats.set_size(fe.nr(), fe.nc()); - feat_counts.assign(phash.num_hash_bins(),1); - if (uniform_feature_weights) - { - for (long r = 0; r < feats.nr(); ++r) - { - for (long c = 0; c < feats.nc(); ++c) - { - feats[r][c] = phash(fe(r,c)); - } - } - } - else - { - for (long r = 0; r < feats.nr(); ++r) - { - for (long c = 0; c < feats.nc(); ++c) - { - feats[r][c] = phash(fe(r,c)); - feat_counts[feats[r][c]]++; - } - } - } - } - else - { - feats.set_size(0,0); - } - - if (!uniform_feature_weights) - { - // use the inverse frequency as the scale for each feature. We also scale - // these counts so that they are invariant to the size of the image (we scale - // them so they all look like they come from a 500x400 images). 
- const double scale = image_size(img)/(500.0*400.0); - for (unsigned long i = 0; i < feat_counts.size(); ++i) - { - feat_counts[i] = scale/feat_counts[i]; - } - } - - fe.unload(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - size_t hashed_feature_image<feature_extractor,hash_function_type>:: - size ( - ) const - { - return feats.size(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - long hashed_feature_image<feature_extractor,hash_function_type>:: - nr ( - ) const - { - return feats.nr(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - long hashed_feature_image<feature_extractor,hash_function_type>:: - nc ( - ) const - { - return feats.nc(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - long hashed_feature_image<feature_extractor,hash_function_type>:: - get_num_dimensions ( - ) const - { - return phash.num_hash_bins(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - void hashed_feature_image<feature_extractor,hash_function_type>:: - use_relative_feature_weights ( - ) - { - uniform_feature_weights = false; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - void hashed_feature_image<feature_extractor,hash_function_type>:: - use_uniform_feature_weights ( - ) - { - uniform_feature_weights 
= true; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - bool hashed_feature_image<feature_extractor,hash_function_type>:: - uses_uniform_feature_weights ( - ) const - { - return uniform_feature_weights; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - const std::vector<std::pair<unsigned int,double> >& hashed_feature_image<feature_extractor,hash_function_type>:: - operator() ( - long row, - long col - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(0 <= row && row < nr() && - 0 <= col && col < nc(), - "\t descriptor_type hashed_feature_image::operator(row,col)" - << "\n\t Invalid inputs were given to this function" - << "\n\t row: " << row - << "\n\t col: " << col - << "\n\t nr(): " << nr() - << "\n\t nc(): " << nc() - << "\n\t this: " << this - ); - - hash_feats[0] = std::make_pair(feats[row][col],feat_counts[feats[row][col]]); - return hash_feats; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - const rectangle hashed_feature_image<feature_extractor,hash_function_type>:: - get_block_rect ( - long row, - long col - ) const - { - return fe.get_block_rect(row,col); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - const point hashed_feature_image<feature_extractor,hash_function_type>:: - image_to_feat_space ( - const point& p - ) const - { - return fe.image_to_feat_space(p); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename 
hash_function_type - > - const rectangle hashed_feature_image<feature_extractor,hash_function_type>:: - image_to_feat_space ( - const rectangle& rect - ) const - { - return fe.image_to_feat_space(rect); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - const point hashed_feature_image<feature_extractor,hash_function_type>:: - feat_to_image_space ( - const point& p - ) const - { - return fe.feat_to_image_space(p); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type - > - const rectangle hashed_feature_image<feature_extractor,hash_function_type>:: - feat_to_image_space ( - const rectangle& rect - ) const - { - return fe.feat_to_image_space(rect); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_HASHED_IMAGE_FEATUrES_Hh_ - - diff --git a/ml/dlib/dlib/image_keypoint/hashed_feature_image_abstract.h b/ml/dlib/dlib/image_keypoint/hashed_feature_image_abstract.h deleted file mode 100644 index 90c1348c5..000000000 --- a/ml/dlib/dlib/image_keypoint/hashed_feature_image_abstract.h +++ /dev/null @@ -1,303 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_HASHED_IMAGE_FEATUrES_ABSTRACT_Hh_ -#ifdef DLIB_HASHED_IMAGE_FEATUrES_ABSTRACT_Hh_ - -#include "../lsh/projection_hash_abstract.h" -#include <vector> -#include "../matrix.h" -#include "../statistics.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor, - typename hash_function_type_ = projection_hash - > - class hashed_feature_image : noncopyable - { - /*! 
- REQUIREMENTS ON feature_extractor - - must be an object with an interface compatible with dlib::hog_image - - REQUIREMENTS ON hash_function_type_ - - must be an object with an interface compatible with projection_hash - - INITIAL VALUE - - size() == 0 - - uses_uniform_feature_weights() == false - - WHAT THIS OBJECT REPRESENTS - This object is a tool for performing image feature extraction. In - particular, it wraps another image feature extractor and converts the - wrapped image feature vectors into sparse indicator vectors. It does this - by hashing each feature vector into the range [0, get_num_dimensions()-1] - and then returns a new vector which is zero everywhere except for the - position determined by the hash. - - - THREAD SAFETY - Concurrent access to an instance of this object is not safe and should be protected - by a mutex lock except for the case where you are copying the configuration - (via copy_configuration()) of a hashed_feature_image object to many other threads. - In this case, it is safe to copy the configuration of a shared object so long - as no other operations are performed on it. - - - NOTATION - let BASE_FE denote the base feature_extractor object contained inside - the hashed_feature_image. - !*/ - - public: - - typedef feature_extractor feature_extractor_type; - typedef hash_function_type_ hash_function_type; - typedef std::vector<std::pair<unsigned int,double> > descriptor_type; - - hashed_feature_image ( - ); - /*! - ensures - - this object is properly initialized - !*/ - - void clear ( - ); - /*! - ensures - - this object will have its initial value - !*/ - - void set_hash ( - const hash_function_type& hash - ); - /*! - ensures - - #get_hash() == hash - !*/ - - const hash_function_type& get_hash ( - ) const; - /*! - ensures - - returns the hash function used by this object to hash - base feature vectors into integers. - !*/ - - void copy_configuration ( - const feature_extractor& item - ); - /*! 
- ensures - - performs BASE_FE.copy_configuration(item) - !*/ - - void copy_configuration ( - const hashed_feature_image& item - ); - /*! - ensures - - copies all the state information of item into *this, except for state - information populated by load(). More precisely, given two hashed_feature_image - objects H1 and H2, the following sequence of instructions should always - result in both of them having the exact same state. - H2.copy_configuration(H1); - H1.load(img); - H2.load(img); - !*/ - - template < - typename image_type - > - void load ( - const image_type& img - ); - /*! - requires - - image_type == any type that can be supplied to feature_extractor::load() - ensures - - performs BASE_FE.load(img) - i.e. does feature extraction. The features can be accessed using - operator() as defined below. - !*/ - - size_t size ( - ) const; - /*! - ensures - - returns BASE_FE.size() - !*/ - - long nr ( - ) const; - /*! - ensures - - returns BASE_FE.nr() - !*/ - - long nc ( - ) const; - /*! - ensures - - returns BASE_FE.nc() - !*/ - - long get_num_dimensions ( - ) const; - /*! - ensures - - returns the dimensionality of the feature vectors returned by operator(). - In this case, this is the number of hash bins. That is, get_hash().num_hash_bins() - !*/ - - void use_relative_feature_weights ( - ); - /*! - ensures - - #uses_uniform_feature_weights() == false - !*/ - - void use_uniform_feature_weights ( - ); - /*! - ensures - - #uses_uniform_feature_weights() == true - !*/ - - bool uses_uniform_feature_weights ( - ) const; - /*! - ensures - - returns true if this object weights each feature with a value of 1 and - false if it uses a weighting of 1/N where N is the number of occurrences - of the feature in an image (note that we normalize N so that it is - invariant to the size of the image given to load()). - !*/ - - const descriptor_type& operator() ( - long row, - long col - ) const; - /*! 
- requires - - 0 <= row < nr() - - 0 <= col < nc() - - It must be legal to evaluate expressions of the form: get_hash()(BASE_FE(row,col)) - (e.g. the hash function must be properly configured to process the feature - vectors produced by the base feature extractor) - ensures - - hashes BASE_FE(row,col) and returns the resulting indicator vector. - - To be precise, this function returns a sparse vector V such that: - - V.size() == 1 - - V[0].first == get_hash()(BASE_FE(row,col)) - - if (uses_uniform_feature_weights()) then - - V[0].second == 1 - - else - - V[0].second == 1/N where N is the number of times a feature in - hash bin V[0].first was observed in the image given to load(). - Note that we scale all the counts so that they are invariant to - the size of the image. - !*/ - - const rectangle get_block_rect ( - long row, - long col - ) const; - /*! - ensures - - returns BASE_FE.get_block_rect(row,col) - I.e. returns a rectangle that tells you what part of the original image is associated - with a particular feature vector. - !*/ - - const point image_to_feat_space ( - const point& p - ) const; - /*! - ensures - - returns BASE_FE.image_to_feat_space(p) - I.e. Each local feature is extracted from a certain point in the input image. - This function returns the identity of the local feature corresponding - to the image location p. Or in other words, let P == image_to_feat_space(p), - then (*this)(P.y(),P.x()) == the local feature closest to, or centered at, - the point p in the input image. Note that some image points might not have - corresponding feature locations. E.g. border points or points outside the - image. In these cases the returned point will be outside get_rect(*this). - !*/ - - const rectangle image_to_feat_space ( - const rectangle& rect - ) const; - /*! - ensures - - returns BASE_FE.image_to_feat_space(rect) - I.e. returns rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner())); - (i.e. 
maps a rectangle from image space to feature space) - !*/ - - const point feat_to_image_space ( - const point& p - ) const; - /*! - ensures - - returns BASE_FE.feat_to_image_space(p) - I.e. returns the location in the input image space corresponding to the center - of the local feature at point p. In other words, this function computes - the inverse of image_to_feat_space(). Note that it may only do so approximately, - since more than one image location might correspond to the same local feature. - That is, image_to_feat_space() might not be invertible so this function gives - the closest possible result. - !*/ - - const rectangle feat_to_image_space ( - const rectangle& rect - ) const; - /*! - ensures - - returns BASE_FE.feat_to_image_space(rect) - I.e. return rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner())); - (i.e. maps a rectangle from feature space to image space) - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U - > - void serialize ( - const hashed_feature_image<T,U>& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U - > - void deserialize ( - hashed_feature_image<T,U>& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_HASHED_IMAGE_FEATUrES_ABSTRACT_Hh_ - - - diff --git a/ml/dlib/dlib/image_keypoint/hessian_pyramid.h b/ml/dlib/dlib/image_keypoint/hessian_pyramid.h deleted file mode 100644 index 2e672c0d0..000000000 --- a/ml/dlib/dlib/image_keypoint/hessian_pyramid.h +++ /dev/null @@ -1,531 +0,0 @@ -// Copyright (C) 2009 Davis E. 
King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_HESSIAN_PYRAMId_Hh_ -#define DLIB_HESSIAN_PYRAMId_Hh_ - -#include "hessian_pyramid_abstract.h" -#include "../algs.h" -#include "../image_transforms/integral_image.h" -#include "../array.h" -#include "../array2d.h" -#include "../noncopyable.h" -#include "../matrix.h" -#include "../stl_checked.h" -#include <algorithm> -#include <vector> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - struct interest_point - { - interest_point() : scale(0), score(0), laplacian(0) {} - - dlib::vector<double,2> center; - double scale; - double score; - double laplacian; - - bool operator < (const interest_point& p) const { return score < p.score; } - }; - -// ---------------------------------------------------------------------------------------- - - inline void serialize( - const interest_point& item, - std::ostream& out - ) - { - try - { - serialize(item.center,out); - serialize(item.scale,out); - serialize(item.score,out); - serialize(item.laplacian,out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type interest_point"); - } - } - -// ---------------------------------------------------------------------------------------- - - inline void deserialize( - interest_point& item, - std::istream& in - ) - { - try - { - deserialize(item.center,in); - deserialize(item.scale,in); - deserialize(item.score,in); - deserialize(item.laplacian,in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type interest_point"); - } - } - -// ---------------------------------------------------------------------------------------- - - class hessian_pyramid : noncopyable - { - public: - hessian_pyramid() - { - num_octaves = 0; - num_intervals = 0; - initial_step_size = 0; - } - - template <typename 
integral_image_type> - void build_pyramid ( - const integral_image_type& img, - long num_octaves, - long num_intervals, - long initial_step_size - ) - { - DLIB_ASSERT(num_octaves > 0 && num_intervals > 0 && initial_step_size > 0, - "\tvoid build_pyramid()" - << "\n\tAll arguments to this function must be > 0" - << "\n\t this: " << this - << "\n\t num_octaves: " << num_octaves - << "\n\t num_intervals: " << num_intervals - << "\n\t initial_step_size: " << initial_step_size - ); - - this->num_octaves = num_octaves; - this->num_intervals = num_intervals; - this->initial_step_size = initial_step_size; - - // allocate space for the pyramid - pyramid.resize(num_octaves*num_intervals); - for (long o = 0; o < num_octaves; ++o) - { - const long step_size = get_step_size(o); - for (long i = 0; i < num_intervals; ++i) - { - pyramid[num_intervals*o + i].set_size(img.nr()/step_size, img.nc()/step_size); - } - } - - // now fill out the pyramid with data - for (long o = 0; o < num_octaves; ++o) - { - const long step_size = get_step_size(o); - - for (long i = 0; i < num_intervals; ++i) - { - const long border_size = get_border_size(i)*step_size; - const long lobe_size = static_cast<long>(std::pow(2.0, o+1.0)+0.5)*(i+1) + 1; - const double area_inv = 1.0/std::pow(3.0*lobe_size, 2.0); - - const long lobe_offset = lobe_size/2+1; - const point tl(-lobe_offset,-lobe_offset); - const point tr(lobe_offset,-lobe_offset); - const point bl(-lobe_offset,lobe_offset); - const point br(lobe_offset,lobe_offset); - - for (long r = border_size; r < img.nr() - border_size; r += step_size) - { - for (long c = border_size; c < img.nc() - border_size; c += step_size) - { - const point p(c,r); - - double Dxx = img.get_sum_of_area(centered_rect(p, lobe_size*3, 2*lobe_size-1)) - - img.get_sum_of_area(centered_rect(p, lobe_size, 2*lobe_size-1))*3.0; - - double Dyy = img.get_sum_of_area(centered_rect(p, 2*lobe_size-1, lobe_size*3)) - - img.get_sum_of_area(centered_rect(p, 2*lobe_size-1, lobe_size))*3.0; - 
- double Dxy = img.get_sum_of_area(centered_rect(p+bl, lobe_size, lobe_size)) + - img.get_sum_of_area(centered_rect(p+tr, lobe_size, lobe_size)) - - img.get_sum_of_area(centered_rect(p+tl, lobe_size, lobe_size)) - - img.get_sum_of_area(centered_rect(p+br, lobe_size, lobe_size)); - - // now we normalize the filter responses - Dxx *= area_inv; - Dyy *= area_inv; - Dxy *= area_inv; - - - double sign_of_laplacian = +1; - if (Dxx + Dyy < 0) - sign_of_laplacian = -1; - - double determinant = Dxx*Dyy - 0.81*Dxy*Dxy; - - // If the determinant is negative then just blank it out by setting - // it to zero. - if (determinant < 0) - determinant = 0; - - // Save the determinant of the Hessian into our image pyramid. Also - // pack the laplacian sign into the value so we can get it out later. - pyramid[o*num_intervals + i][r/step_size][c/step_size] = sign_of_laplacian*determinant; - - } - } - - } - } - } - - long get_border_size ( - long interval - ) const - { - DLIB_ASSERT(0 <= interval && interval < intervals(), - "\tlong get_border_size(interval)" - << "\n\tInvalid interval value" - << "\n\t this: " << this - << "\n\t interval: " << interval - ); - - const double lobe_size = 2.0*(interval+1) + 1; - const double filter_size = 3*lobe_size; - - const long bs = static_cast<long>(std::ceil(filter_size/2.0)); - return bs; - } - - long get_step_size ( - long octave - ) const - { - DLIB_ASSERT(0 <= octave && octave < octaves(), - "\tlong get_step_size(octave)" - << "\n\tInvalid octave value" - << "\n\t this: " << this - << "\n\t octave: " << octave - ); - - return initial_step_size*static_cast<long>(std::pow(2.0, (double)octave)+0.5); - } - - long nr ( - long octave - ) const - { - DLIB_ASSERT(0 <= octave && octave < octaves(), - "\tlong nr(octave)" - << "\n\tInvalid octave value" - << "\n\t this: " << this - << "\n\t octave: " << octave - ); - - return pyramid[num_intervals*octave].nr(); - } - - long nc ( - long octave - ) const - { - DLIB_ASSERT(0 <= octave && octave < octaves(), - 
"\tlong nc(octave)" - << "\n\tInvalid octave value" - << "\n\t this: " << this - << "\n\t octave: " << octave - ); - - return pyramid[num_intervals*octave].nc(); - } - - double get_value ( - long octave, - long interval, - long r, - long c - ) const - { - DLIB_ASSERT(0 <= octave && octave < octaves() && - 0 <= interval && interval < intervals() && - get_border_size(interval) <= r && r < nr(octave)-get_border_size(interval) && - get_border_size(interval) <= c && c < nc(octave)-get_border_size(interval), - "\tdouble get_value(octave, interval, r, c)" - << "\n\tInvalid inputs to this function" - << "\n\t this: " << this - << "\n\t octave: " << octave - << "\n\t interval: " << interval - << "\n\t octaves: " << octaves() - << "\n\t intervals: " << intervals() - << "\n\t r: " << r - << "\n\t c: " << c - << "\n\t nr(octave): " << nr(octave) - << "\n\t nc(octave): " << nc(octave) - << "\n\t get_border_size(interval): " << get_border_size(interval) - ); - - return std::abs(pyramid[num_intervals*octave + interval][r][c]); - } - - double get_laplacian ( - long octave, - long interval, - long r, - long c - ) const - { - DLIB_ASSERT(0 <= octave && octave < octaves() && - 0 <= interval && interval < intervals() && - get_border_size(interval) <= r && r < nr(octave)-get_border_size(interval) && - get_border_size(interval) <= c && c < nc(octave)-get_border_size(interval), - "\tdouble get_laplacian(octave, interval, r, c)" - << "\n\tInvalid inputs to this function" - << "\n\t this: " << this - << "\n\t octave: " << octave - << "\n\t interval: " << interval - << "\n\t octaves: " << octaves() - << "\n\t intervals: " << intervals() - << "\n\t r: " << r - << "\n\t c: " << c - << "\n\t nr(octave): " << nr(octave) - << "\n\t nc(octave): " << nc(octave) - << "\n\t get_border_size(interval): " << get_border_size(interval) - ); - - // return the sign of the laplacian - if (pyramid[num_intervals*octave + interval][r][c] > 0) - return +1; - else - return -1; - } - - long octaves ( - ) const { 
return num_octaves; } - - long intervals ( - ) const { return num_intervals; } - - private: - - long num_octaves; - long num_intervals; - long initial_step_size; - - typedef array2d<double> image_type; - typedef array<image_type> pyramid_type; - - pyramid_type pyramid; - }; - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - namespace hessian_pyramid_helpers - { - inline bool is_maximum_in_region( - const hessian_pyramid& pyr, - long o, - long i, - long r, - long c - ) - { - // First check if this point is near the edge of the octave - // If it is then we say it isn't a maximum as these points are - // not as reliable. - if (i <= 0 || i+1 >= pyr.intervals()) - { - return false; - } - - const double val = pyr.get_value(o,i,r,c); - - // now check if there are any bigger values around this guy - for (long ii = i-1; ii <= i+1; ++ii) - { - for (long rr = r-1; rr <= r+1; ++rr) - { - for (long cc = c-1; cc <= c+1; ++cc) - { - if (pyr.get_value(o,ii,rr,cc) > val) - return false; - } - } - } - - return true; - } - - // ------------------------------------------------------------------------------------ - - inline const matrix<double,3,1> get_hessian_gradient ( - const hessian_pyramid& pyr, - long o, - long i, - long r, - long c - ) - { - matrix<double,3,1> grad; - grad(0) = (pyr.get_value(o,i,r,c+1) - pyr.get_value(o,i,r,c-1))/2.0; - grad(1) = (pyr.get_value(o,i,r+1,c) - pyr.get_value(o,i,r-1,c))/2.0; - grad(2) = (pyr.get_value(o,i+1,r,c) - pyr.get_value(o,i-1,r,c))/2.0; - return grad; - } - - // ------------------------------------------------------------------------------------ - - inline const matrix<double,3,3> get_hessian_hessian ( - const hessian_pyramid& pyr, - long o, - long i, - long r, - long c - ) - { - matrix<double,3,3> hess; - 
const double val = pyr.get_value(o,i,r,c); - - double Dxx = (pyr.get_value(o,i,r,c+1) + pyr.get_value(o,i,r,c-1)) - 2*val; - double Dyy = (pyr.get_value(o,i,r+1,c) + pyr.get_value(o,i,r-1,c)) - 2*val; - double Dss = (pyr.get_value(o,i+1,r,c) + pyr.get_value(o,i-1,r,c)) - 2*val; - - double Dxy = (pyr.get_value(o,i,r+1,c+1) + pyr.get_value(o,i,r-1,c-1) - - pyr.get_value(o,i,r-1,c+1) - pyr.get_value(o,i,r+1,c-1)) / 4.0; - - double Dxs = (pyr.get_value(o,i+1,r,c+1) + pyr.get_value(o,i-1,r,c-1) - - pyr.get_value(o,i-1,r,c+1) - pyr.get_value(o,i+1,r,c-1)) / 4.0; - - double Dys = (pyr.get_value(o,i+1,r+1,c) + pyr.get_value(o,i-1,r-1,c) - - pyr.get_value(o,i-1,r+1,c) - pyr.get_value(o,i+1,r-1,c)) / 4.0; - - - hess = Dxx, Dxy, Dxs, - Dxy, Dyy, Dys, - Dxs, Dys, Dss; - - return hess; - } - - // ------------------------------------------------------------------------------------ - - inline const interest_point interpolate_point ( - const hessian_pyramid& pyr, - long o, - long i, - long r, - long c - ) - { - dlib::vector<double,2> p(c,r); - - dlib::vector<double,3> start_point(c,r,i); - dlib::vector<double,3> interpolated_point = -inv(get_hessian_hessian(pyr,o,i,r,c))*get_hessian_gradient(pyr,o,i,r,c); - - //cout << "inter: " << trans(interpolated_point); - - interest_point temp; - if (max(abs(interpolated_point)) < 0.5) - { - p = (start_point+interpolated_point)*pyr.get_step_size(o); - const double lobe_size = std::pow(2.0, o+1.0)*(i+interpolated_point.z()+1) + 1; - const double filter_size = 3*lobe_size; - const double scale = 1.2/9.0 * filter_size; - - temp.center = p; - temp.scale = scale; - temp.score = pyr.get_value(o,i,r,c); - temp.laplacian = pyr.get_laplacian(o,i,r,c); - } - else - { - // this indicates to the caller that no interest point was found. 
- temp.score = -1; - } - - return temp; - } - - } - -// ---------------------------------------------------------------------------------------- - - template <typename Alloc> - void get_interest_points ( - const hessian_pyramid& pyr, - double threshold, - std::vector<interest_point,Alloc>& result_points - ) - { - DLIB_ASSERT(threshold >= 0, - "\tvoid get_interest_points()" - << "\n\t Invalid arguments to this function" - << "\n\t threshold: " << threshold - ); - using namespace std; - using namespace hessian_pyramid_helpers; - - result_points.clear(); - - for (long o = 0; o < pyr.octaves(); ++o) - { - const long nr = pyr.nr(o); - const long nc = pyr.nc(o); - - // do non-maximum suppression on all the intervals in the current octave and - // accumulate the results in result_points - for (long i = 1; i < pyr.intervals()-1; i += 1) - { - const long border_size = pyr.get_border_size(i+1); - for (long r = border_size+1; r < nr - border_size-1; r += 1) - { - for (long c = border_size+1; c < nc - border_size-1; c += 1) - { - double max_val = pyr.get_value(o,i,r,c); - long max_i = i; - long max_r = r; - long max_c = c; - - - // If the max point we found is really a maximum in its own region and - // is big enough then add it to the results. - if (max_val >= threshold && is_maximum_in_region(pyr, o, max_i, max_r, max_c)) - { - //cout << max_val << endl; - interest_point sp = interpolate_point (pyr, o, max_i, max_r, max_c); - if (sp.score >= threshold) - { - result_points.push_back(sp); - } - } - - } - } - } - } - - } - -// ---------------------------------------------------------------------------------------- - - template <typename Alloc> - void get_interest_points ( - const hessian_pyramid& pyr, - double threshold, - std_vector_c<interest_point,Alloc>& result_points - ) - /*! - This function is just an overload that automatically casts std_vector_c objects - into std::vector objects. 
(Usually this is automatic but the template argument - there messes up the conversion so we have to do it explicitly) - !*/ - { - std::vector<interest_point,Alloc>& v = result_points; - get_interest_points(pyr, threshold, v); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_HESSIAN_PYRAMId_Hh_ - diff --git a/ml/dlib/dlib/image_keypoint/hessian_pyramid_abstract.h b/ml/dlib/dlib/image_keypoint/hessian_pyramid_abstract.h deleted file mode 100644 index 2db39c210..000000000 --- a/ml/dlib/dlib/image_keypoint/hessian_pyramid_abstract.h +++ /dev/null @@ -1,244 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_HESSIAN_PYRAMId_ABSTRACT_Hh_ -#ifdef DLIB_HESSIAN_PYRAMId_ABSTRACT_Hh_ - -#include "../image_transforms/integral_image_abstract.h" -#include "../noncopyable.h" -#include <vector> - -namespace dlib -{ - - class hessian_pyramid : noncopyable - { - /*! - INITIAL VALUE - - octaves() == 0 - - intervals() == 0 - - WHAT THIS OBJECT REPRESENTS - This object represents an image pyramid where each level in the - pyramid holds determinants of Hessian matrices for the original - input image. This object can be used to find stable interest - points in an image. For further details consult the following - papers. - - This object is an implementation of the fast Hessian pyramid - as described in the paper: - SURF: Speeded Up Robust Features - By Herbert Bay, Tinne Tuytelaars, and Luc Van Gool - - This implementation was also influenced by the very well documented - OpenSURF library and its corresponding description of how the fast - Hessian algorithm functions: - Notes on the OpenSURF Library - Christopher Evans - !*/ - public: - - template <typename integral_image_type> - void build_pyramid ( - const integral_image_type& img, - long num_octaves, - long num_intervals, - long initial_step_size - ); - /*! 
- requires - - num_octaves > 0 - - num_intervals > 0 - - initial_step_size > 0 - - integral_image_type == an object such as dlib::integral_image or another - type that implements the interface defined in image_transforms/integral_image_abstract.h - ensures - - #get_step_size(0) == initial_step_size - - #octaves() == num_octaves - - #intervals() == num_intervals - - creates a Hessian pyramid from the given input image. - !*/ - - long octaves ( - ) const; - /*! - ensures - - returns the number of octaves in this pyramid - !*/ - - long intervals ( - ) const; - /*! - ensures - - returns the number of intervals in this pyramid - !*/ - - long get_border_size ( - long interval - ) const; - /*! - requires - - 0 <= interval < intervals() - ensures - - Each interval of the pyramid has a certain sized border region where we - can't compute the Hessian values since they are too close to the edge - of the input image. This function returns the size of that border. - !*/ - - long get_step_size ( - long octave - ) const; - /*! - requires - - 0 <= octave < octaves() - ensures - - Each octave has a step size value. This value determines how many - input image pixels separate each pixel in the given pyramid octave. - As the octave gets larger (i.e. as it goes to the top of the pyramid) the - step size gets bigger and thus the pyramid narrows. - !*/ - - long nr ( - long octave - ) const; - /*! - requires - - 0 <= octave < octaves() - ensures - - returns the number of rows there are per layer in the given - octave of pyramid - !*/ - - long nc ( - long octave - ) const; - /*! - requires - - 0 <= octave < octaves() - ensures - - returns the number of columns there are per layer in the given - octave of pyramid - !*/ - - double get_value ( - long octave, - long interval, - long r, - long c - ) const; - /*! 
- requires - - 0 <= octave < octaves() - - 0 <= interval < intervals() - - Let BS == get_border_size(interval): then - - BS <= r < nr(octave)-BS - - BS <= c < nc(octave)-BS - ensures - - returns the determinant of the Hessian from the given octave and interval - of the pyramid. The specific point sampled at this pyramid level is - the one that corresponds to the input image point (point(r,c)*get_step_size(octave)). - !*/ - - double get_laplacian ( - long octave, - long interval, - long r, - long c - ) const; - /*! - requires - - 0 <= octave < octaves() - - 0 <= interval < intervals() - - Let BS == get_border_size(interval): then - - BS <= r < nr(octave)-BS - - BS <= c < nc(octave)-BS - ensures - - returns the sign of the laplacian for the given octave and interval - of the pyramid. The specific point sampled at this pyramid level is - the one that corresponds to the input image point (point(r,c)*get_step_size(octave)). - - The laplacian is the trace of the Hessian at the given point. So this - function returns either +1 or -1 depending on this number's sign. This - value can be used to distinguish bright blobs on dark backgrounds from - the reverse. - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - struct interest_point - { - /*! - WHAT THIS OBJECT REPRESENTS - This object contains the interest points found using the - hessian_pyramid object. Its fields have the following - meanings: - - center == the x/y location of the center of the interest point - (in image space coordinates. y gives the row and x gives the - column in the image) - - scale == the scale at which the point was detected. This is a number - >= 1. If it is 1 then it means the interest point was detected at - the lowest scale in the image pyramid. Larger numbers indicate that - the interest point is from high up in the image pyramid. 
For - example, a scale of 4 would mean the interest point was located at a - point in the pyramid where the image had been shrunk by a factor of 4. - - score == the determinant of the Hessian for the interest point - - laplacian == the sign of the laplacian for the interest point - !*/ - - interest_point() : scale(0), score(0), laplacian(0) {} - - dlib::vector<double,2> center; - double scale; - double score; - double laplacian; - - bool operator < (const interest_point& p) const { return score < p.score; } - /*! - This function is here so you can sort interest points according to - their scores - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - void serialize ( - const interest_point& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - - void deserialize ( - interest_point& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename Alloc> - void get_interest_points ( - const hessian_pyramid& pyr, - double threshold, - std::vector<interest_point,Alloc>& result_points - ) - /*! - requires - - threshold >= 0 - ensures - - extracts interest points from the pyramid pyr and stores them into - result_points (note that result_points is cleared before these new interest - points are added to it). - - Only interest points with determinant values in the pyramid greater than or - equal to threshold are output. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_HESSIAN_PYRAMId_ABSTRACT_Hh_ - diff --git a/ml/dlib/dlib/image_keypoint/hog.h b/ml/dlib/dlib/image_keypoint/hog.h deleted file mode 100644 index 823c25d6d..000000000 --- a/ml/dlib/dlib/image_keypoint/hog.h +++ /dev/null @@ -1,514 +0,0 @@ -// Copyright (C) 2010 Davis E.
King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_HoG_Hh_ -#define DLIB_HoG_Hh_ - -#include "hog_abstract.h" -#include "../algs.h" -#include "../matrix.h" -#include "../array2d.h" -#include "../geometry.h" -#include <cmath> - -namespace dlib -{ - enum - { - hog_no_interpolation, - hog_angle_interpolation, - hog_full_interpolation, - hog_signed_gradient, - hog_unsigned_gradient - }; - - template < - unsigned long cell_size_, - unsigned long block_size_, - unsigned long cell_stride_, - unsigned long num_orientation_bins_, - int gradient_type_, - int interpolation_type_ - > - class hog_image : noncopyable - { - COMPILE_TIME_ASSERT(cell_size_ > 1); - COMPILE_TIME_ASSERT(block_size_ > 0); - COMPILE_TIME_ASSERT(cell_stride_ > 0); - COMPILE_TIME_ASSERT(num_orientation_bins_ > 0); - - COMPILE_TIME_ASSERT( gradient_type_ == hog_signed_gradient || - gradient_type_ == hog_unsigned_gradient); - - COMPILE_TIME_ASSERT( interpolation_type_ == hog_no_interpolation || - interpolation_type_ == hog_angle_interpolation || - interpolation_type_ == hog_full_interpolation ); - - - public: - - const static unsigned long cell_size = cell_size_; - const static unsigned long block_size = block_size_; - const static unsigned long cell_stride = cell_stride_; - const static unsigned long num_orientation_bins = num_orientation_bins_; - const static int gradient_type = gradient_type_; - const static int interpolation_type = interpolation_type_; - - const static long min_size = cell_size*block_size+2; - - typedef matrix<double, block_size*block_size*num_orientation_bins, 1> descriptor_type; - - hog_image ( - ) : - num_block_rows(0), - num_block_cols(0) - {} - - void clear ( - ) - { - num_block_rows = 0; - num_block_cols = 0; - hist_cells.clear(); - } - - void copy_configuration ( - const hog_image& - ){} - - template < - typename image_type - > - inline void load ( - const image_type& img - ) - { - COMPILE_TIME_ASSERT( 
pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false ); - load_impl(mat(img)); - } - - inline void unload( - ) { clear(); } - - inline size_t size ( - ) const { return static_cast<size_t>(nr()*nc()); } - - inline long nr ( - ) const { return num_block_rows; } - - inline long nc ( - ) const { return num_block_cols; } - - long get_num_dimensions ( - ) const - { - return block_size*block_size*num_orientation_bins; - } - - inline const descriptor_type& operator() ( - long row, - long col - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT( 0 <= row && row < nr() && - 0 <= col && col < nc(), - "\t descriptor_type hog_image::operator()()" - << "\n\t invalid row or col argument" - << "\n\t row: " << row - << "\n\t col: " << col - << "\n\t nr(): " << nr() - << "\n\t nc(): " << nc() - << "\n\t this: " << this - ); - - row *= cell_stride; - col *= cell_stride; - ++row; - ++col; - - int feat = 0; - for (unsigned long r = 0; r < block_size; ++r) - { - for (unsigned long c = 0; c < block_size; ++c) - { - for (unsigned long i = 0; i < num_orientation_bins; ++i) - { - des(feat++) = hist_cells[row+r][col+c].values[i]; - } - } - } - - des /= length(des) + 1e-8; - - return des; - } - - const rectangle get_block_rect ( - long row, - long col - ) const - { - row *= cell_stride; - col *= cell_stride; - - row *= cell_size; - col *= cell_size; - - // do this to account for the 1 pixel padding we use all around the image - ++row; - ++col; - - return rectangle(col, row, col+cell_size*block_size-1, row+cell_size*block_size-1); - } - - const point image_to_feat_space ( - const point& p - ) const - { - - const long half_block = block_size/2; - if ((block_size%2) == 0) - { - return point(((p.x()-1)/(long)cell_size - half_block)/(long)cell_stride, - ((p.y()-1)/(long)cell_size - half_block)/(long)cell_stride); - } - else - { - return point(((p.x()-1-(long)cell_size/2)/(long)cell_size - half_block)/(long)cell_stride, - 
((p.y()-1-(long)cell_size/2)/(long)cell_size - half_block)/(long)cell_stride); - } - } - - const rectangle image_to_feat_space ( - const rectangle& rect - ) const - { - return rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner())); - } - - const point feat_to_image_space ( - const point& p - ) const - { - const long half_block = block_size/2; - if ((block_size%2) == 0) - { - return point((p.x()*cell_stride + half_block)*cell_size + 1, - (p.y()*cell_stride + half_block)*cell_size + 1); - } - else - { - return point((p.x()*cell_stride + half_block)*cell_size + 1 + cell_size/2, - (p.y()*cell_stride + half_block)*cell_size + 1 + cell_size/2); - } - } - - const rectangle feat_to_image_space ( - const rectangle& rect - ) const - { - return rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner())); - } - - - - // these _PRIVATE_ functions are only here as a workaround for a bug in visual studio 2005. - void _PRIVATE_serialize (std::ostream& out) const - { - // serialize hist_cells - serialize(hist_cells.nc(),out); - serialize(hist_cells.nr(),out); - hist_cells.reset(); - while (hist_cells.move_next()) - serialize(hist_cells.element().values,out); - hist_cells.reset(); - - - serialize(num_block_rows, out); - serialize(num_block_cols, out); - } - - void _PRIVATE_deserialize (std::istream& in ) - { - // deserialize item.hist_cells - long nc, nr; - deserialize(nc,in); - deserialize(nr,in); - hist_cells.set_size(nr,nc); - while (hist_cells.move_next()) - deserialize(hist_cells.element().values,in); - hist_cells.reset(); - - - deserialize(num_block_rows, in); - deserialize(num_block_cols, in); - } - - private: - - template < - typename image_type - > - void load_impl ( - const image_type& img - ) - { - // Note that we keep a border of 1 pixel all around the image so that we don't have - // to worry about running outside the image when computing the horizontal and vertical - // gradients. 
- - // Note also that we have a border of unused cells around the hist_cells array so that we - // don't have to worry about edge effects when doing the interpolation in the main loop - // below. - - - // check if the window is just too small - if (img.nr() < min_size || img.nc() < min_size) - { - // If the image is smaller than our windows then there aren't any descriptors at all! - num_block_rows = 0; - num_block_cols = 0; - return; - } - - // Make sure we have the right number of cell histograms and that they are - // all set to zero. - hist_cells.set_size((img.nr()-2)/cell_size+2, (img.nc()-2)/cell_size+2); - for (long r = 0; r < hist_cells.nr(); ++r) - { - for (long c = 0; c < hist_cells.nc(); ++c) - { - hist_cells[r][c].zero(); - } - } - - - // loop over all the histogram cells and fill them out - for (long rh = 1; rh < hist_cells.nr()-1; ++rh) - { - for (long ch = 1; ch < hist_cells.nc()-1; ++ch) - { - // Fill out the current histogram cell. - // First, figure out the row and column offsets into the image for the current histogram cell. - const long roff = (rh-1)*cell_size + 1; - const long coff = (ch-1)*cell_size + 1; - - for (long r = 0; r < (long)cell_size; ++r) - { - for (long c = 0; c < (long)cell_size; ++c) - { - unsigned long left; - unsigned long right; - unsigned long top; - unsigned long bottom; - - assign_pixel(left, img(r+roff,c+coff-1)); - assign_pixel(right, img(r+roff,c+coff+1)); - assign_pixel(top, img(r+roff-1,c+coff)); - assign_pixel(bottom, img(r+roff+1,c+coff)); - - double grad_x = (long)right-(long)left; - double grad_y = (long)top-(long)bottom; - - // obtain the angle of the gradient. Make sure it is scaled between 0 and 1. 
- double angle = std::max(0.0, std::atan2(grad_y, grad_x)/pi + 1)/2; - - - if (gradient_type == hog_unsigned_gradient) - { - angle *= 2; - if (angle >= 1) - angle -= 1; - } - - - // now scale angle to between 0 and num_orientation_bins - angle *= num_orientation_bins; - - - const double strength = std::sqrt(grad_y*grad_y + grad_x*grad_x); - - - if (interpolation_type == hog_no_interpolation) - { - // no interpolation - hist_cells[rh][ch].values[round_to_int(angle)%num_orientation_bins] += strength; - } - else // if we should do some interpolation - { - unsigned long quantized_angle_lower = static_cast<unsigned long>(std::floor(angle)); - unsigned long quantized_angle_upper = static_cast<unsigned long>(std::ceil(angle)); - - quantized_angle_lower %= num_orientation_bins; - quantized_angle_upper %= num_orientation_bins; - - const double angle_split = (angle-std::floor(angle)); - const double upper_strength = angle_split*strength; - const double lower_strength = (1-angle_split)*strength; - - if (interpolation_type == hog_angle_interpolation) - { - // Stick into gradient histogram. Note that we linearly interpolate between neighboring - // histogram buckets. - hist_cells[rh][ch].values[quantized_angle_lower] += lower_strength; - hist_cells[rh][ch].values[quantized_angle_upper] += upper_strength; - } - else // here we do hog_full_interpolation - { - const double center_r = (cell_size-1)/2.0; - const double center_c = (cell_size-1)/2.0; - - const double lin_neighbor_r = std::abs(center_r - r)/cell_size; - const double lin_main_r = 1-lin_neighbor_r; - - const double lin_neighbor_c = std::abs(center_c - c)/cell_size; - const double lin_main_c = 1-lin_neighbor_c; - - // Which neighboring cells we interpolate into depends on which - // corner of our main cell we are nearest. 
- if (r < center_r) - { - if (c < center_c) - { - hist_cells[rh][ch].values[quantized_angle_upper] += upper_strength * lin_main_r*lin_main_c; - hist_cells[rh][ch].values[quantized_angle_lower] += lower_strength * lin_main_r*lin_main_c; - - hist_cells[rh-1][ch].values[quantized_angle_upper] += upper_strength * lin_neighbor_r*lin_main_c; - hist_cells[rh-1][ch].values[quantized_angle_lower] += lower_strength * lin_neighbor_r*lin_main_c; - - hist_cells[rh][ch-1].values[quantized_angle_upper] += upper_strength * lin_neighbor_c*lin_main_r; - hist_cells[rh][ch-1].values[quantized_angle_lower] += lower_strength * lin_neighbor_c*lin_main_r; - - hist_cells[rh-1][ch-1].values[quantized_angle_upper] += upper_strength * lin_neighbor_c*lin_neighbor_r; - hist_cells[rh-1][ch-1].values[quantized_angle_lower] += lower_strength * lin_neighbor_c*lin_neighbor_r; - } - else - { - hist_cells[rh][ch].values[quantized_angle_upper] += upper_strength * lin_main_r*lin_main_c; - hist_cells[rh][ch].values[quantized_angle_lower] += lower_strength * lin_main_r*lin_main_c; - - hist_cells[rh-1][ch].values[quantized_angle_upper] += upper_strength * lin_neighbor_r*lin_main_c; - hist_cells[rh-1][ch].values[quantized_angle_lower] += lower_strength * lin_neighbor_r*lin_main_c; - - hist_cells[rh][ch+1].values[quantized_angle_upper] += upper_strength * lin_neighbor_c*lin_main_r; - hist_cells[rh][ch+1].values[quantized_angle_lower] += lower_strength * lin_neighbor_c*lin_main_r; - - hist_cells[rh-1][ch+1].values[quantized_angle_upper] += upper_strength * lin_neighbor_c*lin_neighbor_r; - hist_cells[rh-1][ch+1].values[quantized_angle_lower] += lower_strength * lin_neighbor_c*lin_neighbor_r; - } - } - else - { - if (c < center_c) - { - hist_cells[rh][ch].values[quantized_angle_upper] += upper_strength * lin_main_r*lin_main_c; - hist_cells[rh][ch].values[quantized_angle_lower] += lower_strength * lin_main_r*lin_main_c; - - hist_cells[rh+1][ch].values[quantized_angle_upper] += upper_strength * 
lin_neighbor_r*lin_main_c; - hist_cells[rh+1][ch].values[quantized_angle_lower] += lower_strength * lin_neighbor_r*lin_main_c; - - hist_cells[rh][ch-1].values[quantized_angle_upper] += upper_strength * lin_neighbor_c*lin_main_r; - hist_cells[rh][ch-1].values[quantized_angle_lower] += lower_strength * lin_neighbor_c*lin_main_r; - - hist_cells[rh+1][ch-1].values[quantized_angle_upper] += upper_strength * lin_neighbor_c*lin_neighbor_r; - hist_cells[rh+1][ch-1].values[quantized_angle_lower] += lower_strength * lin_neighbor_c*lin_neighbor_r; - } - else - { - hist_cells[rh][ch].values[quantized_angle_upper] += upper_strength * lin_main_r*lin_main_c; - hist_cells[rh][ch].values[quantized_angle_lower] += lower_strength * lin_main_r*lin_main_c; - - hist_cells[rh+1][ch].values[quantized_angle_upper] += upper_strength * lin_neighbor_r*lin_main_c; - hist_cells[rh+1][ch].values[quantized_angle_lower] += lower_strength * lin_neighbor_r*lin_main_c; - - hist_cells[rh][ch+1].values[quantized_angle_upper] += upper_strength * lin_neighbor_c*lin_main_r; - hist_cells[rh][ch+1].values[quantized_angle_lower] += lower_strength * lin_neighbor_c*lin_main_r; - - hist_cells[rh+1][ch+1].values[quantized_angle_upper] += upper_strength * lin_neighbor_c*lin_neighbor_r; - hist_cells[rh+1][ch+1].values[quantized_angle_lower] += lower_strength * lin_neighbor_c*lin_neighbor_r; - } - } - } - } - - - } - } - } - } - - - // Now figure out how many blocks we should have. Note again that the hist_cells has a border of - // unused cells (thats where that -2 comes from). 
- num_block_rows = (hist_cells.nr()-2 - (block_size-1) + cell_stride - 1)/cell_stride; - num_block_cols = (hist_cells.nc()-2 - (block_size-1) + cell_stride - 1)/cell_stride; - - } - - unsigned long round_to_int( - double val - ) const - { - return static_cast<unsigned long>(std::floor(val + 0.5)); - } - - struct histogram - { - void zero() - { - for (unsigned long i = 0; i < num_orientation_bins; ++i) - values[i] = 0; - } - double values[num_orientation_bins]; - }; - - array2d<histogram> hist_cells; - - mutable descriptor_type des; - - long num_block_rows; - long num_block_cols; - - - }; - -// ---------------------------------------------------------------------------------------- - - template < - unsigned long T1, - unsigned long T2, - unsigned long T3, - unsigned long T4, - int T5, - int T6 - > - void serialize ( - const hog_image<T1,T2,T3,T4,T5,T6>& item, - std::ostream& out - ) - { - item._PRIVATE_serialize(out); - } - - template < - unsigned long T1, - unsigned long T2, - unsigned long T3, - unsigned long T4, - int T5, - int T6 - > - void deserialize ( - hog_image<T1,T2,T3,T4,T5,T6>& item, - std::istream& in - ) - { - item._PRIVATE_deserialize(in); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_HoG_Hh_ - diff --git a/ml/dlib/dlib/image_keypoint/hog_abstract.h b/ml/dlib/dlib/image_keypoint/hog_abstract.h deleted file mode 100644 index 26c8cab64..000000000 --- a/ml/dlib/dlib/image_keypoint/hog_abstract.h +++ /dev/null @@ -1,335 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#undef DLIB_HoG_ABSTRACT_Hh_ -#ifdef DLIB_HoG_ABSTRACT_Hh_ - -#include "../algs.h" -#include "../matrix.h" -#include "../array2d.h" -#include "../geometry.h" -#include <cmath> - -namespace dlib -{ - enum - { - hog_no_interpolation, - hog_angle_interpolation, - hog_full_interpolation, - hog_signed_gradient, - hog_unsigned_gradient - }; - - template < - unsigned long cell_size_, - unsigned long block_size_, - unsigned long cell_stride_, - unsigned long num_orientation_bins_, - int gradient_type_, - int interpolation_type_ - > - class hog_image : noncopyable - { - /*! - REQUIREMENTS ON TEMPLATE PARAMETERS - - cell_size_ > 1 - - block_size_ > 0 - - cell_stride_ > 0 - - num_orientation_bins_ > 0 - - gradient_type_ == hog_signed_gradient or hog_unsigned_gradient - - interpolation_type_ == hog_no_interpolation, hog_angle_interpolation, or - hog_full_interpolation - - INITIAL VALUE - - size() == 0 - - WHAT THIS OBJECT REPRESENTS - This object is a tool for performing the image feature extraction algorithm - described in the following paper: - Histograms of Oriented Gradients for Human Detection - by Navneet Dalal and Bill Triggs - - - To summarize the technique, this object tiles non-overlapping cells over an - image. Each of these cells is a box that is cell_size by cell_size pixels - in size. Each cell contains an array of size num_orientation_bins. The array - in a cell is used to store a histogram of all the edge orientations contained - within the cell's image region. - - Once the grid of cells and their histograms has been computed (via load()) - you can obtain descriptors for each "block" in the image. A block is just a - group of cells and blocks are allowed to overlap. Each block is square and - made up of block_size*block_size cells. So when you call operator()(r,c) - what you obtain is a vector that is just a bunch of cell histograms that - have been concatenated (and length normalized). 
- - The template arguments control the various parameters of this algorithm. - - The interpolation_type parameter controls the amount of interpolation - that happens during the creation of the edge orientation histograms. It - varies from no interpolation at all to full spatial and angle interpolation. - - Angle interpolation means that an edge doesn't just go into its nearest - histogram bin but instead gets interpolated into its two nearest neighbors. - Similarly, spatial interpolation means that an edge doesn't just go into - the cell it is in but it also contributes to nearby cells depending on how - close they are. - - The gradient_type parameter controls how edge orientations are measured. - Consider the following ASCII art: - signed gradients: unsigned gradients: - /\ | - || | - <--- ----> ------+------ - || | - \/ | - - An image is full of gradients caused by edges between objects. The direction - of a gradient is determined by which end of it has pixels of highest intensity. - So for example, suppose you had a picture containing black and white stripes. - Then the magnitude of the gradient at each point in the image tells you if you - are on the edge of a stripe and the gradient's orientation tells you which - direction you have to move get into the white stripe. - - Signed gradients preserve this direction information while unsigned gradients - do not. An unsigned gradient will only tell you the orientation of the stripe - but not which direction leads to the white stripe. - - Finally, the cell_stride parameter controls how much overlap you get between - blocks. The maximum amount of overlap is obtained when cell_stride == 1. - At the other extreme, you would have no overlap if cell_stride == block_size. 
- - - THREAD SAFETY - Concurrent access to an instance of this object is not safe and should be protected - by a mutex lock except for the case where you are copying the configuration - (via copy_configuration()) of a hog_image object to many other threads. - In this case, it is safe to copy the configuration of a shared object so long - as no other operations are performed on it. - !*/ - - public: - - const static unsigned long cell_size = cell_size_; - const static unsigned long block_size = block_size_; - const static unsigned long cell_stride = cell_stride_; - const static unsigned long num_orientation_bins = num_orientation_bins_; - const static int gradient_type = gradient_type_; - const static int interpolation_type = interpolation_type_; - - const static long min_size = cell_size*block_size+2; - - typedef matrix<double, block_size*block_size*num_orientation_bins, 1> descriptor_type; - - hog_image ( - ); - /*! - ensures - - this object is properly initialized - !*/ - - void clear ( - ); - /*! - ensures - - this object will have its initial value - !*/ - - void copy_configuration ( - const hog_image& item - ); - /*! - ensures - - copies all the state information of item into *this, except for state - information populated by load(). More precisely, given two hog_image - objects H1 and H2, the following sequence of instructions should always - result in both of them having the exact same state. - H2.copy_configuration(H1); - H1.load(img); - H2.load(img); - !*/ - - template < - typename image_type - > - inline void load ( - const image_type& img - ); - /*! - requires - - image_type is a dlib::matrix or something convertible to a matrix - via mat(). - - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false - ensures - - if (img.nr() < min_size || img.nc() < min_size) then - - the image is too small so we don't compute anything on it - - #size() == 0 - - else - - generates a HOG image from the given image. 
- - #size() > 0 - !*/ - - inline void unload ( - ); - /*! - ensures - - #nr() == 0 - - #nc() == 0 - - clears only the state information which is populated by load(). For - example, let H be a hog_image object. Then consider the two sequences - of instructions: - Sequence 1: - H.load(img); - H.unload(); - H.load(img); - - Sequence 2: - H.load(img); - Both sequence 1 and sequence 2 should have the same effect on H. - !*/ - - inline size_t size ( - ) const; - /*! - ensures - - returns nr()*nc() - !*/ - - inline long nr ( - ) const; - /*! - ensures - - returns the number of rows in this HOG image - !*/ - - inline long nc ( - ) const; - /*! - ensures - - returns the number of columns in this HOG image - !*/ - - long get_num_dimensions ( - ) const; - /*! - ensures - - returns the number of dimensions in the feature vectors generated by - this object. - - In particular, returns the value block_size*block_size*num_orientation_bins - !*/ - - inline const descriptor_type& operator() ( - long row, - long col - ) const; - /*! - requires - - 0 <= row < nr() - - 0 <= col < nc() - ensures - - returns the descriptor for the HOG block at the given row and column. This descriptor - will include information from a window that is located at get_block_rect(row,col) in - the original image given to load(). - - The returned descriptor vector will have get_num_dimensions() elements. - !*/ - - const rectangle get_block_rect ( - long row, - long col - ) const; - /*! - ensures - - returns a rectangle that tells you what part of the original image is associated - with a particular HOG block. That is, what part of the input image is associated - with (*this)(row,col). - - The returned rectangle will be cell_size*block_size pixels wide and tall. - !*/ - - const point image_to_feat_space ( - const point& p - ) const; - /*! - ensures - - Each local feature is extracted from a certain point in the input image. 
- This function returns the identity of the local feature corresponding - to the image location p. Or in other words, let P == image_to_feat_space(p), - then (*this)(P.y(),P.x()) == the local feature closest to, or centered at, - the point p in the input image. Note that some image points might not have - corresponding feature locations. E.g. border points or points outside the - image. In these cases the returned point will be outside get_rect(*this). - !*/ - - const rectangle image_to_feat_space ( - const rectangle& rect - ) const; - /*! - ensures - - returns rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner())); - (i.e. maps a rectangle from image space to feature space) - !*/ - - const point feat_to_image_space ( - const point& p - ) const; - /*! - ensures - - returns the location in the input image space corresponding to the center - of the local feature at point p. In other words, this function computes - the inverse of image_to_feat_space(). Note that it may only do so approximately, - since more than one image location might correspond to the same local feature. - That is, image_to_feat_space() might not be invertible so this function gives - the closest possible result. - !*/ - - const rectangle feat_to_image_space ( - const rectangle& rect - ) const; - /*! - ensures - - return rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner())); - (i.e. maps a rectangle from feature space to image space) - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - unsigned long T1, - unsigned long T2, - unsigned long T3, - unsigned long T4, - int T5, - int T6 - > - void serialize ( - const hog_image<T1,T2,T3,T4,T5,T6>& item, - std::ostream& out - ); - /*! 
- provides serialization support - !*/ - - template < - unsigned long T1, - unsigned long T2, - unsigned long T3, - unsigned long T4, - int T5, - int T6 - > - void deserialize ( - hog_image<T1,T2,T3,T4,T5,T6>& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_HoG_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/image_keypoint/nearest_neighbor_feature_image.h b/ml/dlib/dlib/image_keypoint/nearest_neighbor_feature_image.h deleted file mode 100644 index 2ee45da2f..000000000 --- a/ml/dlib/dlib/image_keypoint/nearest_neighbor_feature_image.h +++ /dev/null @@ -1,408 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_NEAREST_NEIGHBOR_FeATURE_IMAGE_Hh_ -#define DLIB_NEAREST_NEIGHBOR_FeATURE_IMAGE_Hh_ - -#include "nearest_neighbor_feature_image_abstract.h" -#include <vector> -#include "../algs.h" -#include "../matrix.h" -#include "../statistics.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class nearest_neighbor_feature_image : noncopyable - { - /*! 
- INITIAL VALUE - - nn_feats.size() == 1 - - CONVENTION - - nn_feats.size() == 1 - - !*/ - - public: - - typedef std::vector<std::pair<unsigned int,double> > descriptor_type; - - nearest_neighbor_feature_image ( - ); - - void clear ( - ); - - void copy_configuration ( - const feature_extractor& item - ); - - void copy_configuration ( - const nearest_neighbor_feature_image& item - ); - - template < - typename image_type - > - inline void load ( - const image_type& img - ); - - inline size_t size ( - ) const; - - inline long nr ( - ) const; - - inline long nc ( - ) const; - - inline long get_num_dimensions ( - ) const; - - template <typename vector_type> - void set_basis ( - const vector_type& new_basis - ); - - inline const descriptor_type& operator() ( - long row, - long col - ) const; - - inline const rectangle get_block_rect ( - long row, - long col - ) const; - - inline const point image_to_feat_space ( - const point& p - ) const; - - inline const rectangle image_to_feat_space ( - const rectangle& rect - ) const; - - inline const point feat_to_image_space ( - const point& p - ) const; - - inline const rectangle feat_to_image_space ( - const rectangle& rect - ) const; - - template <typename T> - friend void serialize ( - const nearest_neighbor_feature_image<T>& item, - std::ostream& out - ); - - template <typename T> - friend void deserialize ( - nearest_neighbor_feature_image<T>& item, - std::istream& in - ); - - private: - - array2d<unsigned long> feats; - feature_extractor fe; - std::vector<typename feature_extractor::descriptor_type> basis; - - // This is a transient variable. 
It is just here so it doesn't have to be - // reallocated over and over inside operator() - mutable descriptor_type nn_feats; - - }; - -// ---------------------------------------------------------------------------------------- - - template <typename T> - void serialize ( - const nearest_neighbor_feature_image<T>& item, - std::ostream& out - ) - { - serialize(item.feats, out); - serialize(item.fe, out); - serialize(item.basis, out); - } - - template <typename T> - void deserialize ( - nearest_neighbor_feature_image<T>& item, - std::istream& in - ) - { - deserialize(item.feats, in); - deserialize(item.fe, in); - deserialize(item.basis, in); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// nearest_neighbor_feature_image member functions -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - nearest_neighbor_feature_image<feature_extractor>:: - nearest_neighbor_feature_image ( - ) - { - nn_feats.resize(1); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - void nearest_neighbor_feature_image<feature_extractor>:: - clear ( - ) - { - feats.clear(); - fe.clear(); - basis.clear(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - void nearest_neighbor_feature_image<feature_extractor>:: - copy_configuration ( - const feature_extractor& item - ) - { - fe.copy_configuration(item); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - void 
nearest_neighbor_feature_image<feature_extractor>:: - copy_configuration ( - const nearest_neighbor_feature_image& item - ) - { - fe.copy_configuration(item.fe); - basis = item.basis; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - template < - typename image_type - > - void nearest_neighbor_feature_image<feature_extractor>:: - load ( - const image_type& img - ) - { - fe.load(img); - - feats.set_size(fe.nr(), fe.nc()); - - // find the nearest neighbor for each feature vector and store the - // result in feats. - for (long r = 0; r < feats.nr(); ++r) - { - for (long c = 0; c < feats.nc(); ++c) - { - const typename feature_extractor::descriptor_type& local_feat = fe(r,c); - - double best_dist = std::numeric_limits<double>::infinity(); - unsigned long best_idx = 0; - for (unsigned long i = 0; i < basis.size(); ++i) - { - double dist = length_squared(local_feat - basis[i]); - if (dist < best_dist) - { - best_dist = dist; - best_idx = i; - } - } - - feats[r][c] = best_idx; - } - } - - fe.unload(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - size_t nearest_neighbor_feature_image<feature_extractor>:: - size ( - ) const - { - return feats.size(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - long nearest_neighbor_feature_image<feature_extractor>:: - nr ( - ) const - { - return feats.nr(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - long nearest_neighbor_feature_image<feature_extractor>:: - nc ( - ) const - { - return feats.nc(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename 
feature_extractor - > - long nearest_neighbor_feature_image<feature_extractor>:: - get_num_dimensions ( - ) const - { - return basis.size(); - } - -// ---------------------------------------------------------------------------------------- - - template <typename feature_extractor> - template <typename vector_type> - void nearest_neighbor_feature_image<feature_extractor>:: - set_basis ( - const vector_type& new_basis - ) - { - basis.assign(new_basis.begin(), new_basis.end()); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - const typename nearest_neighbor_feature_image<feature_extractor>::descriptor_type& - nearest_neighbor_feature_image<feature_extractor>:: - operator() ( - long row, - long col - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(0 <= row && row < nr() && - 0 <= col && col < nc(), - "\t descriptor_type nearest_neighbor_feature_image::operator(row,col)" - << "\n\t Invalid inputs were given to this function" - << "\n\t row: " << row - << "\n\t col: " << col - << "\n\t nr(): " << nr() - << "\n\t nc(): " << nc() - << "\n\t this: " << this - ); - - nn_feats[0] = std::make_pair(feats[row][col],1); - return nn_feats; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - const rectangle nearest_neighbor_feature_image<feature_extractor>:: - get_block_rect ( - long row, - long col - ) const - { - return fe.get_block_rect(row,col); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - const point nearest_neighbor_feature_image<feature_extractor>:: - image_to_feat_space ( - const point& p - ) const - { - return fe.image_to_feat_space(p); - } - -// ---------------------------------------------------------------------------------------- - - template < - 
typename feature_extractor - > - const rectangle nearest_neighbor_feature_image<feature_extractor>:: - image_to_feat_space ( - const rectangle& rect - ) const - { - return fe.image_to_feat_space(rect); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - const point nearest_neighbor_feature_image<feature_extractor>:: - feat_to_image_space ( - const point& p - ) const - { - return fe.feat_to_image_space(p); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - const rectangle nearest_neighbor_feature_image<feature_extractor>:: - feat_to_image_space ( - const rectangle& rect - ) const - { - return fe.feat_to_image_space(rect); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_NEAREST_NEIGHBOR_FeATURE_IMAGE_Hh_ - - diff --git a/ml/dlib/dlib/image_keypoint/nearest_neighbor_feature_image_abstract.h b/ml/dlib/dlib/image_keypoint/nearest_neighbor_feature_image_abstract.h deleted file mode 100644 index 59d7cfeb7..000000000 --- a/ml/dlib/dlib/image_keypoint/nearest_neighbor_feature_image_abstract.h +++ /dev/null @@ -1,254 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_NEAREST_NEIGHBOR_FeATURE_IMAGE_ABSTRACT_Hh_ -#ifdef DLIB_NEAREST_NEIGHBOR_FeATURE_IMAGE_ABSTRACT_Hh_ - -#include <vector> -#include "../algs.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename feature_extractor - > - class nearest_neighbor_feature_image : noncopyable - { - /*! 
- REQUIREMENTS ON feature_extractor - - must be an object with an interface compatible with dlib::hog_image - - INITIAL VALUE - - size() == 0 - - get_num_dimensions() == 0 - - WHAT THIS OBJECT REPRESENTS - This object is a tool for performing image feature extraction. In - particular, it wraps another image feature extractor and converts - the wrapped image feature vectors into sparse indicator vectors. It does - this by finding the nearest neighbor for each feature vector and returning an - indicator vector that is zero everywhere except for the position indicated by - the nearest neighbor. - - - THREAD SAFETY - Concurrent access to an instance of this object is not safe and should be protected - by a mutex lock except for the case where you are copying the configuration - (via copy_configuration()) of a nearest_neighbor_feature_image object to many other - threads. In this case, it is safe to copy the configuration of a shared object so - long as no other operations are performed on it. - - - NOTATION - let BASE_FE denote the base feature_extractor object contained inside - the nearest_neighbor_feature_image. - !*/ - - public: - - typedef std::vector<std::pair<unsigned int,double> > descriptor_type; - - nearest_neighbor_feature_image ( - ); - /*! - ensures - - this object is properly initialized - !*/ - - void clear ( - ); - /*! - ensures - - this object will have its initial value - !*/ - - void copy_configuration ( - const feature_extractor& item - ); - /*! - ensures - - performs BASE_FE.copy_configuration(item) - !*/ - - void copy_configuration ( - const nearest_neighbor_feature_image& item - ); - /*! - ensures - - copies all the state information of item into *this, except for state - information populated by load(). More precisely, given two - nearest_neighbor_feature_image objects H1 and H2, the following sequence - of instructions should always result in both of them having the exact - same state. 
- H2.copy_configuration(H1); - H1.load(img); - H2.load(img); - !*/ - - template < - typename image_type - > - inline void load ( - const image_type& img - ); - /*! - requires - - image_type == any type that can be supplied to feature_extractor::load() - ensures - - performs BASE_FE.load(img) - i.e. does feature extraction. The features can be accessed using - operator() as defined below. - !*/ - - inline size_t size ( - ) const; - /*! - ensures - - returns BASE_FE.size() - !*/ - - inline long nr ( - ) const; - /*! - ensures - - returns BASE_FE.nr() - !*/ - - inline long nc ( - ) const; - /*! - ensures - - returns BASE_FE.nc() - !*/ - - inline long get_num_dimensions ( - ) const; - /*! - ensures - - returns the dimensionality of the feature vectors returned by operator(). - In this case, this is the number of basis elements. That is, it is the number - of vectors given to the set_basis() member function. - !*/ - - template <typename vector_type> - void set_basis ( - const vector_type& new_basis - ); - /*! - ensures - - #get_num_dimensions() == new_basis.size() - - The operator() member function defined below will use new_basis to - determine nearest neighbors. - !*/ - - inline const descriptor_type& operator() ( - long row, - long col - ) const; - /*! - requires - - 0 <= row < nr() - - 0 <= col < nc() - - get_num_dimensions() > 0 - ensures - - determines which basis element is nearest to BASE_FE(row,col) and returns a sparse - indicator vector identifying the nearest neighbor. - - To be precise, this function returns a sparse vector V such that: - - V.size() == 1 - - V[0].first == The basis element index for the basis vector nearest to BASE_FE(row,col). - "nearness" is determined using Euclidean distance. - - V[0].second == 1 - !*/ - - inline const rectangle get_block_rect ( - long row, - long col - ) const; - /*! - ensures - - returns BASE_FE.get_block_rect(row,col) - I.e. 
returns a rectangle that tells you what part of the original image is associated - with a particular feature vector. - !*/ - - inline const point image_to_feat_space ( - const point& p - ) const; - /*! - ensures - - returns BASE_FE.image_to_feat_space(p) - I.e. Each local feature is extracted from a certain point in the input image. - This function returns the identity of the local feature corresponding - to the image location p. Or in other words, let P == image_to_feat_space(p), - then (*this)(P.y(),P.x()) == the local feature closest to, or centered at, - the point p in the input image. Note that some image points might not have - corresponding feature locations. E.g. border points or points outside the - image. In these cases the returned point will be outside get_rect(*this). - !*/ - - inline const rectangle image_to_feat_space ( - const rectangle& rect - ) const; - /*! - ensures - - returns BASE_FE.image_to_feat_space(rect) - I.e. returns rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner())); - (i.e. maps a rectangle from image space to feature space) - !*/ - - inline const point feat_to_image_space ( - const point& p - ) const; - /*! - ensures - - returns BASE_FE.feat_to_image_space(p) - I.e. returns the location in the input image space corresponding to the center - of the local feature at point p. In other words, this function computes - the inverse of image_to_feat_space(). Note that it may only do so approximately, - since more than one image location might correspond to the same local feature. - That is, image_to_feat_space() might not be invertible so this function gives - the closest possible result. - !*/ - - inline const rectangle feat_to_image_space ( - const rectangle& rect - ) const; - /*! - ensures - - returns BASE_FE.feat_to_image_space(rect) - I.e. return rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner())); - (i.e. 
maps a rectangle from feature space to image space) - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template <typename T> - void serialize ( - const nearest_neighbor_feature_image<T>& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - - template <typename T> - void deserialize ( - nearest_neighbor_feature_image<T>& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_NEAREST_NEIGHBOR_FeATURE_IMAGE_ABSTRACT_Hh_ - - - diff --git a/ml/dlib/dlib/image_keypoint/poly_image.h b/ml/dlib/dlib/image_keypoint/poly_image.h deleted file mode 100644 index 8abb912f0..000000000 --- a/ml/dlib/dlib/image_keypoint/poly_image.h +++ /dev/null @@ -1,649 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#ifndef DLIB_POLY_ImAGE_Hh_ -#define DLIB_POLY_ImAGE_Hh_ - -#include "poly_image_abstract.h" -#include "build_separable_poly_filters.h" -#include "../algs.h" -#include "../matrix.h" -#include "../array2d.h" -#include "../geometry.h" -#include <cmath> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - long Downsample - > - class poly_image : noncopyable - { - COMPILE_TIME_ASSERT(Downsample >= 1); - public: - const static long downsample = Downsample; - typedef matrix<double, 0, 1> descriptor_type; - - poly_image( - long order_, - long window_size_, - bool normalization = true, - bool rotation_invariance_ = false - ) - { - setup(order_, window_size_); - set_uses_normalization(normalization); - set_is_rotationally_invariant(rotation_invariance_); - } - - poly_image ( - ) - { - clear(); - } - - void clear ( - ) - { - normalize = true; - rotation_invariance = false; - poly_coef.clear(); - order = 3; - window_size = 13; - border_size = (long)std::ceil(std::floor(window_size/2.0)/downsample); - num_rows = 0; - num_cols = 0; - filters = build_separable_poly_filters(order, window_size); - } - - long get_order ( - ) const - { - return order; - } - - long get_window_size ( - ) const - { - return window_size; - } - - void setup ( - long order_, - long window_size_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(1 <= order_ && order_ <= 6 && - window_size_ >= 3 && (window_size_%2) == 1, - "\t descriptor_type poly_image::setup()" - << "\n\t Invalid arguments were given to this function." 
- << "\n\t order_: " << order_ - << "\n\t window_size_: " << window_size_ - << "\n\t this: " << this - ); - - - poly_coef.clear(); - order = order_; - window_size = window_size_; - border_size = (long)std::ceil(std::floor(window_size/2.0)/downsample); - num_rows = 0; - num_cols = 0; - filters = build_separable_poly_filters(order, window_size); - } - - bool uses_normalization ( - ) const { return normalize; } - - void set_uses_normalization ( - bool normalization - ) - { - normalize = normalization; - } - - bool is_rotationally_invariant ( - ) const { return rotation_invariance; } - - void set_is_rotationally_invariant ( - bool rotation_invariance_ - ) - { - rotation_invariance = rotation_invariance_; - } - - void copy_configuration ( - const poly_image& item - ) - { - normalize = item.normalize; - rotation_invariance = item.rotation_invariance; - if (order != item.order || - window_size != item.window_size) - { - order = item.order; - window_size = item.window_size; - border_size = item.border_size; - filters = item.filters; - } - } - - template < - typename image_type - > - inline void load ( - const image_type& img - ) - { - COMPILE_TIME_ASSERT( pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false ); - - poly_coef.resize(get_num_dimensions()); - des.set_size(get_num_dimensions()); - - - if (normalize) - { - array2d<float> coef0; - rectangle rect = filter_image(img, coef0, filters[0]); - num_rows = rect.height(); - num_cols = rect.width(); - - for (unsigned long i = 1; i < filters.size(); ++i) - { - filter_image(img, poly_coef[i-1], filters[i]); - - // intensity normalize everything - for (long r = 0; r < coef0.nr(); ++r) - { - for (long c = 0; c < coef0.nc(); ++c) - { - if (coef0[r][c] >= 1) - poly_coef[i-1][r][c] /= coef0[r][c]; - else - poly_coef[i-1][r][c] = 0; - } - } - } - - if (rotation_invariance) - rotate_polys(rect); - } - else - { - rectangle rect; - for (unsigned long i = 0; i < filters.size(); ++i) - { - rect = 
filter_image(img, poly_coef[i], filters[i]); - } - num_rows = rect.height(); - num_cols = rect.width(); - - if (rotation_invariance) - rotate_polys(rect); - } - } - - void unload() - { - poly_coef.clear(); - num_rows = 0; - num_cols = 0; - } - - inline size_t size ( - ) const { return static_cast<unsigned long>(nr()*nc()); } - - inline long nr ( - ) const { return num_rows; } - - inline long nc ( - ) const { return num_cols; } - - long get_num_dimensions ( - ) const - { - if (normalize) - { - // -1 because we discard the constant term of the polynomial. - return filters.size()-1; - } - else - { - return filters.size(); - } - } - - inline const descriptor_type& operator() ( - long row, - long col - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT( 0 <= row && row < nr() && - 0 <= col && col < nc(), - "\t descriptor_type poly_image::operator()()" - << "\n\t invalid row or col argument" - << "\n\t row: " << row - << "\n\t col: " << col - << "\n\t nr(): " << nr() - << "\n\t nc(): " << nc() - << "\n\t this: " << this - ); - - // add because of the zero border around the poly_coef images - row += border_size; - col += border_size; - - for (long i = 0; i < des.size(); ++i) - des(i) = poly_coef[i][row][col]; - - return des; - } - - const rectangle get_block_rect ( - long row, - long col - ) const - { - return centered_rect(Downsample*point(col+border_size, row+border_size), - window_size, window_size); - } - - const point image_to_feat_space ( - const point& p - ) const - { - return p/Downsample - point(border_size, border_size); - } - - const rectangle image_to_feat_space ( - const rectangle& rect - ) const - { - return rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner())); - } - - const point feat_to_image_space ( - const point& p - ) const - { - return (p + point(border_size, border_size))*Downsample; - } - - const rectangle feat_to_image_space ( - const rectangle& rect - ) const - { - return 
rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner())); - } - - - - friend void serialize (const poly_image& item, std::ostream& out) - { - int version = 1; - serialize(version, out); - serialize(item.poly_coef, out); - serialize(item.order, out); - serialize(item.window_size, out); - serialize(item.border_size, out); - serialize(item.num_rows, out); - serialize(item.num_cols, out); - serialize(item.normalize, out); - serialize(item.rotation_invariance, out); - serialize(item.filters, out); - } - - friend void deserialize (poly_image& item, std::istream& in ) - { - int version = 0; - deserialize(version, in); - if (version != 1) - throw dlib::serialization_error("Unexpected version found while deserializing dlib::poly_image"); - - deserialize(item.poly_coef, in); - deserialize(item.order, in); - deserialize(item.window_size, in); - deserialize(item.border_size, in); - deserialize(item.num_rows, in); - deserialize(item.num_cols, in); - deserialize(item.normalize, in); - deserialize(item.rotation_invariance, in); - deserialize(item.filters, in); - } - - private: - - matrix<float,2,1> rotate_order_1 ( - const matrix<float,2,1>& w, - double cos_theta, - double sin_theta - ) const - { - const double w1 = w(0); - const double w2 = w(1); - matrix<double,2,2> M; - M = w1, w2, - w2, -w1; - - matrix<double,2,1> x; - x = cos_theta, - sin_theta; - - return matrix_cast<float>(M*x); - } - - matrix<float,3,1> rotate_order_2 ( - const matrix<float,3,1>& w, - double cos_theta, - double sin_theta - ) const - { - const double w1 = w(0); - const double w2 = w(1); - const double w3 = w(2); - matrix<double,3,3> M; - M = w1, w2, w3, - w2, (2*w3-2*w1), -w2, - w3, -w2, w1; - - matrix<double,3,1> x; - x = std::pow(cos_theta,2.0), - cos_theta*sin_theta, - std::pow(sin_theta,2.0); - - return matrix_cast<float>(M*x); - } - - matrix<float,4,1> rotate_order_3 ( - const matrix<float,4,1>& w, - double cos_theta, - double sin_theta - ) const - { - const double w1 = 
w(0); - const double w2 = w(1); - const double w3 = w(2); - const double w4 = w(3); - matrix<double,4,4> M; - M = w1, w2, w3, w4, - w2, (2*w3-3*w1), (3*w4-2*w2), -w3, - w3, (3*w4-2*w2), (3*w1-2*w3), w2, - w4, -w3, w2, -w1; - - matrix<double,4,1> x; - x = std::pow(cos_theta,3.0), - std::pow(cos_theta,2.0)*sin_theta, - cos_theta*std::pow(sin_theta,2.0), - std::pow(sin_theta,3.0); - - return matrix_cast<float>(M*x); - } - - matrix<float,5,1> rotate_order_4 ( - const matrix<float,5,1>& w, - double cos_theta, - double sin_theta - ) const - { - const double w1 = w(0); - const double w2 = w(1); - const double w3 = w(2); - const double w4 = w(3); - const double w5 = w(4); - matrix<double,5,5> M; - M = w1, w2, w3, w4, w5, - w2, (2*w3-4*w1), (3*w4-3*w2), (4*w5-2*w3), -w4, - w3, (3*w4-3*w2), (6*w1-4*w3+6*w5), (3*w2-3*w4), w3, - w4, (4*w5-2*w3), (3*w2-3*w4), (2*w3-4*w1), -w2, - w5, -w4, w3, -w2, w1; - - matrix<double,5,1> x; - x = std::pow(cos_theta,4.0), - std::pow(cos_theta,3.0)*sin_theta, - std::pow(cos_theta,2.0)*std::pow(sin_theta,2.0), - cos_theta*std::pow(sin_theta,3.0), - std::pow(sin_theta,4.0); - - return matrix_cast<float>(M*x); - } - - matrix<float,6,1> rotate_order_5 ( - const matrix<float,6,1>& w, - double cos_theta, - double sin_theta - ) const - { - const double w1 = w(0); - const double w2 = w(1); - const double w3 = w(2); - const double w4 = w(3); - const double w5 = w(4); - const double w6 = w(5); - matrix<double,6,6> M; - M = w1, w2, w3, w4, w5, w6, - w2, (2*w3-5*w1), (3*w4-4*w2), (4*w5-3*w3), (5*w6-2*w4), -w5, - w3, (3*w4-4*w2), (10*w1-6*w3+6*w5), (6*w2-6*w4+10*w6), (3*w3-4*w5), w4, - w4, (4*w5-3*w3), (6*w2-6*w4+10*w6), (-10*w1+6*w3-6*w5), (3*w4-4*w2), -w3, - w5, (5*w6-2*w4), (3*w3-4*w5), (3*w4-4*w2), (5*w1-2*w3), w2, - w6, -w5, w4, -w3, w2, -w1; - - matrix<double,6,1> x; - x = std::pow(cos_theta,5.0), - std::pow(cos_theta,4.0)*sin_theta, - std::pow(cos_theta,3.0)*std::pow(sin_theta,2.0), - std::pow(cos_theta,2.0)*std::pow(sin_theta,3.0), - 
cos_theta*std::pow(sin_theta,4.0), - std::pow(sin_theta,5.0); - - return matrix_cast<float>(M*x); - } - - matrix<float,7,1> rotate_order_6 ( - const matrix<float,7,1>& w, - double cos_theta, - double sin_theta - ) const - { - const double w1 = w(0); - const double w2 = w(1); - const double w3 = w(2); - const double w4 = w(3); - const double w5 = w(4); - const double w6 = w(5); - const double w7 = w(6); - matrix<double,7,7> M; - M = w1, w2, w3, w4, w5, w6, w7, - w2, (2*w3-6*w1), (3*w4-5*w2), (4*w5-4*w3), (5*w6-3*w4), (6*w7-2*w5), -w6, - w3, (3*w4-5*w2), (15*w1-8*w3+ 6*w5), ( 10*w2 -9*w4+10*w6), ( 6*w3-8*w5+15*w7), (3*w4-5*w6), w5, - w4, (4*w5-4*w3), (10*w2-9*w4+10*w6), (-20*w1+12*w3-12*w5+20*w7), (-10*w2+9*w4-10*w6), (4*w5-4*w3), -w4, - w5, (5*w6-3*w4), ( 6*w3-8*w5+15*w7), (-10*w2 +9*w4-10*w6), ( 15*w1-8*w3 +6*w5), (5*w2-3*w4), w3, - w6, (6*w7-2*w5), (3*w4-5*w6), (4*w5-4*w3), (5*w2-3*w4), (2*w3-6*w1), -w2, - w7, -w6, w5, -w4, w3, -w2, w1; - - matrix<double,7,1> x; - x = std::pow(cos_theta,6.0), - std::pow(cos_theta,5.0)*sin_theta, - std::pow(cos_theta,4.0)*std::pow(sin_theta,2.0), - std::pow(cos_theta,3.0)*std::pow(sin_theta,3.0), - std::pow(cos_theta,2.0)*std::pow(sin_theta,4.0), - cos_theta*std::pow(sin_theta,5.0), - std::pow(sin_theta,6.0); - - return matrix_cast<float>(M*x); - } - - void rotate_polys ( - const rectangle& rect - ) - /*! - ensures - - rotates all the polynomials in poly_coef so that they are - rotationally invariant - !*/ - { - // The idea here is to use a rotation matrix to rotate the - // coordinate system for the polynomial so that the x axis - // always lines up with the gradient vector (or direction of - // max curvature). This way we can make the representation - // rotation invariant. - - // Note that the rotation matrix is given by: - // [ cos_theta -sin_theta ] - // [ sin_theta cos_theta ] - - // need to offset poly_coef to get past the constant term if there isn't any normalization. - const int off = (normalize) ? 
0 : 1; - - for (long r = rect.top(); r <= rect.bottom(); ++r) - { - for (long c = rect.left(); c <= rect.right(); ++c) - { - dlib::vector<double,2> g(poly_coef[off+0][r][c], - poly_coef[off+1][r][c]); - - const double len = g.length(); - if (len != 0) - { - g /= len; - } - else - { - g.x() = 1; - g.y() = 0; - } - // since we normalized g we can find the sin/cos of its angle easily. - const double cos_theta = g.x(); - const double sin_theta = g.y(); - - if (order >= 1) - { - matrix<float,2,1> w; - w = poly_coef[off+0][r][c], - poly_coef[off+1][r][c]; - w = rotate_order_1(w, cos_theta, sin_theta); - poly_coef[off+0][r][c] = w(0); - poly_coef[off+1][r][c] = w(1); - } - if (order >= 2) - { - matrix<float,3,1> w; - w = poly_coef[off+2][r][c], - poly_coef[off+3][r][c], - poly_coef[off+4][r][c]; - w = rotate_order_2(w, cos_theta, sin_theta); - poly_coef[off+2][r][c] = w(0); - poly_coef[off+3][r][c] = w(1); - poly_coef[off+4][r][c] = w(2); - } - if (order >= 3) - { - matrix<float,4,1> w; - w = poly_coef[off+5][r][c], - poly_coef[off+6][r][c], - poly_coef[off+7][r][c], - poly_coef[off+8][r][c]; - w = rotate_order_3(w, cos_theta, sin_theta); - poly_coef[off+5][r][c] = w(0); - poly_coef[off+6][r][c] = w(1); - poly_coef[off+7][r][c] = w(2); - poly_coef[off+8][r][c] = w(3); - } - if (order >= 4) - { - matrix<float,5,1> w; - w = poly_coef[off+9][r][c], - poly_coef[off+10][r][c], - poly_coef[off+11][r][c], - poly_coef[off+12][r][c], - poly_coef[off+13][r][c]; - w = rotate_order_4(w, cos_theta, sin_theta); - poly_coef[off+9][r][c] = w(0); - poly_coef[off+10][r][c] = w(1); - poly_coef[off+11][r][c] = w(2); - poly_coef[off+12][r][c] = w(3); - poly_coef[off+13][r][c] = w(4); - } - if (order >= 5) - { - matrix<float,6,1> w; - w = poly_coef[off+14][r][c], - poly_coef[off+15][r][c], - poly_coef[off+16][r][c], - poly_coef[off+17][r][c], - poly_coef[off+18][r][c], - poly_coef[off+19][r][c]; - w = rotate_order_5(w, cos_theta, sin_theta); - poly_coef[off+14][r][c] = w(0); - 
poly_coef[off+15][r][c] = w(1); - poly_coef[off+16][r][c] = w(2); - poly_coef[off+17][r][c] = w(3); - poly_coef[off+18][r][c] = w(4); - poly_coef[off+19][r][c] = w(5); - } - if (order >= 6) - { - matrix<float,7,1> w; - w = poly_coef[off+20][r][c], - poly_coef[off+21][r][c], - poly_coef[off+22][r][c], - poly_coef[off+23][r][c], - poly_coef[off+24][r][c], - poly_coef[off+25][r][c], - poly_coef[off+26][r][c]; - w = rotate_order_6(w, cos_theta, sin_theta); - poly_coef[off+20][r][c] = w(0); - poly_coef[off+21][r][c] = w(1); - poly_coef[off+22][r][c] = w(2); - poly_coef[off+23][r][c] = w(3); - poly_coef[off+24][r][c] = w(4); - poly_coef[off+25][r][c] = w(5); - poly_coef[off+26][r][c] = w(6); - } - } - } - - } - - template <typename image_type> - rectangle filter_image ( - const image_type& img, - array2d<float>& out, - const std::vector<separable_filter_type>& filter - ) const - { - rectangle rect = spatially_filter_image_separable_down(downsample, img, out, filter[0].first, filter[0].second); - for (unsigned long i = 1; i < filter.size(); ++i) - { - spatially_filter_image_separable_down(downsample, img, out, filter[i].first, filter[i].second, 1, false, true); - } - return rect; - } - - - - std::vector<std::vector<separable_filter_type> > filters; - - dlib::array<array2d<float> > poly_coef; - long order; - long window_size; - long border_size; - long num_rows; - long num_cols; - - bool normalize; - bool rotation_invariance; - - mutable descriptor_type des; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_POLY_ImAGE_Hh_ - - diff --git a/ml/dlib/dlib/image_keypoint/poly_image_abstract.h b/ml/dlib/dlib/image_keypoint/poly_image_abstract.h deleted file mode 100644 index 2f17bb31e..000000000 --- a/ml/dlib/dlib/image_keypoint/poly_image_abstract.h +++ /dev/null @@ -1,335 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#undef DLIB_POLY_ImAGE_ABSTRACT_Hh_ -#ifdef DLIB_POLY_ImAGE_ABSTRACT_Hh_ - -#include "../algs.h" -#include "../matrix.h" -#include "../geometry/rectangle_abstract.h" -#include <cmath> -#include "../image_processing/generic_image.h" - -namespace dlib -{ - template < - long Downsample - > - class poly_image : noncopyable - { - /*! - REQUIREMENTS ON TEMPLATE PARAMETERS - - Downsample >= 1 - - WHAT THIS OBJECT REPRESENTS - This object is a tool for extracting local feature descriptors from an image. - In particular, it fits polynomials to local pixel patches and allows you to - query the coefficients of these polynomials. Additionally, the coefficients - may be intensity normalized by dividing them by the constant term of the fitted - polynomial and then the constant term is discarded. - - Finally, the user can specify a downsampling rate. If the template argument - Downsample is set to 1 then feature extraction is performed at every pixel of - an input image (except for a small area around the image border). However, - if Downsample is set to 2 then feature extraction is only performed at every - other pixel location. More generally, if Downsample is set to N then feature - extraction is performed only every N pixels. - - THREAD SAFETY - Concurrent access to an instance of this object is not safe and should be protected - by a mutex lock except for the case where you are copying the configuration - (via copy_configuration()) of a poly_image object to many other threads. - In this case, it is safe to copy the configuration of a shared object so long - as no other operations are performed on it. - !*/ - - public: - - typedef matrix<double, 0, 1> descriptor_type; - const static long downsample = Downsample; - - poly_image ( - ); - /*! 
- ensures - - #get_order() == 3 - - #get_window_size() == 13 - - #size() == 0 - - #uses_normalization() == true - - #is_rotationally_invariant() == false - !*/ - - poly_image( - long order, - long window_size, - bool normalization = true, - bool rotation_invariance = false - ); - /*! - requires - - 1 <= order <= 6 - - window_size >= 3 && window_size is odd - ensures - - #get_order() == order - - #get_window_size() == window_size - - #size() == 0 - - #uses_normalization() == normalization - - #is_rotationally_invariant() == rotation_invariance - !*/ - - void clear ( - ); - /*! - ensures - - this object will have its initial value - !*/ - - void setup ( - long order, - long window_size - ); - /*! - requires - - 1 <= order <= 6 - - window_size >= 3 && window_size is odd - ensures - - #get_order() == order - - #get_window_size() == window_size - !*/ - - long get_order ( - ) const; - /*! - ensures - - returns the order of the polynomial that will be fitted to - each local pixel patch during feature extraction. - !*/ - - long get_window_size ( - ) const; - /*! - ensures - - returns the size of the window used for local feature extraction. - This is the width and height of the window in pixels. - !*/ - - bool uses_normalization ( - ) const; - /*! - ensures - - returns true if the polynomial coefficients are intensity normalized - and false otherwise. - !*/ - - void set_uses_normalization ( - bool normalization - ); - /*! - ensures - - #uses_normalization() == normalization - !*/ - - bool is_rotationally_invariant ( - ); - /*! - ensures - - returns true if the feature extractor will adjust the output so that it - is rotationally invariant. This is done by rotating each patch such that - the gradient vector always points in the same direction. - !*/ - - void set_is_rotationally_invariant ( - bool rotation_invariance - ); - /*! - ensures - - #is_rotationally_invariant() == rotation_invariance - !*/ - - void copy_configuration ( - const poly_image& item - ); - /*! 
- ensures - - copies all the state information of item into *this, except for state - information populated by load(). More precisely, given two poly_image - objects H1 and H2, the following sequence of instructions should always - result in both of them having the exact same state. - H2.copy_configuration(H1); - H1.load(img); - H2.load(img); - !*/ - - template < - typename image_type - > - inline void load ( - const image_type& img - ); - /*! - requires - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false - ensures - - Performs the feature extraction described in the WHAT THIS OBJECT REPRESENTS - section above. This means after load() finishes you can call (*this)(row,col) - to obtain the polynomial coefficients for an order get_order() polynomial which - was fitted to the image patch get_block_rect(row,col). - - #size() > 0 - !*/ - - void unload( - ); - /*! - ensures - - #nr() == 0 - - #nc() == 0 - - clears only the state information which is populated by load(). For - example, let H be a poly_image object. Then consider the two sequences - of instructions: - Sequence 1: - H.load(img); - H.unload(); - H.load(img); - - Sequence 2: - H.load(img); - Both sequence 1 and sequence 2 should have the same effect on H. - !*/ - - inline size_t size ( - ) const; - /*! - ensures - - returns nr()*nc() - !*/ - - inline long nr ( - ) const; - /*! - ensures - - returns the number of rows in this polynomial feature image - !*/ - - inline long nc ( - ) const; - /*! - ensures - - returns the number of columns in this polynomial feature image - !*/ - - long get_num_dimensions ( - ) const; - /*! - ensures - - returns the number of dimensions in the feature vectors generated by - this object. 
- - In this case, this will be the number of coefficients in an order - get_order() polynomial, except for the constant term of the polynomial - if uses_normalization() == true. - !*/ - - inline const descriptor_type& operator() ( - long row, - long col - ) const; - /*! - requires - - 0 <= row < nr() - - 0 <= col < nc() - ensures - - returns the descriptor for the polynomial filtering block at the given row and column. - This vector will contain the polynomial coefficients for a polynomial fitted to the - image patch located at get_block_rect(row,col) in the original image given to load(). - - The returned descriptor vector will have get_num_dimensions() elements. - !*/ - - const rectangle get_block_rect ( - long row, - long col - ) const; - /*! - ensures - - returns a rectangle that tells you what part of the original image is associated - with a particular polynomial filter block. That is, what part of the input image - is associated with (*this)(row,col). - - The returned rectangle will be get_window_size() pixels wide and tall. - !*/ - - const point image_to_feat_space ( - const point& p - ) const; - /*! - ensures - - Each local feature is extracted from a certain point in the input image. - This function returns the identity of the local feature corresponding - to the image location p. Or in other words, let P == image_to_feat_space(p), - then (*this)(P.y(),P.x()) == the local feature closest to, or centered at, - the point p in the input image. Note that some image points might not have - corresponding feature locations. E.g. border points or points outside the - image. In these cases the returned point will be outside get_rect(*this). - !*/ - - const rectangle image_to_feat_space ( - const rectangle& rect - ) const; - /*! - ensures - - returns rectangle(image_to_feat_space(rect.tl_corner()), image_to_feat_space(rect.br_corner())); - (i.e. 
maps a rectangle from image space to feature space) - !*/ - - const point feat_to_image_space ( - const point& p - ) const; - /*! - ensures - - returns the location in the input image space corresponding to the center - of the local feature at point p. In other words, this function computes - the inverse of image_to_feat_space(). Note that it may only do so approximately, - since more than one image location might correspond to the same local feature. - That is, image_to_feat_space() might not be invertible so this function gives - the closest possible result. - !*/ - - const rectangle feat_to_image_space ( - const rectangle& rect - ) const; - /*! - ensures - - return rectangle(feat_to_image_space(rect.tl_corner()), feat_to_image_space(rect.br_corner())); - (i.e. maps a rectangle from feature space to image space) - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - template < - long downsample - > - void serialize ( - const poly_image<downsample>& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - - template < - long downsample - > - void deserialize ( - poly_image<downsample>& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_POLY_ImAGE_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/image_keypoint/surf.h b/ml/dlib/dlib/image_keypoint/surf.h deleted file mode 100644 index d12b30840..000000000 --- a/ml/dlib/dlib/image_keypoint/surf.h +++ /dev/null @@ -1,295 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. 
-#ifndef DLIB_SURf_H_ -#define DLIB_SURf_H_ - -#include "surf_abstract.h" -#include "hessian_pyramid.h" -#include "../matrix.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - struct surf_point - { - interest_point p; - matrix<double,64,1> des; - double angle; - }; - -// ---------------------------------------------------------------------------------------- - - inline void serialize( - const surf_point& item, - std::ostream& out - ) - { - try - { - serialize(item.p,out); - serialize(item.des,out); - serialize(item.angle,out); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while serializing object of type surf_point"); - } - } - -// ---------------------------------------------------------------------------------------- - - inline void deserialize( - surf_point& item, - std::istream& in - ) - { - try - { - deserialize(item.p,in); - deserialize(item.des,in); - deserialize(item.angle,in); - } - catch (serialization_error& e) - { - throw serialization_error(e.info + "\n while deserializing object of type surf_point"); - } - } - -// ---------------------------------------------------------------------------------------- - - inline double gaussian (double x, double y, double sig) - { - DLIB_ASSERT(sig > 0, - "\tdouble gaussian()" - << "\n\t sig must be bigger than 0" - << "\n\t sig: " << sig - ); - const double sqrt_2_pi = 2.5066282746310002416123552393401041626930; - return 1.0/(sig*sqrt_2_pi) * std::exp( -(x*x + y*y)/(2*sig*sig)); - } - -// ---------------------------------------------------------------------------------------- - - template <typename integral_image_type, typename T> - double compute_dominant_angle ( - const integral_image_type& img, - const dlib::vector<T,2>& center, - const double& scale - ) - { - DLIB_ASSERT(get_rect(img).contains(centered_rect(center, (unsigned long)(17*scale),(unsigned long)(17*scale))) == true && - scale > 0, - "\tdouble 
compute_dominant_angle(img, center, scale)" - << "\n\tAll arguments to this function must be > 0" - << "\n\t get_rect(img): " << get_rect(img) - << "\n\t center: " << center - << "\n\t scale: " << scale - ); - - - std::vector<double> ang; - std::vector<dlib::vector<double,2> > samples; - - const long sc = static_cast<long>(scale+0.5); - - // accumulate a bunch of angle and vector samples - dlib::vector<double,2> vect; - for (long r = -6; r <= 6; ++r) - { - for (long c = -6; c <= 6; ++c) - { - if (r*r + c*c < 36) - { - // compute a Gaussian weighted gradient and the gradient's angle. - const double gauss = gaussian(c,r, 2.5); - vect.x() = gauss*haar_x(img, sc*point(c,r)+center, 4*sc); - vect.y() = gauss*haar_y(img, sc*point(c,r)+center, 4*sc); - samples.push_back(vect); - ang.push_back(atan2(vect.y(), vect.x())); - } - } - } - - - // now find the dominant direction - double max_length = 0; - double best_ang = 0; - // look at a bunch of pie shaped slices of a circle - const long slices = 45; - const double ang_step = (2*pi)/slices; - for (long ang_i = 0; ang_i < slices; ++ang_i) - { - // compute the bounding angles - double ang1 = ang_step*ang_i - pi; - double ang2 = ang1 + pi/3; - - - // compute sum of all vectors that are within the above two angles - vect.x() = 0; - vect.y() = 0; - for (unsigned long i = 0; i < ang.size(); ++i) - { - if (ang1 <= ang[i] && ang[i] <= ang2) - { - vect += samples[i]; - } - else if (ang2 > pi && (ang[i] >= ang1 || ang[i] <= (-2*pi+ang2))) - { - vect += samples[i]; - } - } - - - // record the angle of the best vectors - if (length_squared(vect) > max_length) - { - max_length = length_squared(vect); - best_ang = atan2(vect.y(), vect.x()); - } - } - - return best_ang; - } - -// ---------------------------------------------------------------------------------------- - - template <typename integral_image_type, typename T, typename MM, typename L> - void compute_surf_descriptor ( - const integral_image_type& img, - const dlib::vector<T,2>& 
center, - const double scale, - const double angle, - matrix<double,64,1,MM,L>& des - ) - { - DLIB_ASSERT(get_rect(img).contains(centered_rect(center, (unsigned long)(32*scale),(unsigned long)(32*scale))) == true && - scale > 0, - "\tvoid compute_surf_descriptor(img, center, scale, angle)" - << "\n\tAll arguments to this function must be > 0" - << "\n\t get_rect(img): " << get_rect(img) - << "\n\t center: " << center - << "\n\t scale: " << scale - ); - - point_rotator rot(angle); - point_rotator inv_rot(-angle); - - const long sc = static_cast<long>(scale+0.5); - long count = 0; - - // loop over the 4x4 grid of histogram buckets - for (long r = -10; r < 10; r += 5) - { - for (long c = -10; c < 10; c += 5) - { - dlib::vector<double,2> vect, abs_vect, temp; - - // now loop over 25 points in this bucket and sum their features. Note - // that we include 1 pixels worth of padding around the outside of each 5x5 - // cell. This is to help neighboring cells interpolate their counts into - // each other a little bit. - for (long y = r-1; y < r+5+1; ++y) - { - if (y < -10 || y >= 10) - continue; - for (long x = c-1; x < c+5+1; ++x) - { - if (x < -10 || x >= 10) - continue; - - // get the rotated point for this extraction point - point p(rot(point(x,y)*scale) + center); - - // Give points farther from the center of the bucket a lower weight. - const long center_r = r+2; - const long center_c = c+2; - const double weight = 1.0/(4+std::abs(center_r-y) + std::abs(center_c-x)); - - temp.x() = weight*haar_x(img, p, 2*sc); - temp.y() = weight*haar_y(img, p, 2*sc); - - // rotate this vector into alignment with the surf descriptor box - temp = inv_rot(temp); - - vect += temp; - abs_vect += abs(temp); - } - } - - des(count++) = vect.x(); - des(count++) = vect.y(); - des(count++) = abs_vect.x(); - des(count++) = abs_vect.y(); - } - } - - // Return the length normalized descriptor. Add a small number - // to guard against division by zero. 
- const double len = length(des) + 1e-7; - des = des/len; - } - -// ---------------------------------------------------------------------------------------- - - template <typename image_type> - const std::vector<surf_point> get_surf_points ( - const image_type& img, - long max_points = 10000, - double detection_threshold = 30.0 - ) - { - DLIB_ASSERT(max_points > 0 && detection_threshold >= 0, - "\t std::vector<surf_point> get_surf_points()" - << "\n\t Invalid arguments were given to this function." - << "\n\t max_points: " << max_points - << "\n\t detection_threshold: " << detection_threshold - ); - - // Figure out the proper scalar type we should use to work with these pixels. - typedef typename pixel_traits<typename image_traits<image_type>::pixel_type>::basic_pixel_type bp_type; - typedef typename promote<bp_type>::type working_pixel_type; - - // make an integral image first - integral_image_generic<working_pixel_type> int_img; - int_img.load(img); - - // now make a hessian pyramid - hessian_pyramid pyr; - pyr.build_pyramid(int_img, 4, 6, 2); - - // now get all the interest points from the hessian pyramid - std::vector<interest_point> points; - get_interest_points(pyr, detection_threshold, points); - std::vector<surf_point> spoints; - - // sort all the points by how strong their detect is - std::sort(points.rbegin(), points.rend()); - - // now extract SURF descriptors for the points - surf_point sp; - for (unsigned long i = 0; i < std::min((size_t)max_points,points.size()); ++i) - { - // ignore points that are close to the edge of the image - const double border = 32; - const unsigned long border_size = static_cast<unsigned long>(border*points[i].scale); - if (get_rect(int_img).contains(centered_rect(points[i].center, border_size, border_size))) - { - sp.angle = compute_dominant_angle(int_img, points[i].center, points[i].scale); - compute_surf_descriptor(int_img, points[i].center, points[i].scale, sp.angle, sp.des); - sp.p = points[i]; - - spoints.push_back(sp); 
- } - } - - return spoints; - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SURf_H_ - diff --git a/ml/dlib/dlib/image_keypoint/surf_abstract.h b/ml/dlib/dlib/image_keypoint/surf_abstract.h deleted file mode 100644 index e539f3e24..000000000 --- a/ml/dlib/dlib/image_keypoint/surf_abstract.h +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SURf_ABSTRACT_H_ -#ifdef DLIB_SURf_ABSTRACT_H_ - -#include "hessian_pyramid_abstract.h" -#include "../geometry/vector_abstract.h" -#include "../matrix/matrix_abstract.h" -#include "../image_processing/generic_image.h" - -namespace dlib -{ - /* - The functions in this file implement the components of the SURF algorithm - for extracting scale invariant feature descriptors from images. - - For the full story on what this algorithm does and how it works - you should refer to the following papers. - - This is the original paper which introduced the algorithm: - SURF: Speeded Up Robust Features - By Herbert Bay, Tinne Tuytelaars, and Luc Van Gool - - This paper provides a nice detailed overview of how the algorithm works: - Notes on the OpenSURF Library by Christopher Evans - */ - -// ---------------------------------------------------------------------------------------- - - double gaussian ( - double x, - double y, - double sig - ); - /*! - requires - - sig > 0 - ensures - - computes and returns the value of a 2D Gaussian function with mean 0 - and standard deviation sig at the given (x,y) point. - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename integral_image_type, typename T> - double compute_dominant_angle ( - const integral_image_type& img, - const dlib::vector<T,2>& center, - const double& scale - ); - /*! 
- requires - - integral_image_type == an object such as dlib::integral_image or another - type that implements the interface defined in image_transforms/integral_image_abstract.h - - scale > 0 - - get_rect(img).contains(centered_rect(center, 17*scale, 17*scale)) == true - (i.e. center can't be within 17*scale pixels of the edge of the image) - ensures - - computes and returns the dominant angle (i.e. the angle of the dominant gradient) - at the given center point and scale in img. - - The returned angle is in radians. Specifically, if the angle is described by - a vector vect then the angle is exactly the value of std::atan2(vect.y(), vect.x()) - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename integral_image_type, typename T, typename MM, typename L> - void compute_surf_descriptor ( - const integral_image_type& img, - const dlib::vector<T,2>& center, - const double scale, - const double angle, - matrix<double,64,1,MM,L>& des - ) - /*! - requires - - integral_image_type == an object such as dlib::integral_image or another - type that implements the interface defined in image_transforms/integral_image_abstract.h - - scale > 0 - - get_rect(img).contains(centered_rect(center, 32*scale, 32*scale)) == true - (i.e. center can't be within 32*scale pixels of the edge of the image) - ensures - - computes the 64 dimensional SURF descriptor vector of a box centered - at the given center point, tilted at an angle determined by the given - angle, and sized according to the given scale. - - #des == the computed SURF descriptor vector extracted from the img object. - - The angle is measured in radians and measures the degree of counter-clockwise - rotation around the center point. This is the same kind of rotation as is - performed by the dlib::rotate_point() function. - !*/ - -// ---------------------------------------------------------------------------------------- - - struct surf_point - { - /*! 
- WHAT THIS OBJECT REPRESENTS - This object represents a detected SURF point. The meanings of - its fields are defined below in the get_surf_points() function. - !*/ - - interest_point p; - matrix<double,64,1> des; - double angle; - }; - -// ---------------------------------------------------------------------------------------- - - void serialize ( - const surf_point& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - - void deserialize ( - surf_point& item, - std::istream& in - ); - /*! - provides serialization support - !*/ - -// ---------------------------------------------------------------------------------------- - - template <typename image_type> - const std::vector<surf_point> get_surf_points ( - const image_type& img, - long max_points = 10000, - double detection_threshold = 30.0 - ); - /*! - requires - - max_points > 0 - - detection_threshold >= 0 - - image_type == an image object that implements the interface defined in - dlib/image_processing/generic_image.h - - Let P denote the type of pixel in img, then we require: - - pixel_traits<P>::has_alpha == false - ensures - - This function runs the complete SURF algorithm on the given input image and - returns the points it found. - - returns a vector V such that: - - V.size() <= max_points - - for all valid i: - - V[i] == a SURF point found in the given input image img - - V[i].p == the interest_point extracted from the hessian pyramid for this - SURF point. - - V[i].des == the SURF descriptor for this point (calculated using - compute_surf_descriptor()) - - V[i].angle == the angle of the SURF box at this point (calculated using - compute_dominant_angle()) - - V[i].p.score >= detection_threshold - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_SURf_ABSTRACT_H_ - - |