diff options
Diffstat (limited to 'ml/dlib/dlib/lsh')
-rw-r--r-- | ml/dlib/dlib/lsh/create_random_projection_hash.h | 232 | ||||
-rw-r--r-- | ml/dlib/dlib/lsh/create_random_projection_hash_abstract.h | 148 | ||||
-rw-r--r-- | ml/dlib/dlib/lsh/hashes.h | 219 | ||||
-rw-r--r-- | ml/dlib/dlib/lsh/hashes_abstract.h | 286 | ||||
-rw-r--r-- | ml/dlib/dlib/lsh/projection_hash.h | 118 | ||||
-rw-r--r-- | ml/dlib/dlib/lsh/projection_hash_abstract.h | 119 |
6 files changed, 0 insertions, 1122 deletions
diff --git a/ml/dlib/dlib/lsh/create_random_projection_hash.h b/ml/dlib/dlib/lsh/create_random_projection_hash.h deleted file mode 100644 index b3aecd9ec..000000000 --- a/ml/dlib/dlib/lsh/create_random_projection_hash.h +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_CREATE_RANDOM_PROJECTION_HAsH_Hh_ -#define DLIB_CREATE_RANDOM_PROJECTION_HAsH_Hh_ - -#include "create_random_projection_hash_abstract.h" -#include "projection_hash.h" -#include "../matrix.h" -#include "../rand.h" -#include "../statistics.h" -#include "../svm.h" -#include <vector> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type - > - projection_hash create_random_projection_hash ( - const vector_type& v, - const int bits, - dlib::rand& rnd - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < bits && bits <= 32 && - v.size() > 1, - "\t projection_hash create_random_projection_hash()" - << "\n\t Invalid arguments were given to this function." - << "\n\t bits: " << bits - << "\n\t v.size(): " << v.size() - ); - -#ifdef ENABLE_ASSERTS - for (unsigned long i = 0; i < v.size(); ++i) - { - DLIB_ASSERT(v[0].size() == v[i].size() && v[i].size() > 0 && is_col_vector(v[i]), - "\t projection_hash create_random_projection_hash()" - << "\n\t Invalid arguments were given to this function." - << "\n\t m(0).size(): " << v[0].size() - << "\n\t m("<<i<<").size(): " << v[i].size() - << "\n\t is_col_vector(v["<<i<<"]): " << is_col_vector(v[i]) - ); - } -#endif - - running_covariance<matrix<double> > rc; - for (unsigned long i = 0; i < v.size(); ++i) - rc.add(matrix_cast<double>(v[i])); - - // compute a whitening matrix - matrix<double> whiten = trans(chol(pinv(rc.covariance()))); - - - // hashes - std::vector<unsigned long> h(v.size(),0); - - std::vector<double> vals(v.size(),0); - - // number of hits for each hash value - std::vector<unsigned long> counts; - - std::vector<double> temp; - - // build a random projection matrix - matrix<double> proj(bits, v[0].size()); - for (long r = 0; r < proj.nr(); ++r) - for (long c = 0; c < proj.nc(); ++c) - proj(r,c) = rnd.get_random_gaussian(); - - // merge whitening matrix with projection matrix - proj = proj*whiten; - - matrix<double,0,1> offset(bits); - - - // figure out what the offset values should be - for (int itr = 0; itr < offset.size(); ++itr) - { - counts.assign(static_cast<unsigned long>(std::pow(2.0,bits)), 0); - // count the popularity of each hash value - for (unsigned long i = 0; i < h.size(); ++i) - { - h[i] <<= 1; - counts[h[i]] += 1; - } - - const unsigned long max_h = index_of_max(mat(counts)); - - temp.clear(); - for (unsigned long i = 0; i < v.size(); ++i) - { - vals[i] = dot(rowm(proj,itr), matrix_cast<double>(v[i])); - if (h[i] == max_h) - temp.push_back(vals[i]); - } - - // split down the middle - std::sort(temp.begin(), temp.end()); - const double split = temp[temp.size()/2]; - offset(itr) = -split; - - for (unsigned long i = 0; i < vals.size(); ++i) - { - if (vals[i] - split > 0) - h[i] |= 1; - } - } - - - return projection_hash(proj, offset); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type - > - projection_hash create_random_projection_hash ( - const vector_type& v, - const int bits - ) - { - dlib::rand rnd; - return create_random_projection_hash(v,bits,rnd); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type - > - projection_hash create_max_margin_projection_hash ( - const vector_type& v, - const int bits, - const double C, - dlib::rand& rnd - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < bits && bits <= 32 && - v.size() > 1, - "\t projection_hash create_max_margin_projection_hash()" - << "\n\t Invalid arguments were given to this function." - << "\n\t bits: " << bits - << "\n\t v.size(): " << v.size() - ); - -#ifdef ENABLE_ASSERTS - for (unsigned long i = 0; i < v.size(); ++i) - { - DLIB_ASSERT(v[0].size() == v[i].size() && v[i].size() > 0 && is_col_vector(v[i]), - "\t projection_hash create_max_margin_projection_hash()" - << "\n\t Invalid arguments were given to this function." - << "\n\t m(0).size(): " << v[0].size() - << "\n\t m("<<i<<").size(): " << v[i].size() - << "\n\t is_col_vector(v["<<i<<"]): " << is_col_vector(v[i]) - ); - } -#endif - - running_covariance<matrix<double> > rc; - for (unsigned long i = 0; i < v.size(); ++i) - rc.add(matrix_cast<double>(v[i])); - - // compute a whitening matrix - matrix<double> whiten = trans(chol(pinv(rc.covariance()))); - const matrix<double,0,1> meanval = whiten*rc.mean(); - - - - typedef matrix<double,0,1> sample_type; - random_subset_selector<sample_type> training_samples; - random_subset_selector<double> training_labels; - // We set this up to use enough samples to cover the vector space used by elements - // of v. - training_samples.set_max_size(v[0].size()*10); - training_labels.set_max_size(v[0].size()*10); - - matrix<double> proj(bits, v[0].size()); - matrix<double,0,1> offset(bits); - - // learn the random planes and put them into proj and offset. - for (int itr = 0; itr < offset.size(); ++itr) - { - training_samples.make_empty(); - training_labels.make_empty(); - // pick random training data and give each sample a random label. - for (unsigned long i = 0; i < v.size(); ++i) - { - training_samples.add(whiten*v[i]-meanval); - if (rnd.get_random_double() > 0.5) - training_labels.add(+1); - else - training_labels.add(-1); - } - - svm_c_linear_dcd_trainer<linear_kernel<sample_type> > trainer; - trainer.set_c(C); - decision_function<linear_kernel<sample_type> > df = trainer.train(training_samples, training_labels); - offset(itr) = -df.b; - set_rowm(proj,itr) = trans(df.basis_vectors(0)); - } - - - return projection_hash(proj*whiten, offset-proj*meanval); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type - > - projection_hash create_max_margin_projection_hash ( - const vector_type& v, - const int bits, - const double C = 10 - ) - { - dlib::rand rnd; - return create_max_margin_projection_hash(v,bits,C,rnd); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_CREATE_RANDOM_PROJECTION_HAsH_Hh_ - diff --git a/ml/dlib/dlib/lsh/create_random_projection_hash_abstract.h b/ml/dlib/dlib/lsh/create_random_projection_hash_abstract.h deleted file mode 100644 index cff55b9a5..000000000 --- a/ml/dlib/dlib/lsh/create_random_projection_hash_abstract.h +++ /dev/null @@ -1,148 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_CREATE_RANDOM_PROJECTION_HAsH_ABSTRACT_Hh_ -#ifdef DLIB_CREATE_RANDOM_PROJECTION_HAsH_ABSTRACT_Hh_ - -#include "projection_hash_abstract.h" -#include "../rand.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type - > - projection_hash create_random_projection_hash ( - const vector_type& v, - const int bits, - dlib::rand& rnd - ); - /*! - requires - - 0 < bits <= 32 - - v.size() > 1 - - vector_type == a std::vector or compatible type containing dlib::matrix - objects, each representing a column vector of the same size. - - for all valid i, j: - - is_col_vector(v[i]) == true - - v[i].size() > 0 - - v[i].size() == v[j].size() - - i.e. v contains only column vectors and all the column vectors - have the same non-zero length - - rand_type == a type that implements the dlib/rand/rand_kernel_abstract.h interface - ensures - - returns a hash function H such that: - - H.num_hash_bins() == pow(2,bits) - - H will be setup so that it hashes the contents of v such that each bin - ends up with roughly the same number of elements in it. This is - accomplished by picking random hyperplanes passing though the data. In - particular, each plane normal vector is filled with Gaussian random - numbers and we also perform basic centering to ensure the plane passes - though the data. - - This function uses the supplied random number generator, rnd, to drive part - of it's processing. Therefore, giving different random number generators - will produce different outputs. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type - > - projection_hash create_random_projection_hash ( - const vector_type& v, - const int bits - ); - /*! - requires - - 0 < bits <= 32 - - v.size() > 1 - - vector_type == a std::vector or compatible type containing dlib::matrix - objects, each representing a column vector of the same size. - - for all valid i, j: - - is_col_vector(v[i]) == true - - v[i].size() > 0 - - v[i].size() == v[j].size() - - i.e. v contains only column vectors and all the column vectors - have the same non-zero length - ensures - - returns create_random_projection_hash(v,bits,dlib::rand()) - (i.e. calls the above function with a default initialized random number generator) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type - > - projection_hash create_max_margin_projection_hash ( - const vector_type& v, - const int bits, - const double C, - dlib::rand& rnd - ); - /*! - requires - - 0 < bits <= 32 - - v.size() > 1 - - vector_type == a std::vector or compatible type containing dlib::matrix - objects, each representing a column vector of the same size. - - for all valid i, j: - - is_col_vector(v[i]) == true - - v[i].size() > 0 - - v[i].size() == v[j].size() - - i.e. v contains only column vectors and all the column vectors - have the same non-zero length - - rand_type == a type that implements the dlib/rand/rand_kernel_abstract.h interface - ensures - - returns a hash function H such that: - - H.num_hash_bins() == pow(2,bits) - - H will be setup so that it hashes the contents of v such that - each bin ends up with roughly the same number of elements - in it. This is accomplished using a variation on the random hyperplane - generation technique from the paper: - Random Maximum Margin Hashing by Alexis Joly and Olivier Buisson - In particular, we use the svm_c_linear_dcd_trainer to generate planes. - We train it on randomly selected and randomly labeled points from v. - The C SVM parameter is set to the given C argument. - - This function uses the supplied random number generator, rnd, to drive part - of it's processing. Therefore, giving different random number generators - will produce different outputs. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type - > - projection_hash create_max_margin_projection_hash ( - const vector_type& v, - const int bits, - const double C = 10 - ); - /*! - requires - - 0 < bits <= 32 - - v.size() > 1 - - vector_type == a std::vector or compatible type containing dlib::matrix - objects, each representing a column vector of the same size. - - for all valid i, j: - - is_col_vector(v[i]) == true - - v[i].size() > 0 - - v[i].size() == v[j].size() - - i.e. v contains only column vectors and all the column vectors - have the same non-zero length - ensures - - returns create_max_margin_projection_hash(v,bits,C,dlib::rand()) - (i.e. calls the above function with a default initialized random number generator) - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_CREATE_RANDOM_PROJECTION_HAsH_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/lsh/hashes.h b/ml/dlib/dlib/lsh/hashes.h deleted file mode 100644 index 35053ce4e..000000000 --- a/ml/dlib/dlib/lsh/hashes.h +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_LSH_HAShES_Hh_ -#define DLIB_LSH_HAShES_Hh_ - -#include "hashes_abstract.h" -#include "../hash.h" -#include "../matrix.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class hash_similar_angles_64 - { - public: - hash_similar_angles_64 ( - ) : seed(0) {} - - hash_similar_angles_64 ( - const uint64 seed_ - ) : seed(seed_) {} - - uint64 get_seed ( - ) const { return seed; } - - - typedef uint64 result_type; - - template < - typename sparse_vector_type - > - typename disable_if<is_matrix<sparse_vector_type>,uint64>::type operator() ( - const sparse_vector_type& v - ) const - { - typedef typename sparse_vector_type::value_type::second_type scalar_type; - - uint64 temp = 0; - for (int i = 0; i < 64; ++i) - { - // compute the dot product between v and a Gaussian random vector. - scalar_type val = 0; - for (typename sparse_vector_type::const_iterator j = v.begin(); j != v.end(); ++j) - val += j->second*gaussian_random_hash(j->first, i, seed); - - if (val > 0) - temp |= 1; - temp <<= 1; - } - return temp; - } - - template <typename EXP> - uint64 operator() ( - const matrix_exp<EXP>& v - ) const - { - typedef typename EXP::type T; - uint64 temp = 0; - for (unsigned long i = 0; i < 64; ++i) - { - if (dot(matrix_cast<T>(gaussian_randm(v.size(),1,i+seed*64)), v) > 0) - temp |= 1; - temp <<= 1; - } - return temp; - } - - unsigned int distance ( - const result_type& a, - const result_type& b - ) const - { - return hamming_distance(a,b); - } - - private: - const uint64 seed; - }; - -// ---------------------------------------------------------------------------------------- - - class hash_similar_angles_128 - { - public: - hash_similar_angles_128 ( - ) : seed(0),hasher1(0), hasher2(1) {} - - hash_similar_angles_128 ( - const uint64 seed_ - ) : seed(seed_),hasher1(2*seed),hasher2(2*seed+1) {} - - uint64 get_seed ( - ) const { return seed; } - - typedef std::pair<uint64,uint64> result_type; - - template < - typename vector_type - > - result_type operator() ( - const vector_type& v - ) const - { - return std::make_pair(hasher1(v), hasher2(v)); - } - - unsigned int distance ( - const result_type& a, - const result_type& b - ) const - { - return hamming_distance(a.first,b.first) + - hamming_distance(a.second,b.second); - } - - private: - const uint64 seed; - hash_similar_angles_64 hasher1; - hash_similar_angles_64 hasher2; - - }; - -// ---------------------------------------------------------------------------------------- - - class hash_similar_angles_256 - { - public: - hash_similar_angles_256 ( - ) : seed(0), hasher1(0), hasher2(1) {} - - hash_similar_angles_256 ( - const uint64 seed_ - ) : seed(seed_),hasher1(2*seed),hasher2(2*seed+1) {} - - uint64 get_seed ( - ) const { return seed; } - - typedef std::pair<uint64,uint64> hash128_type; - typedef std::pair<hash128_type,hash128_type> result_type; - - template < - typename vector_type - > - result_type operator() ( - const vector_type& v - ) const - { - return std::make_pair(hasher1(v), hasher2(v)); - } - - unsigned int distance ( - const result_type& a, - const result_type& b - ) const - { - return hasher1.distance(a.first,b.first) + - hasher1.distance(a.second,b.second); - } - - private: - const uint64 seed; - hash_similar_angles_128 hasher1; - hash_similar_angles_128 hasher2; - - }; - -// ---------------------------------------------------------------------------------------- - - class hash_similar_angles_512 - { - public: - hash_similar_angles_512 ( - ) : seed(0), hasher1(0), hasher2(1) {} - - hash_similar_angles_512 ( - const uint64 seed_ - ) : seed(seed_),hasher1(2*seed),hasher2(2*seed+1) {} - - uint64 get_seed ( - ) const { return seed; } - - - typedef hash_similar_angles_256::result_type hash256_type; - typedef std::pair<hash256_type,hash256_type> result_type; - - template < - typename vector_type - > - result_type operator() ( - const vector_type& v - ) const - { - return std::make_pair(hasher1(v), hasher2(v)); - } - - unsigned int distance ( - const result_type& a, - const result_type& b - ) const - { - return hasher1.distance(a.first,b.first) + - hasher1.distance(a.second,b.second); - } - - private: - const uint64 seed; - hash_similar_angles_256 hasher1; - hash_similar_angles_256 hasher2; - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_LSH_HAShES_Hh_ - diff --git a/ml/dlib/dlib/lsh/hashes_abstract.h b/ml/dlib/dlib/lsh/hashes_abstract.h deleted file mode 100644 index 27f8ddb69..000000000 --- a/ml/dlib/dlib/lsh/hashes_abstract.h +++ /dev/null @@ -1,286 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_LSH_HAShES_ABSTRACT_Hh_ -#ifdef DLIB_LSH_HAShES_ABSTRACT_Hh_ - -#include "../matrix.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class hash_similar_angles_64 - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is a tool for computing locality sensitive hashes that give - vectors with small angles between each other similar hash values. In - particular, this object creates 64 random planes which pass though the - origin and uses them to create a 64bit hash. To compute the hash for a new - vector, this object checks which side of each plane the vector falls on and - records this information into a 64bit integer. - !*/ - - public: - - hash_similar_angles_64 ( - ); - /*! - ensures - - #get_seed() == 0 - !*/ - - hash_similar_angles_64 ( - const uint64 seed - ); - /*! - ensures - - #get_seed() == seed - !*/ - - uint64 get_seed ( - ) const; - /*! - ensures - - returns the random seed used to generate the random planes used for - hashing. - !*/ - - typedef uint64 result_type; - - template <typename vector_type> - result_type perator() ( - const vector_type& v - ) const; - /*! - requires - - v is an unsorted sparse vector or a dlib matrix representing either a - column or row vector. - ensures - - returns a 64 bit hash of the input vector v. The bits in the hash record - which side of each random plane v falls on. - - !*/ - - unsigned int distance ( - const result_type& a, - const result_type& b - ) const; - /*! - ensures - - returns the Hamming distance between the two hashes given to this - function. That is, we return the number of bits in a and b which differ. - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - struct hash_similar_angles_128 - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is a tool for computing locality sensitive hashes that give - vectors with small angles between each other similar hash values. In - particular, this object creates 128 random planes which pass though the - origin and uses them to create a 128bit hash. To compute the hash for a new - vector, this object checks which side of each plane the vector falls on and - records this information into a 128bit integer. - !*/ - - public: - - hash_similar_angles_128 ( - ); - /*! - ensures - - #get_seed() == 0 - !*/ - - hash_similar_angles_128 ( - const uint64 seed - ); - /*! - ensures - - #get_seed() == seed - !*/ - - uint64 get_seed ( - ) const; - /*! - ensures - - returns the random seed used to generate the random planes used for - hashing. - !*/ - - typedef std::pair<uint64,uint64> result_type; - - template <typename vector_type> - result_type perator() ( - const vector_type& v - ) const; - /*! - requires - - v is an unsorted sparse vector or a dlib matrix representing either a - column or row vector. - ensures - - returns a 128 bit hash of the input vector v. The bits in the hash record - which side of each random plane v falls on. - - !*/ - - unsigned int distance ( - const result_type& a, - const result_type& b - ) const; - /*! - ensures - - returns the Hamming distance between the two hashes given to this - function. That is, we return the number of bits in a and b which differ. - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - struct hash_similar_angles_256 - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is a tool for computing locality sensitive hashes that give - vectors with small angles between each other similar hash values. In - particular, this object creates 256 random planes which pass though the - origin and uses them to create a 256bit hash. To compute the hash for a new - vector, this object checks which side of each plane the vector falls on and - records this information into a 256bit integer. - !*/ - - public: - - hash_similar_angles_256 ( - ); - /*! - ensures - - #get_seed() == 0 - !*/ - - hash_similar_angles_256 ( - const uint64 seed - ); - /*! - ensures - - #get_seed() == seed - !*/ - - uint64 get_seed ( - ) const; - /*! - ensures - - returns the random seed used to generate the random planes used for - hashing. - !*/ - - typedef std::pair<uint64,uint64> hash128_type; - typedef std::pair<hash128_type,hash128_type> result_type; - - template <typename vector_type> - result_type perator() ( - const vector_type& v - ) const; - /*! - requires - - v is an unsorted sparse vector or a dlib matrix representing either a - column or row vector. - ensures - - returns a 256 bit hash of the input vector v. The bits in the hash record - which side of each random plane v falls on. - - !*/ - - unsigned int distance ( - const result_type& a, - const result_type& b - ) const; - /*! - ensures - - returns the Hamming distance between the two hashes given to this - function. That is, we return the number of bits in a and b which differ. - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - struct hash_similar_angles_512 - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is a tool for computing locality sensitive hashes that give - vectors with small angles between each other similar hash values. In - particular, this object creates 512 random planes which pass though the - origin and uses them to create a 512bit hash. To compute the hash for a new - vector, this object checks which side of each plane the vector falls on and - records this information into a 512bit integer. - !*/ - - public: - - hash_similar_angles_512 ( - ); - /*! - ensures - - #get_seed() == 0 - !*/ - - hash_similar_angles_512 ( - const uint64 seed - ); - /*! - ensures - - #get_seed() == seed - !*/ - - uint64 get_seed ( - ) const; - /*! - ensures - - returns the random seed used to generate the random planes used for - hashing. - !*/ - - typedef hash_similar_angles_256::result_type hash256_type; - typedef std::pair<hash256_type,hash256_type> result_type; - - template <typename vector_type> - result_type perator() ( - const vector_type& v - ) const; - /*! - requires - - v is an unsorted sparse vector or a dlib matrix representing either a - column or row vector. - ensures - - returns a 512 bit hash of the input vector v. The bits in the hash record - which side of each random plane v falls on. - - !*/ - - unsigned int distance ( - const result_type& a, - const result_type& b - ) const; - /*! - ensures - - returns the Hamming distance between the two hashes given to this - function. That is, we return the number of bits in a and b which differ. - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_LSH_HAShES_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/lsh/projection_hash.h b/ml/dlib/dlib/lsh/projection_hash.h deleted file mode 100644 index 16de0ba11..000000000 --- a/ml/dlib/dlib/lsh/projection_hash.h +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_PROJECTION_HASh_Hh_ -#define DLIB_PROJECTION_HASh_Hh_ - -#include "projection_hash_abstract.h" -#include "../matrix.h" -#include "../rand.h" -#include <vector> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class projection_hash - { - public: - - projection_hash() {} - - template <typename EXP1, typename EXP2> - projection_hash( - const matrix_exp<EXP1>& proj_, - const matrix_exp<EXP2>& offset_ - ) : proj(proj_), offset(offset_) - { - // make sure requires clause is not broken - DLIB_ASSERT(proj.nr() == offset.nr(), - "\t projection_hash::projection_hash()" - << "\n\t Invalid arguments were given to this function." - << "\n\t proj.nr(): " << proj.nr() - << "\n\t offset.nr(): " << offset.nr() - ); - - } - - const matrix<double>& get_projection_matrix ( - ) const { return proj; } - - const matrix<double,0,1>& get_offset_matrix ( - ) const { return offset; } - - unsigned long num_hash_bins ( - ) const - { - return static_cast<unsigned long>(std::pow(2.0, (double)offset.size())); - } - - template <typename EXP> - unsigned long operator() ( - const matrix_exp<EXP>& v - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(is_col_vector(v) && - v.size() == get_projection_matrix().nc() && - v.size() > 0, - "\t unsigned long projection_hash::operator()(v)" - << "\n\t Invalid arguments were given to this function." - << "\n\t is_col_vector(v): " << is_col_vector(v) - << "\n\t get_projection_matrix().nc(): " << get_projection_matrix().nc() - << "\n\t v.size(): " << v.size() - ); - - return do_hash(proj*matrix_cast<double>(v) + offset); - } - - private: - - template <typename EXP> - unsigned long do_hash ( - const matrix_exp<EXP>& v - ) const - { - unsigned long h = 0; - for (long i = 0; i < v.size(); ++i) - { - h <<= 1; - if (v(i) > 0) - h |= 1; - } - return h; - } - - matrix<double> proj; - matrix<double,0,1> offset; - }; - -// ---------------------------------------------------------------------------------------- - - inline void serialize ( - const projection_hash& item, - std::ostream& out - ) - { - serialize(item.get_projection_matrix(), out); - serialize(item.get_offset_matrix(), out); - } - - inline void deserialize ( - projection_hash& item, - std::istream& in - ) - { - matrix<double> proj; - matrix<double,0,1> offset; - deserialize(proj, in); - deserialize(offset, in); - item = projection_hash(proj, offset); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_PROJECTION_HASh_Hh_ - diff --git a/ml/dlib/dlib/lsh/projection_hash_abstract.h b/ml/dlib/dlib/lsh/projection_hash_abstract.h deleted file mode 100644 index abe78d10c..000000000 --- a/ml/dlib/dlib/lsh/projection_hash_abstract.h +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_PROJECTION_HASh_ABSTRACT_Hh_ -#ifdef DLIB_PROJECTION_HASh_ABSTRACT_Hh_ - -#include "../matrix.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class projection_hash - { - /*! - WHAT THIS OBJECT REPRESENTS - This is a tool for hashing elements of a vector space into the integers. - It is intended to represent locality sensitive hashing functions such as - the popular random projection hashing method. - - In particular, it represents hash functions of the form: - hash bit 0 = sign(rowm(P*v + O,0)) - hash bit 1 = sign(rowm(P*v + O,1)) - hash bit 2 = sign(rowm(P*v + O,2)) - ... - Where v is the vector to be hashed. The parameters of the projection - hash are the P and O matrices. - - THREAD SAFETY - The const members of this object can be called concurrently from multiple - threads, however, any operation that modifies the state of an instance of - this object must serialize access to that instance. - !*/ - public: - - projection_hash( - ); - /*! - ensures - - #get_projection_matrix().size() == 0 - - #get_offset_matrix().size() == 0 - !*/ - - template <typename EXP1, typename EXP2> - projection_hash( - const matrix_exp<EXP1>& proj, - const matrix_exp<EXP2>& offset - ); - /*! - requires - - proj.nr() == offset.nr() - ensures - - #get_projection_matrix() == proj - - #get_offset_matrix() == offset - !*/ - - const matrix<double>& get_projection_matrix ( - ) const; - /*! - ensures - - returns the P matrix discussed above in the WHAT THIS OBJECT REPRESENTS - section. - !*/ - - const matrix<double,0,1>& get_offset_matrix ( - ) const; - /*! - ensures - - returns the O matrix discussed above in the WHAT THIS OBJECT REPRESENTS - section. - !*/ - - unsigned long num_hash_bins ( - ) const; - /*! - ensures - - returns the number of possible outputs from this hashing function. - - Specifically, returns: std::pow(2, get_offset_matrix().size()) - !*/ - - template <typename EXP> - unsigned long operator() ( - const matrix_exp<EXP>& v - ) const; - /*! - requires - - is_col_vector(v) == true - - v.size() == get_projection_matrix().nc() - - v.size() > 0 - ensures - - hashes v into the range [0, num_hash_bins()) using the method - discussed in the WHAT THIS OBJECT REPRESENTS section. - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - void serialize ( - const projection_hash& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - - void deserialize ( - projection_hash& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_PROJECTION_HASh_ABSTRACT_Hh_ - |