summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/lsh
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/lsh')
-rw-r--r--ml/dlib/dlib/lsh/create_random_projection_hash.h232
-rw-r--r--ml/dlib/dlib/lsh/create_random_projection_hash_abstract.h148
-rw-r--r--ml/dlib/dlib/lsh/hashes.h219
-rw-r--r--ml/dlib/dlib/lsh/hashes_abstract.h286
-rw-r--r--ml/dlib/dlib/lsh/projection_hash.h118
-rw-r--r--ml/dlib/dlib/lsh/projection_hash_abstract.h119
6 files changed, 0 insertions, 1122 deletions
diff --git a/ml/dlib/dlib/lsh/create_random_projection_hash.h b/ml/dlib/dlib/lsh/create_random_projection_hash.h
deleted file mode 100644
index b3aecd9ec..000000000
--- a/ml/dlib/dlib/lsh/create_random_projection_hash.h
+++ /dev/null
@@ -1,232 +0,0 @@
-// Copyright (C) 2011 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_CREATE_RANDOM_PROJECTION_HAsH_Hh_
-#define DLIB_CREATE_RANDOM_PROJECTION_HAsH_Hh_
-
-#include "create_random_projection_hash_abstract.h"
-#include "projection_hash.h"
-#include "../matrix.h"
-#include "../rand.h"
-#include "../statistics.h"
-#include "../svm.h"
-#include <vector>
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename vector_type
- >
- projection_hash create_random_projection_hash (
- const vector_type& v,
- const int bits,
- dlib::rand& rnd
- )
- {
- // Builds a hash from `bits` Gaussian random planes, whitened using v's covariance. First make sure the requires clause is not broken.
- DLIB_ASSERT(0 < bits && bits <= 32 &&
- v.size() > 1,
- "\t projection_hash create_random_projection_hash()"
- << "\n\t Invalid arguments were given to this function."
- << "\n\t bits: " << bits
- << "\n\t v.size(): " << v.size()
- );
- // The per-element checks below are compiled only when ENABLE_ASSERTS is defined.
-#ifdef ENABLE_ASSERTS
- for (unsigned long i = 0; i < v.size(); ++i)
- {
- DLIB_ASSERT(v[0].size() == v[i].size() && v[i].size() > 0 && is_col_vector(v[i]),
- "\t projection_hash create_random_projection_hash()"
- << "\n\t Invalid arguments were given to this function."
- << "\n\t m(0).size(): " << v[0].size()
- << "\n\t m("<<i<<").size(): " << v[i].size()
- << "\n\t is_col_vector(v["<<i<<"]): " << is_col_vector(v[i])
- );
- }
-#endif
- // accumulate the covariance of the input vectors, each converted to double
- running_covariance<matrix<double> > rc;
- for (unsigned long i = 0; i < v.size(); ++i)
- rc.add(matrix_cast<double>(v[i]));
-
- // compute a whitening matrix: the transposed Cholesky factor of the pseudo-inverse of the covariance
- matrix<double> whiten = trans(chol(pinv(rc.covariance())));
-
-
- // h[i] holds the hash bits assigned to v[i] so far; the newest bit is the lowest one
- std::vector<unsigned long> h(v.size(),0);
- // vals[i] holds the projection of v[i] onto the plane currently being processed
- std::vector<double> vals(v.size(),0);
-
- // number of hits for each hash value
- std::vector<unsigned long> counts;
- // scratch space: the projections of the elements that fall into the most popular bin
- std::vector<double> temp;
-
- // build a random projection matrix with one row of Gaussian random numbers per hash bit
- matrix<double> proj(bits, v[0].size());
- for (long r = 0; r < proj.nr(); ++r)
- for (long c = 0; c < proj.nc(); ++c)
- proj(r,c) = rnd.get_random_gaussian();
-
- // merge whitening matrix with projection matrix
- proj = proj*whiten;
- // one offset per hash bit, each entry is filled in by the loop below
- matrix<double,0,1> offset(bits);
-
-
- // figure out what the offset values should be, one plane (row of proj) per iteration
- for (int itr = 0; itr < offset.size(); ++itr)
- {
- counts.assign(static_cast<unsigned long>(std::pow(2.0,bits)), 0);
- // make room for the next bit, then count the popularity of each hash value
- for (unsigned long i = 0; i < h.size(); ++i)
- {
- h[i] <<= 1;
- counts[h[i]] += 1;
- }
- // max_h is the index of the largest count (per index_of_max), i.e. the bin with the most hits so far
- const unsigned long max_h = index_of_max(mat(counts));
-
- temp.clear();
- for (unsigned long i = 0; i < v.size(); ++i)
- {
- vals[i] = dot(rowm(proj,itr), matrix_cast<double>(v[i]));
- if (h[i] == max_h)
- temp.push_back(vals[i]);
- }
-
- // split that bin down the middle: the threshold is temp's middle element once sorted
- std::sort(temp.begin(), temp.end());
- const double split = temp[temp.size()/2];
- offset(itr) = -split;
- // every element (not only those in bin max_h) strictly above the threshold gets the new bit set
- for (unsigned long i = 0; i < vals.size(); ++i)
- {
- if (vals[i] - split > 0)
- h[i] |= 1;
- }
- }
-
-
- return projection_hash(proj, offset);
- }
-
-// ----------------------------------------------------------------------------------------
-
-    // Convenience overload: runs the three-argument version with a default constructed generator.
-    template <typename vector_type>
-    projection_hash create_random_projection_hash (
-        const vector_type& v,
-        const int bits
-    )
-    {
-        // a named lvalue is required: the main overload takes the generator by non-const reference
-        dlib::rand default_generator;
-        return create_random_projection_hash(v, bits, default_generator);
-    }
-
-// ----------------------------------------------------------------------------------------
-
-    // Learns a projection_hash whose planes are linear SVM separators of randomly labeled samples of v.
-    // Training happens in whitened, mean-centered coordinates; the result is mapped back to input space.
-    template <typename vector_type>
-    projection_hash create_max_margin_projection_hash (
-        const vector_type& v,
-        const int bits,
-        const double C,
-        dlib::rand& rnd
-    )
-    {
-        // make sure requires clause is not broken
-        DLIB_ASSERT(0 < bits && bits <= 32 &&
-                    v.size() > 1,
-            "\t projection_hash create_max_margin_projection_hash()"
-            << "\n\t Invalid arguments were given to this function."
-            << "\n\t bits: " << bits
-            << "\n\t v.size(): " << v.size()
-            );
-
-#ifdef ENABLE_ASSERTS
-        for (unsigned long i = 0; i < v.size(); ++i)
-        {
-            DLIB_ASSERT(v[0].size() == v[i].size() && v[i].size() > 0 && is_col_vector(v[i]),
-                "\t projection_hash create_max_margin_projection_hash()"
-                << "\n\t Invalid arguments were given to this function."
-                << "\n\t m(0).size(): " << v[0].size()
-                << "\n\t m("<<i<<").size(): " << v[i].size()
-                << "\n\t is_col_vector(v["<<i<<"]): " << is_col_vector(v[i])
-                );
-        }
-#endif
-
-        running_covariance<matrix<double> > rc;
-        for (unsigned long i = 0; i < v.size(); ++i)
-            rc.add(matrix_cast<double>(v[i]));
-
-        // compute a whitening matrix and the data mean expressed in whitened coordinates
-        matrix<double> whiten = trans(chol(pinv(rc.covariance())));
-        const matrix<double,0,1> meanval = whiten*rc.mean();
-
-        typedef matrix<double,0,1> sample_type;
-        random_subset_selector<sample_type> training_samples;
-        random_subset_selector<double> training_labels;
-        // We set this up to use enough samples to cover the vector space used by elements
-        // of v.
-        training_samples.set_max_size(v[0].size()*10);
-        training_labels.set_max_size(v[0].size()*10);
-
-        matrix<double> proj(bits, v[0].size());
-        matrix<double,0,1> offset(bits);
-
-        // learn the random planes and put them into proj and offset.
-        for (int itr = 0; itr < offset.size(); ++itr)
-        {
-            training_samples.make_empty();
-            training_labels.make_empty();
-            // pick random training data and give each sample a random label.
-            for (unsigned long i = 0; i < v.size(); ++i)
-            {
-                // convert to double first (as done when filling rc above) so element types other
-                // than double, e.g. float, combine consistently with the double whitening matrix
-                training_samples.add(whiten*matrix_cast<double>(v[i])-meanval);
-                if (rnd.get_random_double() > 0.5)
-                    training_labels.add(+1);
-                else
-                    training_labels.add(-1);
-            }
-
-            svm_c_linear_dcd_trainer<linear_kernel<sample_type> > trainer;
-            trainer.set_c(C);
-            decision_function<linear_kernel<sample_type> > df = trainer.train(training_samples, training_labels);
-            offset(itr) = -df.b;
-            set_rowm(proj,itr) = trans(df.basis_vectors(0));
-        }
-
-        // fold the whitening and centering into the result so the hash applies directly to raw inputs
-        return projection_hash(proj*whiten, offset-proj*meanval);
-    }
-
-// ----------------------------------------------------------------------------------------
-
-    // Convenience overload: runs the four-argument version with a default constructed generator.
-    template <typename vector_type>
-    projection_hash create_max_margin_projection_hash (
-        const vector_type& v,
-        const int bits,
-        const double C = 10
-    )
-    {
-        // a named lvalue is required: the main overload takes the generator by non-const reference
-        dlib::rand default_generator;
-        return create_max_margin_projection_hash(v, bits, C, default_generator);
-    }
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_CREATE_RANDOM_PROJECTION_HAsH_Hh_
-
diff --git a/ml/dlib/dlib/lsh/create_random_projection_hash_abstract.h b/ml/dlib/dlib/lsh/create_random_projection_hash_abstract.h
deleted file mode 100644
index cff55b9a5..000000000
--- a/ml/dlib/dlib/lsh/create_random_projection_hash_abstract.h
+++ /dev/null
@@ -1,148 +0,0 @@
-// Copyright (C) 2011 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#undef DLIB_CREATE_RANDOM_PROJECTION_HAsH_ABSTRACT_Hh_
-#ifdef DLIB_CREATE_RANDOM_PROJECTION_HAsH_ABSTRACT_Hh_
-
-#include "projection_hash_abstract.h"
-#include "../rand.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename vector_type
- >
- projection_hash create_random_projection_hash (
- const vector_type& v,
- const int bits,
- dlib::rand& rnd
- );
- /*!
- requires
- - 0 < bits <= 32
- - v.size() > 1
- - vector_type == a std::vector or compatible type containing dlib::matrix
- objects, each representing a column vector of the same size.
- - for all valid i, j:
- - is_col_vector(v[i]) == true
- - v[i].size() > 0
- - v[i].size() == v[j].size()
- - i.e. v contains only column vectors and all the column vectors
- have the same non-zero length
- - rand_type == a type that implements the dlib/rand/rand_kernel_abstract.h interface
- ensures
- - returns a hash function H such that:
- - H.num_hash_bins() == pow(2,bits)
- - H will be set up so that it hashes the contents of v such that each bin
- ends up with roughly the same number of elements in it. This is
- accomplished by picking random hyperplanes passing through the data. In
- particular, each plane normal vector is filled with Gaussian random
- numbers and we also perform basic centering to ensure the plane passes
- through the data.
- - This function uses the supplied random number generator, rnd, to drive part
- of its processing. Therefore, giving different random number generators
- will produce different outputs.
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename vector_type
- >
- projection_hash create_random_projection_hash (
- const vector_type& v,
- const int bits
- );
- /*!
- requires
- - 0 < bits <= 32
- - v.size() > 1
- - vector_type == a std::vector or compatible type containing dlib::matrix
- objects, each representing a column vector of the same size.
- - for all valid i, j:
- - is_col_vector(v[i]) == true
- - v[i].size() > 0
- - v[i].size() == v[j].size()
- - i.e. v contains only column vectors and all the column vectors
- have the same non-zero length
- ensures
- - returns create_random_projection_hash(v,bits,dlib::rand())
- (i.e. calls the above function with a default initialized random number generator)
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename vector_type
- >
- projection_hash create_max_margin_projection_hash (
- const vector_type& v,
- const int bits,
- const double C,
- dlib::rand& rnd
- );
- /*!
- requires
- - 0 < bits <= 32
- - v.size() > 1
- - vector_type == a std::vector or compatible type containing dlib::matrix
- objects, each representing a column vector of the same size.
- - for all valid i, j:
- - is_col_vector(v[i]) == true
- - v[i].size() > 0
- - v[i].size() == v[j].size()
- - i.e. v contains only column vectors and all the column vectors
- have the same non-zero length
- - rand_type == a type that implements the dlib/rand/rand_kernel_abstract.h interface
- ensures
- - returns a hash function H such that:
- - H.num_hash_bins() == pow(2,bits)
- - H will be setup so that it hashes the contents of v such that
- each bin ends up with roughly the same number of elements
- in it. This is accomplished using a variation on the random hyperplane
- generation technique from the paper:
- Random Maximum Margin Hashing by Alexis Joly and Olivier Buisson
- In particular, we use the svm_c_linear_dcd_trainer to generate planes.
- We train it on randomly selected and randomly labeled points from v.
- The C SVM parameter is set to the given C argument.
- - This function uses the supplied random number generator, rnd, to drive part
- of its processing. Therefore, giving different random number generators
- will produce different outputs.
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename vector_type
- >
- projection_hash create_max_margin_projection_hash (
- const vector_type& v,
- const int bits,
- const double C = 10
- );
- /*!
- requires
- - 0 < bits <= 32
- - v.size() > 1
- - vector_type == a std::vector or compatible type containing dlib::matrix
- objects, each representing a column vector of the same size.
- - for all valid i, j:
- - is_col_vector(v[i]) == true
- - v[i].size() > 0
- - v[i].size() == v[j].size()
- - i.e. v contains only column vectors and all the column vectors
- have the same non-zero length
- ensures
- - returns create_max_margin_projection_hash(v,bits,C,dlib::rand())
- (i.e. calls the above function with a default initialized random number generator)
- !*/
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_CREATE_RANDOM_PROJECTION_HAsH_ABSTRACT_Hh_
-
-
diff --git a/ml/dlib/dlib/lsh/hashes.h b/ml/dlib/dlib/lsh/hashes.h
deleted file mode 100644
index 35053ce4e..000000000
--- a/ml/dlib/dlib/lsh/hashes.h
+++ /dev/null
@@ -1,219 +0,0 @@
-// Copyright (C) 2013 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_LSH_HAShES_Hh_
-#define DLIB_LSH_HAShES_Hh_
-
-#include "hashes_abstract.h"
-#include "../hash.h"
-#include "../matrix.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
-    // 64-bit locality sensitive hash: bits come from the signs of dot products with seeded Gaussian random vectors.
-    class hash_similar_angles_64
-    {
-    public:
-        typedef uint64 result_type;
-
-        hash_similar_angles_64 () : seed(0) {}
-
-        hash_similar_angles_64 (const uint64 seed_) : seed(seed_) {}
-
-        // the seed this hasher was constructed with
-        uint64 get_seed () const { return seed; }
-
-        // Overload for anything that is not a dlib matrix: v is walked as a sequence of
-        // entries whose .first is used as an index and whose .second is the value.
-        // NOTE(review): the accumulator is shifted after every plane, including the last one,
-        // so the bit produced for plane 0 is shifted out and bit 0 of the result is always
-        // zero. Preserved as is because changing it would alter every hash value produced.
-        template <typename sparse_vector_type>
-        typename disable_if<is_matrix<sparse_vector_type>,uint64>::type operator() (
-            const sparse_vector_type& v
-        ) const
-        {
-            typedef typename sparse_vector_type::value_type::second_type scalar_type;
-            typedef typename sparse_vector_type::const_iterator entry_iterator;
-
-            uint64 bits_so_far = 0;
-            for (int plane = 0; plane < 64; ++plane)
-            {
-                // dot product between v and the Gaussian random vector for this plane
-                scalar_type projection = 0;
-                for (entry_iterator entry = v.begin(); entry != v.end(); ++entry)
-                    projection += entry->second*gaussian_random_hash(entry->first, plane, seed);
-
-                bits_so_far = (bits_so_far | (projection > 0 ? 1 : 0)) << 1;
-            }
-            return bits_so_far;
-        }
-
-        // Overload for dlib matrix expressions. The same trailing shift as in the sparse
-        // overload applies here.
-        template <typename EXP>
-        uint64 operator() (
-            const matrix_exp<EXP>& v
-        ) const
-        {
-            typedef typename EXP::type T;
-            uint64 bits_so_far = 0;
-            for (unsigned long plane = 0; plane < 64; ++plane)
-            {
-                // plane+seed*64 is presumably the seed of the random vector -- confirm against gaussian_randm
-                if (dot(matrix_cast<T>(gaussian_randm(v.size(),1,plane+seed*64)), v) > 0)
-                    bits_so_far |= 1;
-                bits_so_far <<= 1;
-            }
-            return bits_so_far;
-        }
-
-        // distance between two hashes, as computed by hamming_distance
-        unsigned int distance (const result_type& a, const result_type& b) const
-        {
-            return hamming_distance(a,b);
-        }
-
-    private:
-        const uint64 seed;
-    };
-
-// ----------------------------------------------------------------------------------------
-
-    // 128-bit hash made of two hash_similar_angles_64 hashers that use seeds 2*seed and 2*seed+1.
-    class hash_similar_angles_128
-    {
-    public:
-        typedef std::pair<uint64,uint64> result_type;
-
-        hash_similar_angles_128 () : seed(0), hasher1(0), hasher2(1) {}
-
-        // seed is declared before the hashers, so it is already set when they are initialized
-        hash_similar_angles_128 (
-            const uint64 seed_
-        ) : seed(seed_), hasher1(2*seed), hasher2(2*seed+1) {}
-
-        uint64 get_seed () const { return seed; }
-
-        // first holds hasher1's 64 bits, second holds hasher2's
-        template <typename vector_type>
-        result_type operator() (
-            const vector_type& v
-        ) const
-        {
-            const uint64 first_half = hasher1(v);
-            const uint64 second_half = hasher2(v);
-            return result_type(first_half, second_half);
-        }
-
-        // sum of the Hamming distances of the two 64-bit halves
-        unsigned int distance (
-            const result_type& a,
-            const result_type& b
-        ) const
-        {
-            return hamming_distance(a.first,b.first) + hamming_distance(a.second,b.second);
-        }
-
-    private:
-        const uint64 seed;
-        hash_similar_angles_64 hasher1;
-        hash_similar_angles_64 hasher2;
-    };
-
-// ----------------------------------------------------------------------------------------
-
-    // 256-bit hash made of two hash_similar_angles_128 hashers that use seeds 2*seed and 2*seed+1.
-    class hash_similar_angles_256
-    {
-    public:
-        typedef std::pair<uint64,uint64> hash128_type;
-        typedef std::pair<hash128_type,hash128_type> result_type;
-
-        hash_similar_angles_256 () : seed(0), hasher1(0), hasher2(1) {}
-
-        // seed is declared before the hashers, so it is already set when they are initialized
-        hash_similar_angles_256 (
-            const uint64 seed_
-        ) : seed(seed_), hasher1(2*seed), hasher2(2*seed+1) {}
-
-        uint64 get_seed () const { return seed; }
-
-        // first holds hasher1's 128 bits, second holds hasher2's
-        template <typename vector_type>
-        result_type operator() (
-            const vector_type& v
-        ) const
-        {
-            const hash128_type first_half = hasher1(v);
-            const hash128_type second_half = hasher2(v);
-            return result_type(first_half, second_half);
-        }
-
-        // sum of the distances of the two 128-bit halves; both halves are scored through hasher1
-        unsigned int distance (
-            const result_type& a,
-            const result_type& b
-        ) const
-        {
-            return hasher1.distance(a.first,b.first) + hasher1.distance(a.second,b.second);
-        }
-
-    private:
-        const uint64 seed;
-        hash_similar_angles_128 hasher1;
-        hash_similar_angles_128 hasher2;
-    };
-
-// ----------------------------------------------------------------------------------------
-
-    // 512-bit hash made of two hash_similar_angles_256 hashers that use seeds 2*seed and 2*seed+1.
-    class hash_similar_angles_512
-    {
-    public:
-        typedef hash_similar_angles_256::result_type hash256_type;
-        typedef std::pair<hash256_type,hash256_type> result_type;
-
-        hash_similar_angles_512 () : seed(0), hasher1(0), hasher2(1) {}
-
-        // seed is declared before the hashers, so it is already set when they are initialized
-        hash_similar_angles_512 (
-            const uint64 seed_
-        ) : seed(seed_), hasher1(2*seed), hasher2(2*seed+1) {}
-
-        uint64 get_seed () const { return seed; }
-
-        // first holds hasher1's 256 bits, second holds hasher2's
-        template <typename vector_type>
-        result_type operator() (
-            const vector_type& v
-        ) const
-        {
-            const hash256_type first_half = hasher1(v);
-            const hash256_type second_half = hasher2(v);
-            return result_type(first_half, second_half);
-        }
-
-        // sum of the distances of the two 256-bit halves; both halves are scored through hasher1
-        unsigned int distance (
-            const result_type& a,
-            const result_type& b
-        ) const
-        {
-            return hasher1.distance(a.first,b.first) + hasher1.distance(a.second,b.second);
-        }
-
-    private:
-        const uint64 seed;
-        hash_similar_angles_256 hasher1;
-        hash_similar_angles_256 hasher2;
-    };
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_LSH_HAShES_Hh_
-
diff --git a/ml/dlib/dlib/lsh/hashes_abstract.h b/ml/dlib/dlib/lsh/hashes_abstract.h
deleted file mode 100644
index 27f8ddb69..000000000
--- a/ml/dlib/dlib/lsh/hashes_abstract.h
+++ /dev/null
@@ -1,286 +0,0 @@
-// Copyright (C) 2013 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#undef DLIB_LSH_HAShES_ABSTRACT_Hh_
-#ifdef DLIB_LSH_HAShES_ABSTRACT_Hh_
-
-#include "../matrix.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- class hash_similar_angles_64
- {
- /*!
- WHAT THIS OBJECT REPRESENTS
- This object is a tool for computing locality sensitive hashes that give
- vectors with small angles between each other similar hash values. In
- particular, this object creates 64 random planes which pass through the
- origin and uses them to create a 64bit hash. To compute the hash for a new
- vector, this object checks which side of each plane the vector falls on and
- records this information into a 64bit integer.
- !*/
-
- public:
-
- hash_similar_angles_64 (
- );
- /*!
- ensures
- - #get_seed() == 0
- !*/
-
- hash_similar_angles_64 (
- const uint64 seed
- );
- /*!
- ensures
- - #get_seed() == seed
- !*/
-
- uint64 get_seed (
- ) const;
- /*!
- ensures
- - returns the random seed used to generate the random planes used for
- hashing.
- !*/
-
- typedef uint64 result_type;
-
- template <typename vector_type>
- result_type operator() (
- const vector_type& v
- ) const;
- /*!
- requires
- - v is an unsorted sparse vector or a dlib matrix representing either a
- column or row vector.
- ensures
- - returns a 64 bit hash of the input vector v. The bits in the hash record
- which side of each random plane v falls on.
-
- !*/
-
- unsigned int distance (
- const result_type& a,
- const result_type& b
- ) const;
- /*!
- ensures
- - returns the Hamming distance between the two hashes given to this
- function. That is, we return the number of bits in a and b which differ.
- !*/
- };
-
-// ----------------------------------------------------------------------------------------
-
- struct hash_similar_angles_128
- {
- /*!
- WHAT THIS OBJECT REPRESENTS
- This object is a tool for computing locality sensitive hashes that give
- vectors with small angles between each other similar hash values. In
- particular, this object creates 128 random planes which pass through the
- origin and uses them to create a 128bit hash. To compute the hash for a new
- vector, this object checks which side of each plane the vector falls on and
- records this information into a 128bit integer.
- !*/
-
- public:
-
- hash_similar_angles_128 (
- );
- /*!
- ensures
- - #get_seed() == 0
- !*/
-
- hash_similar_angles_128 (
- const uint64 seed
- );
- /*!
- ensures
- - #get_seed() == seed
- !*/
-
- uint64 get_seed (
- ) const;
- /*!
- ensures
- - returns the random seed used to generate the random planes used for
- hashing.
- !*/
-
- typedef std::pair<uint64,uint64> result_type;
-
- template <typename vector_type>
- result_type operator() (
- const vector_type& v
- ) const;
- /*!
- requires
- - v is an unsorted sparse vector or a dlib matrix representing either a
- column or row vector.
- ensures
- - returns a 128 bit hash of the input vector v. The bits in the hash record
- which side of each random plane v falls on.
-
- !*/
-
- unsigned int distance (
- const result_type& a,
- const result_type& b
- ) const;
- /*!
- ensures
- - returns the Hamming distance between the two hashes given to this
- function. That is, we return the number of bits in a and b which differ.
- !*/
-
- };
-
-// ----------------------------------------------------------------------------------------
-
- struct hash_similar_angles_256
- {
- /*!
- WHAT THIS OBJECT REPRESENTS
- This object is a tool for computing locality sensitive hashes that give
- vectors with small angles between each other similar hash values. In
- particular, this object creates 256 random planes which pass through the
- origin and uses them to create a 256bit hash. To compute the hash for a new
- vector, this object checks which side of each plane the vector falls on and
- records this information into a 256bit integer.
- !*/
-
- public:
-
- hash_similar_angles_256 (
- );
- /*!
- ensures
- - #get_seed() == 0
- !*/
-
- hash_similar_angles_256 (
- const uint64 seed
- );
- /*!
- ensures
- - #get_seed() == seed
- !*/
-
- uint64 get_seed (
- ) const;
- /*!
- ensures
- - returns the random seed used to generate the random planes used for
- hashing.
- !*/
-
- typedef std::pair<uint64,uint64> hash128_type;
- typedef std::pair<hash128_type,hash128_type> result_type;
-
- template <typename vector_type>
- result_type operator() (
- const vector_type& v
- ) const;
- /*!
- requires
- - v is an unsorted sparse vector or a dlib matrix representing either a
- column or row vector.
- ensures
- - returns a 256 bit hash of the input vector v. The bits in the hash record
- which side of each random plane v falls on.
-
- !*/
-
- unsigned int distance (
- const result_type& a,
- const result_type& b
- ) const;
- /*!
- ensures
- - returns the Hamming distance between the two hashes given to this
- function. That is, we return the number of bits in a and b which differ.
- !*/
-
- };
-
-// ----------------------------------------------------------------------------------------
-
- struct hash_similar_angles_512
- {
- /*!
- WHAT THIS OBJECT REPRESENTS
- This object is a tool for computing locality sensitive hashes that give
- vectors with small angles between each other similar hash values. In
- particular, this object creates 512 random planes which pass through the
- origin and uses them to create a 512bit hash. To compute the hash for a new
- vector, this object checks which side of each plane the vector falls on and
- records this information into a 512bit integer.
- !*/
-
- public:
-
- hash_similar_angles_512 (
- );
- /*!
- ensures
- - #get_seed() == 0
- !*/
-
- hash_similar_angles_512 (
- const uint64 seed
- );
- /*!
- ensures
- - #get_seed() == seed
- !*/
-
- uint64 get_seed (
- ) const;
- /*!
- ensures
- - returns the random seed used to generate the random planes used for
- hashing.
- !*/
-
- typedef hash_similar_angles_256::result_type hash256_type;
- typedef std::pair<hash256_type,hash256_type> result_type;
-
- template <typename vector_type>
- result_type operator() (
- const vector_type& v
- ) const;
- /*!
- requires
- - v is an unsorted sparse vector or a dlib matrix representing either a
- column or row vector.
- ensures
- - returns a 512 bit hash of the input vector v. The bits in the hash record
- which side of each random plane v falls on.
-
- !*/
-
- unsigned int distance (
- const result_type& a,
- const result_type& b
- ) const;
- /*!
- ensures
- - returns the Hamming distance between the two hashes given to this
- function. That is, we return the number of bits in a and b which differ.
- !*/
-
- };
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_LSH_HAShES_ABSTRACT_Hh_
-
-
diff --git a/ml/dlib/dlib/lsh/projection_hash.h b/ml/dlib/dlib/lsh/projection_hash.h
deleted file mode 100644
index 16de0ba11..000000000
--- a/ml/dlib/dlib/lsh/projection_hash.h
+++ /dev/null
@@ -1,118 +0,0 @@
-// Copyright (C) 2011 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_PROJECTION_HASh_Hh_
-#define DLIB_PROJECTION_HASh_Hh_
-
-#include "projection_hash_abstract.h"
-#include "../matrix.h"
-#include "../rand.h"
-#include <vector>
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
-    // Hash function of the form sign(proj*v + offset), packing one bit per row into an unsigned long.
-    class projection_hash
-    {
-    public:
-        // creates a hash whose projection and offset matrices are default constructed
-        projection_hash() {}
-
-        // proj_ must have as many rows as offset_; each row/offset pair yields one hash bit
-        template <typename EXP1, typename EXP2>
-        projection_hash(
-            const matrix_exp<EXP1>& proj_,
-            const matrix_exp<EXP2>& offset_
-        ) : proj(proj_), offset(offset_)
-        {
-            // make sure requires clause is not broken
-            DLIB_ASSERT(proj.nr() == offset.nr(),
-                "\t projection_hash::projection_hash()"
-                << "\n\t Invalid arguments were given to this function."
-                << "\n\t proj.nr(): " << proj.nr()
-                << "\n\t offset.nr(): " << offset.nr()
-                );
-        }
-
-        const matrix<double>& get_projection_matrix () const { return proj; }
-
-        const matrix<double,0,1>& get_offset_matrix () const { return offset; }
-
-        // Returns 2^offset.size(), the number of distinct values operator() can produce.
-        unsigned long num_hash_bins () const
-        {
-            // An integer shift gives the exact power of two with no round trip through floating
-            // point. As before, offset.size() must be smaller than the bit width of unsigned long.
-            return 1UL << offset.size();
-        }
-
-        // Hashes the column vector v by taking the signs of the elements of proj*v + offset.
-        template <typename EXP>
-        unsigned long operator() (
-            const matrix_exp<EXP>& v
-        ) const
-        {
-            // make sure requires clause is not broken
-            DLIB_ASSERT(is_col_vector(v) &&
-                        v.size() == get_projection_matrix().nc() &&
-                        v.size() > 0,
-                "\t unsigned long projection_hash::operator()(v)"
-                << "\n\t Invalid arguments were given to this function."
-                << "\n\t is_col_vector(v): " << is_col_vector(v)
-                << "\n\t get_projection_matrix().nc(): " << get_projection_matrix().nc()
-                << "\n\t v.size(): " << v.size()
-                );
-
-            return do_hash(proj*matrix_cast<double>(v) + offset);
-        }
-
-    private:
-        // Packs the signs of v's elements into an integer: an element > 0 sets a bit, earlier elements land in higher bits.
-        template <typename EXP>
-        unsigned long do_hash (const matrix_exp<EXP>& v) const
-        {
-            unsigned long h = 0;
-            for (long i = 0; i < v.size(); ++i)
-            {
-                h <<= 1;
-                if (v(i) > 0)
-                    h |= 1;
-            }
-            return h;
-        }
-
-        matrix<double> proj;
-        matrix<double,0,1> offset;
-    };
-
-// ----------------------------------------------------------------------------------------
-
-    // Writes the projection matrix followed by the offset matrix to out.
-    inline void serialize (const projection_hash& item, std::ostream& out)
-    {
-        const matrix<double>& projection = item.get_projection_matrix();
-        const matrix<double,0,1>& offsets = item.get_offset_matrix();
-        serialize(projection, out);
-        serialize(offsets, out);
-    }
-
-    // Reads a projection matrix and then an offset matrix from in and rebuilds item from them.
-    inline void deserialize (projection_hash& item, std::istream& in)
-    {
-        matrix<double> loaded_projection;
-        deserialize(loaded_projection, in);
-
-        matrix<double,0,1> loaded_offsets;
-        deserialize(loaded_offsets, in);
-        // item is only assigned after both reads have returned
-        item = projection_hash(loaded_projection, loaded_offsets);
-    }
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_PROJECTION_HASh_Hh_
-
diff --git a/ml/dlib/dlib/lsh/projection_hash_abstract.h b/ml/dlib/dlib/lsh/projection_hash_abstract.h
deleted file mode 100644
index abe78d10c..000000000
--- a/ml/dlib/dlib/lsh/projection_hash_abstract.h
+++ /dev/null
@@ -1,119 +0,0 @@
-// Copyright (C) 2011 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#undef DLIB_PROJECTION_HASh_ABSTRACT_Hh_
-#ifdef DLIB_PROJECTION_HASh_ABSTRACT_Hh_
-
-#include "../matrix.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- class projection_hash
- {
- /*!
- WHAT THIS OBJECT REPRESENTS
- This is a tool for hashing elements of a vector space into the integers.
- It is intended to represent locality sensitive hashing functions such as
- the popular random projection hashing method.
-
- In particular, it represents hash functions of the form:
- hash bit 0 = sign(rowm(P*v + O,0))
- hash bit 1 = sign(rowm(P*v + O,1))
- hash bit 2 = sign(rowm(P*v + O,2))
- ...
- Where v is the vector to be hashed. The parameters of the projection
- hash are the P and O matrices.
-
- THREAD SAFETY
- The const members of this object can be called concurrently from multiple
- threads, however, any operation that modifies the state of an instance of
- this object must serialize access to that instance.
- !*/
- public:
-
- projection_hash(
- );
- /*!
- ensures
- - #get_projection_matrix().size() == 0
- - #get_offset_matrix().size() == 0
- !*/
-
- template <typename EXP1, typename EXP2>
- projection_hash(
- const matrix_exp<EXP1>& proj,
- const matrix_exp<EXP2>& offset
- );
- /*!
- requires
- - proj.nr() == offset.nr()
- ensures
- - #get_projection_matrix() == proj
- - #get_offset_matrix() == offset
- !*/
-
- const matrix<double>& get_projection_matrix (
- ) const;
- /*!
- ensures
- - returns the P matrix discussed above in the WHAT THIS OBJECT REPRESENTS
- section.
- !*/
-
- const matrix<double,0,1>& get_offset_matrix (
- ) const;
- /*!
- ensures
- - returns the O matrix discussed above in the WHAT THIS OBJECT REPRESENTS
- section.
- !*/
-
- unsigned long num_hash_bins (
- ) const;
- /*!
- ensures
- - returns the number of possible outputs from this hashing function.
- - Specifically, returns: std::pow(2, get_offset_matrix().size())
- !*/
-
- template <typename EXP>
- unsigned long operator() (
- const matrix_exp<EXP>& v
- ) const;
- /*!
- requires
- - is_col_vector(v) == true
- - v.size() == get_projection_matrix().nc()
- - v.size() > 0
- ensures
- - hashes v into the range [0, num_hash_bins()) using the method
- discussed in the WHAT THIS OBJECT REPRESENTS section.
- !*/
- };
-
-// ----------------------------------------------------------------------------------------
-
- void serialize (
- const projection_hash& item,
- std::ostream& out
- );
- /*!
- provides serialization support
- !*/
-
- void deserialize (
- projection_hash& item,
- std::istream& in
- );
- /*!
- provides deserialization support
- !*/
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_PROJECTION_HASh_ABSTRACT_Hh_
-