6 files changed, 1122 insertions, 0 deletions
diff --git a/ml/dlib/dlib/lsh/create_random_projection_hash.h b/ml/dlib/dlib/lsh/create_random_projection_hash.h
new file mode 100644
index 000000000..b3aecd9ec
--- /dev/null
+++ b/ml/dlib/dlib/lsh/create_random_projection_hash.h
@@ -0,0 +1,232 @@
+// Copyright (C) 2011  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_CREATE_RANDOM_PROJECTION_HAsH_Hh_
+#define DLIB_CREATE_RANDOM_PROJECTION_HAsH_Hh_
+
+#include "create_random_projection_hash_abstract.h"
+#include "projection_hash.h"
+#include "../matrix.h"
+#include "../rand.h"
+#include "../statistics.h"
+#include "../svm.h"
+#include <vector>
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename vector_type
+        >
+    projection_hash create_random_projection_hash (
+        const vector_type& v,
+        const int bits,
+        dlib::rand& rnd
+    ) 
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(0 < bits && bits <= 32 &&
+                    v.size() > 1,
+            "\t projection_hash create_random_projection_hash()"
+            << "\n\t Invalid arguments were given to this function."
+            << "\n\t bits: " << bits
+            << "\n\t v.size(): " << v.size() 
+            );
+
+#ifdef ENABLE_ASSERTS
+        for (unsigned long i = 0; i < v.size(); ++i)
+        {
+            DLIB_ASSERT(v[0].size() == v[i].size() && v[i].size() > 0 && is_col_vector(v[i]), 
+                    "\t projection_hash create_random_projection_hash()"
+                   << "\n\t Invalid arguments were given to this function."
+                   << "\n\t m(0).size(): " << v[0].size()
+                   << "\n\t m("<<i<<").size(): " << v[i].size() 
+                   << "\n\t is_col_vector(v["<<i<<"]): " << is_col_vector(v[i]) 
+                );
+        }
+#endif
+
+        running_covariance<matrix<double> > rc;
+        for (unsigned long i = 0; i < v.size(); ++i)
+            rc.add(matrix_cast<double>(v[i]));
+
+        // compute a whitening matrix
+        matrix<double> whiten = trans(chol(pinv(rc.covariance())));
+
+
+        // hashes
+        std::vector<unsigned long> h(v.size(),0);
+
+        std::vector<double> vals(v.size(),0);
+
+        // number of hits for each hash value
+        std::vector<unsigned long> counts;
+
+        std::vector<double> temp;
+
+        // build a random projection matrix
+        matrix<double> proj(bits, v[0].size());
+        for (long r = 0; r < proj.nr(); ++r)
+            for (long c = 0; c < proj.nc(); ++c)
+                proj(r,c) = rnd.get_random_gaussian();
+
+        // merge whitening matrix with projection matrix
+        proj = proj*whiten;
+
+        matrix<double,0,1> offset(bits);
+
+
+        // figure out what the offset values should be
+        for (int itr = 0; itr < offset.size(); ++itr)
+        {
+            counts.assign(static_cast<unsigned long>(std::pow(2.0,bits)), 0);
+            // count the popularity of each hash value
+            for (unsigned long i = 0; i < h.size(); ++i)
+            {
+                h[i] <<= 1;
+                counts[h[i]] += 1;
+            }
+
+            const unsigned long max_h = index_of_max(mat(counts));
+
+            temp.clear();
+            for (unsigned long i = 0; i < v.size(); ++i)
+            {
+                vals[i] = dot(rowm(proj,itr), matrix_cast<double>(v[i]));
+                if (h[i] == max_h)
+                    temp.push_back(vals[i]);
+            }
+
+            // split down the middle
+            std::sort(temp.begin(), temp.end());
+            const double split = temp[temp.size()/2];
+            offset(itr) = -split;
+
+            for (unsigned long i = 0; i < vals.size(); ++i)
+            {
+                if (vals[i] - split > 0)
+                    h[i] |= 1;
+            }
+        }
+
+
+        return projection_hash(proj, offset);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename vector_type
+        >
+    projection_hash create_random_projection_hash (
+        const vector_type& v,
+        const int bits
+    ) 
+    {
+        dlib::rand rnd;
+        return create_random_projection_hash(v,bits,rnd);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename vector_type
+        >
+    projection_hash create_max_margin_projection_hash (
+        const vector_type& v,
+        const int bits,
+        const double C,
+        dlib::rand& rnd
+    ) 
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(0 < bits && bits <= 32 &&
+                    v.size() > 1,
+            "\t projection_hash create_max_margin_projection_hash()"
+            << "\n\t Invalid arguments were given to this function."
+            << "\n\t bits: " << bits
+            << "\n\t v.size(): " << v.size() 
+            );
+
+#ifdef ENABLE_ASSERTS
+        for (unsigned long i = 0; i < v.size(); ++i)
+        {
+            DLIB_ASSERT(v[0].size() == v[i].size() && v[i].size() > 0 && is_col_vector(v[i]), 
+                    "\t projection_hash create_max_margin_projection_hash()"
+                   << "\n\t Invalid arguments were given to this function."
+                   << "\n\t m(0).size(): " << v[0].size()
+                   << "\n\t m("<<i<<").size(): " << v[i].size() 
+                   << "\n\t is_col_vector(v["<<i<<"]): " << is_col_vector(v[i]) 
+                );
+        }
+#endif
+
+        running_covariance<matrix<double> > rc;
+        for (unsigned long i = 0; i < v.size(); ++i)
+            rc.add(matrix_cast<double>(v[i]));
+
+        // compute a whitening matrix
+        matrix<double> whiten = trans(chol(pinv(rc.covariance())));
+        const matrix<double,0,1> meanval = whiten*rc.mean();
+
+
+
+        typedef matrix<double,0,1> sample_type;
+        random_subset_selector<sample_type> training_samples;
+        random_subset_selector<double> training_labels;
+        // We set this up to use enough samples to cover the vector space used by elements
+        // of v.  
+        training_samples.set_max_size(v[0].size()*10);
+        training_labels.set_max_size(v[0].size()*10);
+
+        matrix<double> proj(bits, v[0].size());
+        matrix<double,0,1> offset(bits);
+
+        // learn the random planes and put them into proj and offset.
+        for (int itr = 0; itr < offset.size(); ++itr)
+        {
+            training_samples.make_empty();
+            training_labels.make_empty();
+            // pick random training data and give each sample a random label.
+            for (unsigned long i = 0; i < v.size(); ++i)
+            {
+                training_samples.add(whiten*v[i]-meanval);
+                if (rnd.get_random_double() > 0.5)
+                    training_labels.add(+1);
+                else
+                    training_labels.add(-1);
+            }
+
+            svm_c_linear_dcd_trainer<linear_kernel<sample_type> > trainer;
+            trainer.set_c(C);
+            decision_function<linear_kernel<sample_type> > df = trainer.train(training_samples, training_labels);
+            offset(itr) = -df.b;
+            set_rowm(proj,itr) = trans(df.basis_vectors(0));
+        }
+
+
+        return projection_hash(proj*whiten, offset-proj*meanval);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename vector_type
+        >
+    projection_hash create_max_margin_projection_hash (
+        const vector_type& v,
+        const int bits,
+        const double C = 10
+    ) 
+    {
+        dlib::rand rnd;
+        return create_max_margin_projection_hash(v,bits,C,rnd);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_CREATE_RANDOM_PROJECTION_HAsH_Hh_
+
diff --git a/ml/dlib/dlib/lsh/create_random_projection_hash_abstract.h b/ml/dlib/dlib/lsh/create_random_projection_hash_abstract.h
new file mode 100644
index 000000000..cff55b9a5
--- /dev/null
+++ b/ml/dlib/dlib/lsh/create_random_projection_hash_abstract.h
@@ -0,0 +1,148 @@
+// Copyright (C) 2011  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_CREATE_RANDOM_PROJECTION_HAsH_ABSTRACT_Hh_
+#ifdef DLIB_CREATE_RANDOM_PROJECTION_HAsH_ABSTRACT_Hh_
+
+#include "projection_hash_abstract.h"
+#include "../rand.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename vector_type
+        >
+    projection_hash create_random_projection_hash (
+        const vector_type& v,
+        const int bits,
+        dlib::rand& rnd
+    );
+    /*!
+        requires
+            - 0 < bits <= 32
+            - v.size() > 1
+            - vector_type == a std::vector or compatible type containing dlib::matrix 
+              objects, each representing a column vector of the same size.
+            - for all valid i, j:
+                - is_col_vector(v[i]) == true 
+                - v[i].size() > 0
+                - v[i].size() == v[j].size() 
+                - i.e. v contains only column vectors and all the column vectors
+                  have the same non-zero length
+            - rand_type == a type that implements the dlib/rand/rand_kernel_abstract.h interface
+        ensures
+            - returns a hash function H such that:
+                - H.num_hash_bins() == pow(2,bits)
+                - H will be setup so that it hashes the contents of v such that each bin
+                  ends up with roughly the same number of elements in it.  This is
+                  accomplished by picking random hyperplanes passing though the data.  In
+                  particular, each plane normal vector is filled with Gaussian random
+                  numbers and we also perform basic centering to ensure the plane passes
+                  though the data.
+            - This function uses the supplied random number generator, rnd, to drive part
+              of it's processing.  Therefore, giving different random number generators
+              will produce different outputs.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename vector_type
+        >
+    projection_hash create_random_projection_hash (
+        const vector_type& v,
+        const int bits
+    );
+    /*!
+        requires
+            - 0 < bits <= 32
+            - v.size() > 1
+            - vector_type == a std::vector or compatible type containing dlib::matrix 
+              objects, each representing a column vector of the same size.
+            - for all valid i, j:
+                - is_col_vector(v[i]) == true 
+                - v[i].size() > 0
+                - v[i].size() == v[j].size() 
+                - i.e. v contains only column vectors and all the column vectors
+                  have the same non-zero length
+        ensures
+            - returns create_random_projection_hash(v,bits,dlib::rand())
+              (i.e. calls the above function with a default initialized random number generator)
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename vector_type
+        >
+    projection_hash create_max_margin_projection_hash (
+        const vector_type& v,
+        const int bits,
+        const double C,
+        dlib::rand& rnd
+    );
+    /*!
+        requires
+            - 0 < bits <= 32
+            - v.size() > 1
+            - vector_type == a std::vector or compatible type containing dlib::matrix 
+              objects, each representing a column vector of the same size.
+            - for all valid i, j:
+                - is_col_vector(v[i]) == true 
+                - v[i].size() > 0
+                - v[i].size() == v[j].size() 
+                - i.e. v contains only column vectors and all the column vectors
+                  have the same non-zero length
+            - rand_type == a type that implements the dlib/rand/rand_kernel_abstract.h interface
+        ensures
+            - returns a hash function H such that:
+                - H.num_hash_bins() == pow(2,bits)
+                - H will be setup so that it hashes the contents of v such that
+                  each bin ends up with roughly the same number of elements
+                  in it.  This is accomplished using a variation on the random hyperplane
+                  generation technique from the paper:
+                    Random Maximum Margin Hashing by Alexis Joly and Olivier Buisson
+                  In particular, we use the svm_c_linear_dcd_trainer to generate planes.
+                  We train it on randomly selected and randomly labeled points from v.
+                  The C SVM parameter is set to the given C argument.
+            - This function uses the supplied random number generator, rnd, to drive part
+              of it's processing.  Therefore, giving different random number generators
+              will produce different outputs.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename vector_type
+        >
+    projection_hash create_max_margin_projection_hash (
+        const vector_type& v,
+        const int bits,
+        const double C = 10
+    );
+    /*!
+        requires
+            - 0 < bits <= 32
+            - v.size() > 1
+            - vector_type == a std::vector or compatible type containing dlib::matrix 
+              objects, each representing a column vector of the same size.
+            - for all valid i, j:
+                - is_col_vector(v[i]) == true 
+                - v[i].size() > 0
+                - v[i].size() == v[j].size() 
+                - i.e. v contains only column vectors and all the column vectors
+                  have the same non-zero length
+        ensures
+            - returns create_max_margin_projection_hash(v,bits,C,dlib::rand())
+              (i.e. calls the above function with a default initialized random number generator)
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_CREATE_RANDOM_PROJECTION_HAsH_ABSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/lsh/hashes.h b/ml/dlib/dlib/lsh/hashes.h
new file mode 100644
index 000000000..35053ce4e
--- /dev/null
+++ b/ml/dlib/dlib/lsh/hashes.h
@@ -0,0 +1,219 @@
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_LSH_HAShES_Hh_
+#define DLIB_LSH_HAShES_Hh_
+
+#include "hashes_abstract.h"
+#include "../hash.h"
+#include "../matrix.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    class hash_similar_angles_64
+    {
+    public:
+        hash_similar_angles_64 (
+        ) : seed(0) {}
+
+        hash_similar_angles_64 (
+            const uint64 seed_ 
+        ) : seed(seed_) {}
+
+        uint64 get_seed (
+        ) const { return seed; }
+
+
+        typedef uint64 result_type;
+
+        template <
+            typename sparse_vector_type
+            >
+        typename disable_if<is_matrix<sparse_vector_type>,uint64>::type operator() (
+            const sparse_vector_type& v
+        ) const
+        {
+            typedef typename sparse_vector_type::value_type::second_type scalar_type;
+
+            uint64 temp = 0;
+            for (int i = 0; i < 64; ++i)
+            {
+                // compute the dot product between v and a Gaussian random vector.
+                scalar_type val = 0;
+                for (typename sparse_vector_type::const_iterator j = v.begin(); j != v.end(); ++j)
+                    val += j->second*gaussian_random_hash(j->first, i, seed);
+
+                if (val > 0)
+                    temp |= 1;
+                temp <<= 1;
+            }
+            return temp;
+        }
+
+        template <typename EXP>
+        uint64 operator() ( 
+            const matrix_exp<EXP>& v
+        ) const
+        {
+            typedef typename EXP::type T;
+            uint64 temp = 0;
+            for (unsigned long i = 0; i < 64; ++i)
+            {
+                if (dot(matrix_cast<T>(gaussian_randm(v.size(),1,i+seed*64)), v) > 0)
+                    temp |= 1;
+                temp <<= 1;
+            }
+            return temp;
+        }
+
+        unsigned int distance (
+            const result_type& a,
+            const result_type& b
+        ) const
+        {
+            return hamming_distance(a,b);
+        }
+
+    private:
+        const uint64 seed;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    class hash_similar_angles_128
+    {
+    public:
+        hash_similar_angles_128 (
+        ) : seed(0),hasher1(0), hasher2(1) {}
+
+        hash_similar_angles_128 (
+            const uint64 seed_
+        ) : seed(seed_),hasher1(2*seed),hasher2(2*seed+1) {}
+
+        uint64 get_seed (
+        ) const { return seed; }
+
+        typedef std::pair<uint64,uint64> result_type;
+
+        template <
+            typename vector_type 
+            >
+        result_type operator() (
+            const vector_type& v
+        ) const
+        {
+            return std::make_pair(hasher1(v), hasher2(v));
+        }
+
+        unsigned int distance (
+            const result_type& a,
+            const result_type& b
+        ) const
+        {
+            return hamming_distance(a.first,b.first) + 
+                   hamming_distance(a.second,b.second);
+        }
+
+    private:
+        const uint64 seed;
+        hash_similar_angles_64 hasher1;
+        hash_similar_angles_64 hasher2;
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    class hash_similar_angles_256
+    {
+    public:
+        hash_similar_angles_256 (
+        ) : seed(0), hasher1(0), hasher2(1) {}
+
+        hash_similar_angles_256 (
+            const uint64 seed_
+        ) : seed(seed_),hasher1(2*seed),hasher2(2*seed+1) {}
+
+        uint64 get_seed (
+        ) const { return seed; }
+
+        typedef std::pair<uint64,uint64> hash128_type;
+        typedef std::pair<hash128_type,hash128_type> result_type;
+
+        template <
+            typename vector_type 
+            >
+        result_type operator() (
+            const vector_type& v
+        ) const
+        {
+            return std::make_pair(hasher1(v), hasher2(v));
+        }
+
+        unsigned int distance (
+            const result_type& a,
+            const result_type& b
+        ) const
+        {
+            return hasher1.distance(a.first,b.first) + 
+                   hasher1.distance(a.second,b.second);
+        }
+
+    private:
+        const uint64 seed;
+        hash_similar_angles_128 hasher1;
+        hash_similar_angles_128 hasher2;
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    class hash_similar_angles_512
+    {
+    public:
+        hash_similar_angles_512 (
+        ) : seed(0), hasher1(0), hasher2(1) {}
+
+        hash_similar_angles_512 (
+            const uint64 seed_
+        ) : seed(seed_),hasher1(2*seed),hasher2(2*seed+1) {}
+
+        uint64 get_seed (
+        ) const { return seed; }
+
+
+        typedef hash_similar_angles_256::result_type hash256_type;
+        typedef std::pair<hash256_type,hash256_type> result_type;
+
+        template <
+            typename vector_type 
+            >
+        result_type operator() (
+            const vector_type& v
+        ) const
+        {
+            return std::make_pair(hasher1(v), hasher2(v));
+        }
+
+        unsigned int distance (
+            const result_type& a,
+            const result_type& b
+        ) const
+        {
+            return hasher1.distance(a.first,b.first) + 
+                   hasher1.distance(a.second,b.second);
+        }
+
+    private:
+        const uint64 seed;
+        hash_similar_angles_256 hasher1;
+        hash_similar_angles_256 hasher2;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_LSH_HAShES_Hh_
+
diff --git a/ml/dlib/dlib/lsh/hashes_abstract.h b/ml/dlib/dlib/lsh/hashes_abstract.h
new file mode 100644
index 000000000..27f8ddb69
--- /dev/null
+++ b/ml/dlib/dlib/lsh/hashes_abstract.h
@@ -0,0 +1,286 @@
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_LSH_HAShES_ABSTRACT_Hh_
+#ifdef DLIB_LSH_HAShES_ABSTRACT_Hh_
+
+#include "../matrix.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    class hash_similar_angles_64
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool for computing locality sensitive hashes that give
+                vectors with small angles between each other similar hash values.  In
+                particular, this object creates 64 random planes which pass though the
+                origin and uses them to create a 64bit hash.  To compute the hash for a new
+                vector, this object checks which side of each plane the vector falls on and
+                records this information into a 64bit integer.  
+        !*/
+
+    public:
+
+        hash_similar_angles_64 (
+        ); 
+        /*!
+            ensures
+                - #get_seed() == 0
+        !*/
+
+        hash_similar_angles_64 (
+            const uint64 seed
+        );
+        /*!
+            ensures
+                - #get_seed() == seed
+        !*/
+
+        uint64 get_seed (
+        ) const;
+        /*!
+            ensures
+                - returns the random seed used to generate the random planes used for
+                  hashing.
+        !*/
+
+        typedef uint64 result_type;
+
+        template <typename vector_type>
+        result_type perator() (
+            const vector_type& v
+        ) const;
+        /*!
+            requires
+                - v is an unsorted sparse vector or a dlib matrix representing either a
+                  column or row vector.
+            ensures
+                - returns a 64 bit hash of the input vector v.  The bits in the hash record
+                  which side of each random plane v falls on.  
+
+        !*/
+
+        unsigned int distance (
+            const result_type& a,
+            const result_type& b
+        ) const;
+        /*!
+            ensures
+                - returns the Hamming distance between the two hashes given to this
+                  function.  That is, we return the number of bits in a and b which differ.
+        !*/
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    struct hash_similar_angles_128
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool for computing locality sensitive hashes that give
+                vectors with small angles between each other similar hash values.  In
+                particular, this object creates 128 random planes which pass though the
+                origin and uses them to create a 128bit hash.  To compute the hash for a new
+                vector, this object checks which side of each plane the vector falls on and
+                records this information into a 128bit integer.  
+        !*/
+
+    public:
+
+        hash_similar_angles_128 (
+        ); 
+        /*!
+            ensures
+                - #get_seed() == 0
+        !*/
+
+        hash_similar_angles_128 (
+            const uint64 seed
+        );
+        /*!
+            ensures
+                - #get_seed() == seed
+        !*/
+
+        uint64 get_seed (
+        ) const;
+        /*!
+            ensures
+                - returns the random seed used to generate the random planes used for
+                  hashing.
+        !*/
+
+        typedef std::pair<uint64,uint64> result_type;
+
+        template <typename vector_type>
+        result_type perator() (
+            const vector_type& v
+        ) const;
+        /*!
+            requires
+                - v is an unsorted sparse vector or a dlib matrix representing either a
+                  column or row vector.
+            ensures
+                - returns a 128 bit hash of the input vector v.  The bits in the hash record
+                  which side of each random plane v falls on.  
+
+        !*/
+
+        unsigned int distance (
+            const result_type& a,
+            const result_type& b
+        ) const;
+        /*!
+            ensures
+                - returns the Hamming distance between the two hashes given to this
+                  function.  That is, we return the number of bits in a and b which differ.
+        !*/
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    struct hash_similar_angles_256
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool for computing locality sensitive hashes that give
+                vectors with small angles between each other similar hash values.  In
+                particular, this object creates 256 random planes which pass though the
+                origin and uses them to create a 256bit hash.  To compute the hash for a new
+                vector, this object checks which side of each plane the vector falls on and
+                records this information into a 256bit integer.  
+        !*/
+
+    public:
+
+        hash_similar_angles_256 (
+        ); 
+        /*!
+            ensures
+                - #get_seed() == 0
+        !*/
+
+        hash_similar_angles_256 (
+            const uint64 seed
+        );
+        /*!
+            ensures
+                - #get_seed() == seed
+        !*/
+
+        uint64 get_seed (
+        ) const;
+        /*!
+            ensures
+                - returns the random seed used to generate the random planes used for
+                  hashing.
+        !*/
+
+        typedef std::pair<uint64,uint64> hash128_type;
+        typedef std::pair<hash128_type,hash128_type> result_type;
+
+        template <typename vector_type>
+        result_type perator() (
+            const vector_type& v
+        ) const;
+        /*!
+            requires
+                - v is an unsorted sparse vector or a dlib matrix representing either a
+                  column or row vector.
+            ensures
+                - returns a 256 bit hash of the input vector v.  The bits in the hash record
+                  which side of each random plane v falls on.  
+
+        !*/
+
+        unsigned int distance (
+            const result_type& a,
+            const result_type& b
+        ) const;
+        /*!
+            ensures
+                - returns the Hamming distance between the two hashes given to this
+                  function.  That is, we return the number of bits in a and b which differ.
+        !*/
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    struct hash_similar_angles_512
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool for computing locality sensitive hashes that give
+                vectors with small angles between each other similar hash values.  In
+                particular, this object creates 512 random planes which pass though the
+                origin and uses them to create a 512bit hash.  To compute the hash for a new
+                vector, this object checks which side of each plane the vector falls on and
+                records this information into a 512bit integer.  
+        !*/
+
+    public:
+
+        hash_similar_angles_512 (
+        ); 
+        /*!
+            ensures
+                - #get_seed() == 0
+        !*/
+
+        hash_similar_angles_512 (
+            const uint64 seed
+        );
+        /*!
+            ensures
+                - #get_seed() == seed
+        !*/
+
+        uint64 get_seed (
+        ) const;
+        /*!
+            ensures
+                - returns the random seed used to generate the random planes used for
+                  hashing.
+        !*/
+
+        typedef hash_similar_angles_256::result_type hash256_type;
+        typedef std::pair<hash256_type,hash256_type> result_type;
+
+        template <typename vector_type>
+        result_type perator() (
+            const vector_type& v
+        ) const;
+        /*!
+            requires
+                - v is an unsorted sparse vector or a dlib matrix representing either a
+                  column or row vector.
+            ensures
+                - returns a 512 bit hash of the input vector v.  The bits in the hash record
+                  which side of each random plane v falls on.  
+
+        !*/
+
+        unsigned int distance (
+            const result_type& a,
+            const result_type& b
+        ) const;
+        /*!
+            ensures
+                - returns the Hamming distance between the two hashes given to this
+                  function.  That is, we return the number of bits in a and b which differ.
+        !*/
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_LSH_HAShES_ABSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/lsh/projection_hash.h b/ml/dlib/dlib/lsh/projection_hash.h
new file mode 100644
index 000000000..16de0ba11
--- /dev/null
+++ b/ml/dlib/dlib/lsh/projection_hash.h
@@ -0,0 +1,118 @@
+// Copyright (C) 2011  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_PROJECTION_HASh_Hh_
+#define DLIB_PROJECTION_HASh_Hh_
+
+#include "projection_hash_abstract.h"
+#include "../matrix.h"
+#include "../rand.h"
+#include <vector>
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    class projection_hash
+    {
+    public:
+
+        projection_hash() {}
+
+        template <typename EXP1, typename EXP2>
+        projection_hash(
+            const matrix_exp<EXP1>& proj_,
+            const matrix_exp<EXP2>& offset_
+        ) : proj(proj_), offset(offset_) 
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(proj.nr() == offset.nr(),
+                "\t projection_hash::projection_hash()"
+                << "\n\t Invalid arguments were given to this function."
+                << "\n\t proj.nr():   " << proj.nr() 
+                << "\n\t offset.nr(): " << offset.nr() 
+                );
+
+        }
+
+        const matrix<double>& get_projection_matrix (
+        ) const { return proj; }
+
+        const matrix<double,0,1>& get_offset_matrix (
+        ) const { return offset; }
+
+        unsigned long num_hash_bins (
+        ) const
+        {
+            return static_cast<unsigned long>(std::pow(2.0, (double)offset.size()));
+        }
+
+        template <typename EXP>
+        unsigned long operator() (
+            const matrix_exp<EXP>& v
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(is_col_vector(v) && 
+                        v.size() == get_projection_matrix().nc() &&
+                        v.size() > 0,
+                "\t unsigned long projection_hash::operator()(v)"
+                << "\n\t Invalid arguments were given to this function."
+                << "\n\t is_col_vector(v):             " << is_col_vector(v) 
+                << "\n\t get_projection_matrix().nc(): " << get_projection_matrix().nc() 
+                << "\n\t v.size():                     " << v.size() 
+                );
+
+            return do_hash(proj*matrix_cast<double>(v) + offset);
+        }
+
+    private:
+
+        template <typename EXP>
+        unsigned long do_hash (
+            const matrix_exp<EXP>& v
+        ) const
+        {
+            unsigned long h = 0;
+            for (long i = 0; i < v.size(); ++i)
+            {
+                h <<= 1;
+                if (v(i) > 0)
+                    h |= 1;
+            }
+            return h;
+        }
+
+        matrix<double> proj;
+        matrix<double,0,1> offset;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    inline void serialize (
+        const projection_hash& item,
+        std::ostream& out
+    )
+    {
+        serialize(item.get_projection_matrix(), out);
+        serialize(item.get_offset_matrix(), out);
+    }
+
+    inline void deserialize (
+        projection_hash& item,
+        std::istream& in 
+    )
+    {
+        matrix<double> proj;
+        matrix<double,0,1> offset;
+        deserialize(proj, in);
+        deserialize(offset, in);
+        item = projection_hash(proj, offset);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_PROJECTION_HASh_Hh_
+
diff --git a/ml/dlib/dlib/lsh/projection_hash_abstract.h b/ml/dlib/dlib/lsh/projection_hash_abstract.h
new file mode 100644
index 000000000..abe78d10c
--- /dev/null
+++ b/ml/dlib/dlib/lsh/projection_hash_abstract.h
@@ -0,0 +1,119 @@
+// Copyright (C) 2011  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_PROJECTION_HASh_ABSTRACT_Hh_
+#ifdef DLIB_PROJECTION_HASh_ABSTRACT_Hh_
+
+#include "../matrix.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    class projection_hash
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This is a tool for hashing elements of a vector space into the integers.  
+                It is intended to represent locality sensitive hashing functions such as 
+                the popular random projection hashing method.
+                
+                In particular, it represents hash functions of the form:
+                    hash bit 0 = sign(rowm(P*v + O,0))
+                    hash bit 1 = sign(rowm(P*v + O,1))
+                    hash bit 2 = sign(rowm(P*v + O,2))
+                    ...
+                Where v is the vector to be hashed.  The parameters of the projection
+                hash are the P and O matrices.  
+
+            THREAD SAFETY
+                The const members of this object can be called concurrently from multiple
+                threads, however, any operation that modifies the state of an instance of
+                this object must serialize access to that instance.
+        !*/
+    public:
+
+        projection_hash(
+        );
+        /*!
+            ensures
+                - #get_projection_matrix().size() == 0
+                - #get_offset_matrix().size() == 0
+        !*/
+
+        template <typename EXP1, typename EXP2>
+        projection_hash(
+            const matrix_exp<EXP1>& proj,
+            const matrix_exp<EXP2>& offset
+        ); 
+        /*!
+            requires
+                - proj.nr() == offset.nr()
+            ensures
+                - #get_projection_matrix() == proj
+                - #get_offset_matrix() == offset
+        !*/
+
+        const matrix<double>& get_projection_matrix (
+        ) const;
+        /*!
+            ensures
+                - returns the P matrix discussed above in the WHAT THIS OBJECT REPRESENTS
+                  section.
+        !*/
+
+        const matrix<double,0,1>& get_offset_matrix (
+        ) const; 
+        /*!
+            ensures
+                - returns the O matrix discussed above in the WHAT THIS OBJECT REPRESENTS
+                  section.
+        !*/
+
+        unsigned long num_hash_bins (
+        ) const;
+        /*!
+            ensures
+                - returns the number of possible outputs from this hashing function.
+                - Specifically, returns: std::pow(2, get_offset_matrix().size())
+        !*/
+
+        template <typename EXP>
+        unsigned long operator() (
+            const matrix_exp<EXP>& v
+        ) const;
+        /*!
+            requires
+                - is_col_vector(v) == true
+                - v.size() == get_projection_matrix().nc()
+                - v.size() > 0
+            ensures
+                - hashes v into the range [0, num_hash_bins()) using the method
+                  discussed in the WHAT THIS OBJECT REPRESENTS section.
+        !*/
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    void serialize (
+        const projection_hash& item,
+        std::ostream& out
+    );
+    /*!
+        provides serialization support 
+    !*/
+
+    void deserialize (
+        projection_hash& item,
+        std::istream& in 
+    );
+    /*!
+        provides deserialization support 
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_PROJECTION_HASh_ABSTRACT_Hh_
+